From 77d44533340751c9c04bd61affa8b4f2d2f8e9b8 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 5 Oct 2017 23:03:03 +0200 Subject: [PATCH 01/19] add new class for searches --- lib/SearchDescription.php | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 lib/SearchDescription.php diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php new file mode 100644 index 00000000..e46dc464 --- /dev/null +++ b/lib/SearchDescription.php @@ -0,0 +1,58 @@ + Date: Fri, 6 Oct 2017 00:14:48 +0200 Subject: [PATCH 02/19] move initial search setup to new class type --- lib/Geocode.php | 113 ++++++++++++++++---------------------- lib/SearchDescription.php | 64 +++++++++++++++++++-- 2 files changed, 107 insertions(+), 70 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index a403fa10..88a969a5 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -1023,76 +1023,59 @@ class Geocode $aSearchResults = array(); if ($sQuery || $this->aStructuredQuery) { - // Start with a blank search - $aSearches = array( - array( - 'iSearchRank' => 0, - 'iNamePhrase' => -1, - 'sCountryCode' => false, - 'aName' => array(), - 'aAddress' => array(), - 'aFullNameAddress' => array(), - 'aNameNonSearch' => array(), - 'aAddressNonSearch' => array(), - 'sOperator' => '', - 'aFeatureName' => array(), - 'sClass' => '', - 'sType' => '', - 'sHouseNumber' => '', - 'sPostcode' => '', - 'oNear' => $oNearPoint - ) - ); + // Start with a single blank search + $aSearches = array(new SearchDescription()); - // Any 'special' terms in the search? 
- $bSpecialTerms = false; - preg_match_all('/\\[([\\w_]*)=([\\w_]*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - foreach ($aSpecialTermsRaw as $aSpecialTerm) { - $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - if (!$bSpecialTerms) { - $aNewSearches = array(); - foreach ($aSearches as $aSearch) { - $aNewSearch = $aSearch; - $aNewSearch['sClass'] = $aSpecialTerm[1]; - $aNewSearch['sType'] = $aSpecialTerm[2]; - $aNewSearches[] = $aNewSearch; - } - - $aSearches = $aNewSearches; - $bSpecialTerms = true; - } + if ($oNearPoint) { + $aSearches[0]->setNear($oNearPoint); } - preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) { - $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']); + if ($sQuery) { + $sQuery = $aSearches[0]->extractKeyValuePairs($sQuery); + } + + $sSpecialTerm = ''; + if ($sQuery) { + preg_match_all( + '/\\[([\\w ]*)\\]/u', + $sQuery, + $aSpecialTermsRaw, + PREG_SET_ORDER + ); + foreach ($aSpecialTermsRaw as $aSpecialTerm) { + $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); + if (!$sSpecialTerm) { + $sSpecialTerm = $aSpecialTerm[1]; + } + } + } + if (!$sSpecialTerm && $this->aStructuredQuery + && isset($this->aStructuredQuery['amenity'])) { + $sSpecialTerm = $this->aStructuredQuery['amenity']; unset($this->aStructuredQuery['amenity']); } - foreach ($aSpecialTermsRaw as $aSpecialTerm) { - $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - if ($bSpecialTerms) { - continue; - } - - $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".pg_escape_string($aSpecialTerm[1])."') AS string")); - $sSQL = 'SELECT * '; - $sSQL .= 'FROM ( '; - $sSQL .= ' SELECT word_id, word_token, word, class, type, country_code, operator'; - $sSQL .= ' FROM word '; + if ($sSpecialTerm && !$aSearches[0]->hasOperator()) { + $sSpecialTerm = pg_escape_string($sSpecialTerm); + 
$sToken = chksql( + $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"), + "Cannot decode query. Wrong encoding?" + ); + $sSQL = 'SELECT class, type FROM word '; $sSQL .= ' WHERE word_token in (\' '.$sToken.'\')'; - $sSQL .= ') AS x '; - $sSQL .= ' WHERE (class is not null AND class not in (\'place\'))'; + $sSQL .= ' AND class is not null AND class not in (\'place\')'; if (CONST_Debug) var_Dump($sSQL); $aSearchWords = chksql($this->oDB->getAll($sSQL)); $aNewSearches = array(); - foreach ($aSearches as $aSearch) { + foreach ($aSearches as $oSearch) { foreach ($aSearchWords as $aSearchTerm) { - $aNewSearch = $aSearch; - $aNewSearch['sClass'] = $aSearchTerm['class']; - $aNewSearch['sType'] = $aSearchTerm['type']; - $aNewSearches[] = $aNewSearch; - $bSpecialTerms = true; + $oNewSearch = clone $oSearch; + $oNewSearch->setPoiSearch( + Operator::TYPE, + $aSearchTerm['class'], + $aSearchTerm['type'], + ); + $aNewSearches[] = $oNewSearch; } } $aSearches = $aNewSearches; @@ -1212,10 +1195,10 @@ class Geocode foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { - if ($aSearch['iSearchRank'] < $this->iMaxRank) { - if (!isset($aReverseGroupedSearches[$aSearch['iSearchRank']])) $aReverseGroupedSearches[$aSearch['iSearchRank']] = array(); - $aReverseGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) { + $aReverseGroupedSearches[$aSearch->getRank()] = array(); } + $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch; } } @@ -1226,9 +1209,9 @@ class Geocode // Re-group the searches by their score, junk anything over 20 as just not worth trying $aGroupedSearches = array(); foreach ($aSearches as $aSearch) { - if ($aSearch['iSearchRank'] < $this->iMaxRank) { - if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array(); - $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + if ($aSearch->getRank() < $this->iMaxRank) { + 
if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array(); + $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } ksort($aGroupedSearches); diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index e46dc464..f2785c1e 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -8,15 +8,17 @@ namespace Nominatim; abstract final class Operator { /// No operator selected. - const NONE = -1; + const NONE = 0; + /// Search for POI of the given type. + const TYPE = 1; /// Search for POIs near the given place. - const NEAR = 0; + const NEAR = 2; /// Search for POIS in the given place. - const IN = 1; + const IN = 3; /// Search for POIS named as given. - const NAME = 3; + const NAME = 4; /// Search for postcodes. - const POSTCODE = 4; + const POSTCODE = 5; } /** @@ -55,4 +57,56 @@ class SearchDescription /// Index of phrase currently processed private $iNamePhrase = -1; + + public getRank() + { + return $this->iSearchRank; + } + + /** + * Set the geographic search radius. + */ + public setNear(&$oNearPoint) + { + $this->oNearPoint = $oNearPoint; + } + + public setPoiSearch($iOperator, $sClass, $sType) + { + $this->iOperator = $iOperator; + $this->sClass = $sClass; + $this->sType = $sType; + } + + public hasOperator() + { + return $this->iOperator != Operator::NONE; + } + + /** + * Extract special terms from the query, amend the search + * and return the shortended query. + * + * Only the first special term found will be used but all will + * be removed from the query. + */ + public extractKeyValuePairs(&$oDB, $sQuery) + { + // Search for terms of kind [=]. 
+ preg_match_all( + '/\\[([\\w_]*)=([\\w_]*)\\]/', + $sQuery, + $aSpecialTermsRaw, + PREG_SET_ORDER + ); + + foreach ($aSpecialTermsRaw as $aTerm) { + $sQuery = str_replace($aTerm[0], ' ', $sQuery); + if (!$this->hasOperator()) { + $this->setPoiSearch(Operator::TYPE, $aTerm[1], $aTerm[2]); + } + } + + return $sQuery; + } }; From 96b6a1a41892224b79fb99917981843aef6a4465 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 7 Oct 2017 12:01:56 +0200 Subject: [PATCH 03/19] use SearchDescription class in query loop --- lib/Geocode.php | 493 +++++---------------------------- lib/SearchDescription.php | 559 +++++++++++++++++++++++++++++++++++++- 2 files changed, 625 insertions(+), 427 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 88a969a5..f65a485a 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -1237,440 +1237,89 @@ class Geocode $iQueryLoop = 0; foreach ($aGroupedSearches as $iGroupedRank => $aSearches) { $iGroupLoop++; - foreach ($aSearches as $aSearch) { + foreach ($aSearches as $oSearch) { $iQueryLoop++; $searchedHousenumber = -1; if (CONST_Debug) echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; - if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens); + if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens); - // No location term? - if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) { - if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) { - // Just looking for a country by code - look it up - if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { - $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; - if ($bBoundingBoxSearch) - $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)"; - $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } else { - $aPlaceIDs = array(); - } - } else { - if (!$bBoundingBoxSearch && !$aSearch['oNear']) continue; - if (!$aSearch['sClass']) continue; - - $sSQL = "SELECT COUNT(*) FROM pg_tables WHERE tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'"; - if (chksql($this->oDB->getOne($sSQL))) { - $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." 
ct"; - if ($sCountryCodesSQL) $sSQL .= " JOIN placex USING (place_id)"; - if ($aSearch['oNear']) { - $sSQL .= " WHERE ".$aSearch['oNear']->withinSQL('ct.centroid'); - } else { - $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)"; - } - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; - if (sizeof($this->aExcludePlaceIDs)) { - $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")"; - } - if ($this->sViewboxCentreSQL) { - $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; - } elseif ($aSearch['oNear']) { - $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('ct.centroid').' ASC'; - } - $sSQL .= " limit $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } else if ($aSearch['oNear']) { - $sSQL = "SELECT place_id "; - $sSQL .= "FROM placex "; - $sSQL .= "WHERE class='".$aSearch['sClass']."' "; - $sSQL .= " AND type='".$aSearch['sType']."'"; - $sSQL .= " AND ".$aSearch['oNear']->withinSQL('geometry'); - $sSQL .= " AND linked_place_id is null"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; - $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('centroid')." ASC"; - $sSQL .= " LIMIT $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } - } - } elseif ($aSearch['oNear'] && !sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['sClass']) { - // If a coordinate is given, the search must either - // be for a name or a special search. Ignore everythin else. 
- $aPlaceIDs = array(); - } elseif ($aSearch['sOperator'] == 'postcode') { - $sSQL = "SELECT p.place_id FROM location_postcode p "; - if (sizeof($aSearch['aAddress'])) { - $sSQL .= ", search_name s "; - $sSQL .= "WHERE s.place_id = p.parent_place_id "; - $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join($aSearch['aAddress'], ",")."] AND "; - } else { - $sSQL .= " WHERE "; - } - $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; - if ($aSearch['sCountryCode']) { - $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'"; - } elseif ($sCountryCodesSQL) { - $sSQL .= " AND p.country_code in ($sCountryCodesSQL)"; - } - $sSQL .= " LIMIT $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } else { - $aPlaceIDs = array(); - - // First we need a position, either aName or fLat or both - $aTerms = array(); - $aOrder = array(); - - if ($aSearch['sHouseNumber'] && sizeof($aSearch['aAddress'])) { - $sHouseNumberRegex = '\\\\m'.$aSearch['sHouseNumber'].'\\\\M'; - $aOrder[] = ""; - $aOrder[0] = " ("; - $aOrder[0] .= " EXISTS("; - $aOrder[0] .= " SELECT place_id "; - $aOrder[0] .= " FROM placex "; - $aOrder[0] .= " WHERE parent_place_id = search_name.place_id"; - $aOrder[0] .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."' "; - $aOrder[0] .= " LIMIT 1"; - $aOrder[0] .= " ) "; - // also housenumbers from interpolation lines table are needed - $aOrder[0] .= " OR EXISTS("; - $aOrder[0] .= " SELECT place_id "; - $aOrder[0] .= " FROM location_property_osmline "; - $aOrder[0] .= " WHERE parent_place_id = search_name.place_id"; - $aOrder[0] .= " AND startnumber is not NULL"; - $aOrder[0] .= " AND ".intval($aSearch['sHouseNumber']).">=startnumber "; - $aOrder[0] .= " AND ".intval($aSearch['sHouseNumber'])."<=endnumber "; - $aOrder[0] .= " LIMIT 1"; - $aOrder[0] .= " )"; - $aOrder[0] .= " )"; - $aOrder[0] .= " DESC"; - } - - // TODO: filter out the pointless search 
terms (2 letter name tokens and less) - // they might be right - but they are just too darned expensive to run - if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; - if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { - // For infrequent name terms disable index usage for address - if (CONST_Search_NameOnlySearchFrequencyThreshold - && sizeof($aSearch['aName']) == 1 - && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold - ) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; - } else { - $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]"; - } - } - } - if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; - if ($aSearch['sHouseNumber']) { - $aTerms[] = "address_rank between 16 and 27"; - } elseif (!$aSearch['sClass'] || $aSearch['sOperator'] == 'name') { - if ($this->iMinAddressRank > 0) { - $aTerms[] = "address_rank >= ".$this->iMinAddressRank; - } - if ($this->iMaxAddressRank < 30) { - $aTerms[] = "address_rank <= ".$this->iMaxAddressRank; - } - } - if ($aSearch['oNear']) { - $aTerms[] = $aSearch['oNear']->withinSQL('centroid'); - - $aOrder[] = $aSearch['oNear']->distanceSQL('centroid'); - } elseif ($aSearch['sPostcode']) { - if (!sizeof($aSearch['aAddress'])) { - $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."' AND ST_DWithin(search_name.centroid, p.geometry, 
0.1))"; - } else { - $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')"; - } - } - if (sizeof($this->aExcludePlaceIDs)) { - $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")"; - } - if ($sCountryCodesSQL) { - $aTerms[] = "country_code in ($sCountryCodesSQL)"; - } - - if ($bBoundingBoxSearch) $aTerms[] = "centroid && $this->sViewboxSmallSQL"; - if ($oNearPoint) { - $aOrder[] = $oNearPoint->distanceSQL('centroid'); - } - - if ($aSearch['sHouseNumber']) { - $sImportanceSQL = '- abs(26 - address_rank) + 3'; - } else { - $sImportanceSQL = '(CASE WHEN importance = 0 OR importance IS NULL THEN 0.75-(search_rank::float/40) ELSE importance END)'; - } - if ($this->sViewboxSmallSQL) $sImportanceSQL .= " * CASE WHEN ST_Contains($this->sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END"; - if ($this->sViewboxLargeSQL) $sImportanceSQL .= " * CASE WHEN ST_Contains($this->sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END"; - - $aOrder[] = "$sImportanceSQL DESC"; - if (sizeof($aSearch['aFullNameAddress'])) { - $sExactMatchSQL = ' ( '; - $sExactMatchSQL .= ' SELECT count(*) FROM ( '; - $sExactMatchSQL .= ' SELECT unnest(ARRAY['.join($aSearch['aFullNameAddress'], ",").']) '; - $sExactMatchSQL .= ' INTERSECT '; - $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; - $sExactMatchSQL .= ' ) s'; - $sExactMatchSQL .= ') as exactmatch'; - $aOrder[] = 'exactmatch DESC'; - } else { - $sExactMatchSQL = '0::int as exactmatch'; - } - - if (sizeof($aTerms)) { - $sSQL = "SELECT place_id, "; - $sSQL .= $sExactMatchSQL; - $sSQL .= " FROM search_name"; - $sSQL .= " WHERE ".join(' and ', $aTerms); - $sSQL .= " ORDER BY ".join(', ', $aOrder); - if ($aSearch['sHouseNumber'] || $aSearch['sClass']) { - $sSQL .= " LIMIT 20"; - } elseif (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && $aSearch['sClass']) { - $sSQL .= " LIMIT 1"; - } else { - $sSQL .= " LIMIT ".$this->iLimit; - } 
- - if (CONST_Debug) var_dump($sSQL); - $aViewBoxPlaceIDs = chksql( - $this->oDB->getAll($sSQL), - "Could not get places for search terms." + $aPlaceIDs = array(); + if ($oSearch->isCountrySearch()) { + // Just looking for a country - look it up + if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { + $aPlaceIDs = $oSearch->queryCountry( + $this->oDB, + $bBoundingBoxSearch ? $this->sViewboxSmallSQL : '' ); - //var_dump($aViewBoxPlaceIDs); - // Did we have an viewbox matches? - $aPlaceIDs = array(); - $bViewBoxMatch = false; - foreach ($aViewBoxPlaceIDs as $aViewBoxRow) { - //if ($bViewBoxMatch == 1 && $aViewBoxRow['in_small'] == 'f') break; - //if ($bViewBoxMatch == 2 && $aViewBoxRow['in_large'] == 'f') break; - //if ($aViewBoxRow['in_small'] == 't') $bViewBoxMatch = 1; - //else if ($aViewBoxRow['in_large'] == 't') $bViewBoxMatch = 2; - $aPlaceIDs[] = $aViewBoxRow['place_id']; - $this->exactMatchCache[$aViewBoxRow['place_id']] = $aViewBoxRow['exactmatch']; + } + } elseif (!$oSearch->isNamedSearch()) { + // looking for a POI in a geographic area + if (!$bBoundingBoxSearch && !$oSearch->isNearSearch()) { + continue; + } + + $aPlaceIDs = $oSearch->queryNearbyPoi( + $this->oDB, + $sCountryCodesSQL, + $bBoundingBoxSearch ? $this->sViewboxSmallSQL : '', + $sViewboxCentreSQL, + $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', + $this->iLimit + ); + } elseif ($oSearch->isOperator(Operator::POSTCODE)) { + $aPlaceIDs = $oSearch->queryPostcode( + $oDB, + $sCountryCodesSQL, + $this->iLimit + ); + } else { + // Ordinary search: + // First search for places according to name and address. + $aNamedPlaceIDs = $oSearch->queryNamedPlace( + $this->oDB, + $aWordFrequencyScores, + $sCountryCodesSQL, + $this->iMinAddressRank, + $this->iMaxAddressRank, + $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', + $bBoundingBoxSearch ? $this->sViewboxSmallSQL : '', + $bBoundingBoxSearch ? 
$this->sViewboxLargeSQL : '', + $this->iLimit + ); + + if (sizeof($aNamedPlaceIDs)) { + foreach ($aNamedPlaceIDs as $aRow) { + $aPlaceIDs[] = $aRow['place_id']; + $this->exactMatchCache[$aRow['place_id']] = $aRow['exactmatch']; } } - //var_Dump($aPlaceIDs); - //exit; //now search for housenumber, if housenumber provided - if ($aSearch['sHouseNumber'] && sizeof($aPlaceIDs)) { - $searchedHousenumber = intval($aSearch['sHouseNumber']); - $aRoadPlaceIDs = $aPlaceIDs; - $sPlaceIDs = join(',', $aPlaceIDs); + if ($oSearch->hasHouseNumber() && sizeof($aPlaceIDs)) { + $aResult = $oSearch->queryHouseNumber( + $this->oDB, + $aPlaceIDs, + $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '' + $this->iLimit + ); - // Now they are indexed, look for a house attached to a street we found - $sHouseNumberRegex = '\\\\m'.$aSearch['sHouseNumber'].'\\\\M'; - $sSQL = "SELECT place_id FROM placex "; - $sSQL .= "WHERE parent_place_id in (".$sPlaceIDs.") and transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; - if (sizeof($this->aExcludePlaceIDs)) { - $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")"; + if (sizeof($aResult)) { + $searchedHousenumber = $aResult['iHouseNumber']; + $aPlaceIDs = $aResults['aPlaceIDs']; + } elseif (!$oSearch->looksLikeFullAddress()) { + $aPlaceIDs = array(); } - $sSQL .= " LIMIT $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - - // if nothing found, search in the interpolation line table - if (!sizeof($aPlaceIDs)) { - // do we need to use transliteration and the regex for housenumbers??? 
- //new query for lines, not housenumbers anymore - $sSQL = "SELECT distinct place_id FROM location_property_osmline"; - $sSQL .= " WHERE startnumber is not NULL and parent_place_id in (".$sPlaceIDs.") and ("; - if ($searchedHousenumber%2 == 0) { - //if housenumber is even, look for housenumber in streets with interpolationtype even or all - $sSQL .= "interpolationtype='even'"; - } else { - //look for housenumber in streets with interpolationtype odd or all - $sSQL .= "interpolationtype='odd'"; - } - $sSQL .= " or interpolationtype='all') and "; - $sSQL .= $searchedHousenumber.">=startnumber and "; - $sSQL .= $searchedHousenumber."<=endnumber"; - - if (sizeof($this->aExcludePlaceIDs)) { - $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")"; - } - //$sSQL .= " limit $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - //get place IDs - $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); - } - - // If nothing found try the aux fallback table - if (CONST_Use_Aux_Location_data && !sizeof($aPlaceIDs)) { - $sSQL = "SELECT place_id FROM location_property_aux "; - $sSQL .= " WHERE parent_place_id in (".$sPlaceIDs.") "; - $sSQL .= " AND housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; - if (sizeof($this->aExcludePlaceIDs)) { - $sSQL .= " AND parent_place_id not in (".join(',', $this->aExcludePlaceIDs).")"; - } - //$sSQL .= " limit $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } - - //if nothing was found in placex or location_property_aux, then search in Tiger data for this housenumber(location_property_tiger) - if (CONST_Use_US_Tiger_Data && !sizeof($aPlaceIDs)) { - $sSQL = "SELECT distinct place_id FROM location_property_tiger"; - $sSQL .= " WHERE parent_place_id in (".$sPlaceIDs.") and ("; - if ($searchedHousenumber%2 == 0) { - $sSQL .= "interpolationtype='even'"; - } else { - $sSQL .= "interpolationtype='odd'"; - } - $sSQL .= " or interpolationtype='all') and "; - $sSQL .= 
$searchedHousenumber.">=startnumber and "; - $sSQL .= $searchedHousenumber."<=endnumber"; - - if (sizeof($this->aExcludePlaceIDs)) { - $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")"; - } - //$sSQL .= " limit $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - //get place IDs - $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); - } - - // Fallback to the road (if no housenumber was found) - if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber']) - && ($aSearch['aAddress'] || $aSearch['sCountryCode'])) { - $aPlaceIDs = $aRoadPlaceIDs; - //set to -1, if no housenumbers were found - $searchedHousenumber = -1; - } - //else: housenumber was found, remains saved in searchedHousenumber } - - if ($aSearch['sClass'] && sizeof($aPlaceIDs)) { - $sPlaceIDs = join(',', $aPlaceIDs); - $aClassPlaceIDs = array(); - - if (!$aSearch['sOperator'] || $aSearch['sOperator'] == 'name') { - // If they were searching for a named class (i.e. 'Kings Head pub') then we might have an extra match - $sSQL = "SELECT place_id "; - $sSQL .= " FROM placex "; - $sSQL .= " WHERE place_id in ($sPlaceIDs) "; - $sSQL .= " AND class='".$aSearch['sClass']."' "; - $sSQL .= " AND type='".$aSearch['sType']."'"; - $sSQL .= " AND linked_place_id is null"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; - $sSQL .= " ORDER BY rank_search ASC "; - $sSQL .= " LIMIT $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } - - if (!$aSearch['sOperator'] || $aSearch['sOperator'] == 'near') { // & in - $sClassTable = 'place_classtype_'.$aSearch['sClass'].'_'.$aSearch['sType']; - $sSQL = "SELECT count(*) FROM pg_tables "; - $sSQL .= "WHERE tablename = '$sClassTable'"; - $bCacheTable = chksql($this->oDB->getOne($sSQL)); - - $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; - - if (CONST_Debug) var_dump($sSQL); - $this->iMaxRank = 
((int)chksql($this->oDB->getOne($sSQL))); - - // For state / country level searches the normal radius search doesn't work very well - $sPlaceGeom = false; - if ($this->iMaxRank < 9 && $bCacheTable) { - // Try and get a polygon to search in instead - $sSQL = "SELECT geometry "; - $sSQL .= " FROM placex"; - $sSQL .= " WHERE place_id in ($sPlaceIDs)"; - $sSQL .= " AND rank_search < $this->iMaxRank + 5"; - $sSQL .= " AND ST_Geometrytype(geometry) in ('ST_Polygon','ST_MultiPolygon')"; - $sSQL .= " ORDER BY rank_search ASC "; - $sSQL .= " LIMIT 1"; - if (CONST_Debug) var_dump($sSQL); - $sPlaceGeom = chksql($this->oDB->getOne($sSQL)); - } - - if ($sPlaceGeom) { - $sPlaceIDs = false; - } else { - $this->iMaxRank += 5; - $sSQL = "SELECT place_id FROM placex WHERE place_id in ($sPlaceIDs) and rank_search < $this->iMaxRank"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - $sPlaceIDs = join(',', $aPlaceIDs); - } - - if ($sPlaceIDs || $sPlaceGeom) { - $fRange = 0.01; - if ($bCacheTable) { - // More efficient - can make the range bigger - $fRange = 0.05; - - $sOrderBySQL = ''; - if ($oNearPoint) { - $sOrderBySQL = $oNearPoint->distanceSQL('l.centroid'); - } elseif ($sPlaceIDs) { - $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)"; - } elseif ($sPlaceGeom) { - $sOrderBySQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; - } - - $sSQL = "select distinct i.place_id".($sOrderBySQL?', i.order_term':'')." from ("; - $sSQL .= "select l.place_id".($sOrderBySQL?','.$sOrderBySQL.' as order_term':'')." from ".$sClassTable." 
as l"; - if ($sCountryCodesSQL) $sSQL .= " join placex as lp using (place_id)"; - if ($sPlaceIDs) { - $sSQL .= ",placex as f where "; - $sSQL .= "f.place_id in ($sPlaceIDs) and ST_DWithin(l.centroid, f.centroid, $fRange) "; - } - if ($sPlaceGeom) { - $sSQL .= " where "; - $sSQL .= "ST_Contains('".$sPlaceGeom."', l.centroid) "; - } - if (sizeof($this->aExcludePlaceIDs)) { - $sSQL .= " and l.place_id not in (".join(',', $this->aExcludePlaceIDs).")"; - } - if ($sCountryCodesSQL) $sSQL .= " and lp.country_code in ($sCountryCodesSQL)"; - $sSQL .= 'limit 300) i '; - if ($sOrderBySQL) $sSQL .= "order by order_term asc"; - if ($this->iOffset) $sSQL .= " offset $this->iOffset"; - $sSQL .= " limit $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); - } else { - if ($aSearch['oNear']) { - $fRange = $aSearch['oNear']->radius(); - } - - $sOrderBySQL = ''; - if ($oNearPoint) { - $sOrderBySQL = $oNearPoint->distanceSQL('l.geometry'); - } else { - $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)"; - } - - $sSQL = "SELECT distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:''); - $sSQL .= " FROM placex as l, placex as f "; - $sSQL .= " WHERE f.place_id in ($sPlaceIDs) "; - $sSQL .= " AND ST_DWithin(l.geometry, f.centroid, $fRange) "; - $sSQL .= " AND l.class='".$aSearch['sClass']."' "; - $sSQL .= " AND l.type='".$aSearch['sType']."' "; - if (sizeof($this->aExcludePlaceIDs)) { - $sSQL .= " AND l.place_id not in (".join(',', $this->aExcludePlaceIDs).")"; - } - if ($sCountryCodesSQL) $sSQL .= " AND l.country_code in ($sCountryCodesSQL)"; - if ($sOrderBySQL) $sSQL .= "ORDER BY ".$sOrderBySQL." 
ASC"; - if ($this->iOffset) $sSQL .= " OFFSET $this->iOffset"; - $sSQL .= " limit $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); - } - } - } - $aPlaceIDs = $aClassPlaceIDs; + // finally get POIs if requested + if ($oSearch->isPoiSearch() && sizeof($aPlaceIDs)) { + $aPlaceIDs = $oSearch->queryPoiByOperator( + $this->oDB, + $aPlaceIDs, + $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '' + $this->iLimit + ); } } @@ -1679,10 +1328,10 @@ class Geocode var_Dump($aPlaceIDs); } - if (sizeof($aPlaceIDs) && $aSearch['sPostcode']) { + if (sizeof($aPlaceIDs) && $oSearch->getPostcode()) { $sSQL = 'SELECT place_id FROM placex'; $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; - $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'"; + $sSQL .= " AND postcode = '".$oSearch->getPostcode()."'"; if (CONST_Debug) var_dump($sSQL); $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); if ($aFilteredPlaceIDs) { diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index f2785c1e..99860ce0 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -58,27 +58,105 @@ class SearchDescription /// Index of phrase currently processed private $iNamePhrase = -1; - public getRank() + public function getRank() { return $this->iSearchRank; } + public function getPostCode() + { + return $this->sPostcode; + } + /** * Set the geographic search radius. */ - public setNear(&$oNearPoint) + public function setNear(&$oNearPoint) { $this->oNearPoint = $oNearPoint; } - public setPoiSearch($iOperator, $sClass, $sType) + public function setPoiSearch($iOperator, $sClass, $sType) { $this->iOperator = $iOperator; $this->sClass = $sClass; $this->sType = $sType; } - public hasOperator() + /** + * Check if name or address for the search are specified. 
+ */ + public function isNamedSearch() + { + return sizeof($this->aName) > 0 || sizeof($this->aAddress) > 0; + } + + /** + * Check if only a country is requested. + */ + public function isCountrySearch() + { + return $this->sCountryCode && sizeof($this->aName) == 0 + && !$this->iOperator && !$this->oNear; + } + + /** + * Check if a search near a geographic location is requested. + */ + public function isNearSearch() + { + return (bool) $this->oNear; + } + + public function isPoiSearch() + { + return (bool) $this->sClass; + } + + public function looksLikeFullAddress() + { + return sizeof($this->aName) + && (sizeof($this->aAddress || $this->sCountryCode)) + && preg_match('/[0-9]+/', $this->sHouseNumber); + } + + public function isOperator($iType) + { + return $this->iOperator == $iType; + } + + public function hasHouseNumber() + { + return (bool) $this->sHouseNumber; + } + + public function poiTable() + { + return 'place_classtype_'.$this->sClass.'_'.$this->sType; + } + + public function addressArraySQL() + { + return 'ARRAY['.join(',', $this->aAddress).']'; + } + public function nameArraySQL() + { + return 'ARRAY['.join(',', $this->aName).']'; + } + + public function countryCodeSQL($sVar, $sCountryList) + { + if ($this->sCountryCode) { + return $sVar.' = \''.$this->sCountryCode."'"; + } + if ($sCountryList) { + return $sVar.' in ('.$this->sCountryCode.')'; + } + + return ''; + } + + public function hasOperator() { return $this->iOperator != Operator::NONE; } @@ -90,7 +168,7 @@ class SearchDescription * Only the first special term found will be used but all will * be removed from the query. */ - public extractKeyValuePairs(&$oDB, $sQuery) + public function extractKeyValuePairs($sQuery) { // Search for terms of kind [=]. 
preg_match_all( @@ -109,4 +187,475 @@ class SearchDescription return $sQuery; } + + public function queryCountry(&$oDB, $sViewboxSQL) + { + $sSQL = 'SELECT place_id FROM placex '; + $sSQL .= "WHERE country_code='".$this->sCountryCode."'"; + $sSQL .= ' AND rank_search = 4'; + if ($ViewboxSQL) { + $sSQL .= " AND ST_Intersects($sViewboxSQL, geometry)"; + } + $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; + + if (CONST_Debug) var_dump($sSQL); + + return chksql($oDB->getCol($sSQL)); + } + + public function queryNearbyPoi(&$oDB, $sCountryList, $sViewboxSQL, $sViewboxCentreSQL, $sExcludeSQL, $iLimit) + { + if (!$this->sClass) { + return array(); + } + + $sPoiTable = $this->poiTable(); + + $sSQL = 'SELECT count(*) FROM pg_tables WHERE tablename = \''.$sPoiTable."'"; + if (chksql($oDB->getOne($sSQL))) { + $sSQL = 'SELECT place_id FROM '.$sPoiTable.' ct'; + if ($sCountryList) { + $sSQL .= ' JOIN placex USING (place_id)'; + } + if ($this->oNearPoint) { + $sSQL .= ' WHERE '.$this->oNearPoint->withinSQL('ct.centroid'); + } else { + $sSQL .= " WHERE ST_Contains($sViewboxSQL, ct.centroid)"; + } + if ($sCountryList) { + $sSQL .= " AND country_code in ($sCountryList)"; + } + if ($sExcludeSQL) { + $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; + } + if ($sViewboxCentreSQL) { + $sSQL .= " ORDER BY ST_Distance($sViewboxCentreSQL, ct.centroid) ASC"; + } elseif ($this->oNearPoint) { + $sSQL .= ' ORDER BY '.$this->oNearPoint->distanceSQL('ct.centroid').' ASC'; + } + $sSQL .= " limit $iLimit"; + if (CONST_Debug) var_dump($sSQL); + return chksql($this->oDB->getCol($sSQL)); + } + + if ($this->oNearPoint) { + $sSQL = 'SELECT place_id FROM placex WHERE '; + $sSQL .= 'class=\''.$this->sClass."' and type='".$this->sType."'"; + $sSQL .= ' AND '.$this->oNearPoint->withinSQL('geometry'); + $sSQL .= ' AND linked_place_id is null'; + if ($sCountryList) { + $sSQL .= " AND country_code in ($sCountryList)"; + } + $sSQL .= ' ORDER BY '.$this->oNearPoint->distanceSQL('centroid')." 
ASC"; + $sSQL .= " LIMIT $iLimit"; + if (CONST_Debug) var_dump($sSQL); + return chksql($this->oDB->getCol($sSQL)); + } + + return array(); + } + + public function queryPostcode(&$oDB, $sCountryList, $iLimit) + { + $sSQL = 'SELECT p.place_id FROM location_postcode p '; + + if (sizeof($this->aAddress)) { + $sSQL .= ', search_name s '; + $sSQL .= 'WHERE s.place_id = p.parent_place_id '; + $sSQL .= 'AND array_cat(s.nameaddress_vector, s.name_vector)'; + $sSQL .= ' @> '.$this->addressArraySQL().' AND '; + } else { + $sSQL .= 'WHERE '; + } + + $sSQL .= "p.postcode = '".pg_escape_string(reset($this->$aName))."'"; + $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); + if ($sCountryTerm) { + $sSQL .= ' AND '.$sCountyTerm; + } + $sSQL .= " LIMIT $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + return chksql($this->oDB->getCol($sSQL)); + } + + public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $sExcludeSQL, $sViewboxSmall, $sViewboxLarge, $iLimit) + { + $aTerms = array(); + $aOrder = array(); + + if ($this->sHouseNumber && sizeof($this->aAddress)) { + $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; + $aOrder[] = ' ('; + $aOrder[0] .= 'EXISTS('; + $aOrder[0] .= ' SELECT place_id'; + $aOrder[0] .= ' FROM placex'; + $aOrder[0] .= ' WHERE parent_place_id = search_name.place_id'; + $aOrder[0] .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; + $aOrder[0] .= ' LIMIT 1'; + $aOrder[0] .= ') '; + // also housenumbers from interpolation lines table are needed + if (preg_match('/[0-9]+/', $this->sHouseNumber)) { + $iHouseNumber = intval($this->sHouseNumber); + $aOrder[0] .= 'OR EXISTS('; + $aOrder[0] .= ' SELECT place_id '; + $aOrder[0] .= ' FROM location_property_osmline '; + $aOrder[0] .= ' WHERE parent_place_id = search_name.place_id'; + $aOrder[0] .= ' AND startnumber is not NULL'; + $aOrder[0] .= ' AND '.$iHouseNumber.'>=startnumber '; + $aOrder[0] .= ' AND 
'.$iHouseNumber.'<=endnumber '; + $aOrder[0] .= ' LIMIT 1'; + $aOrder[0] .= ')'; + } + $aOrder[0] .= ') DESC'; + } + + if (sizeof($this->aName)) { + $aTerms[] = 'name_vector @> '.$this->nameArraySQL(); + } + if (sizeof($this->aAddress)) { + // For infrequent name terms disable index usage for address + if (CONST_Search_NameOnlySearchFrequencyThreshold + && sizeof($this->aName) == 1 + && $aWordFrequencyScores[$this->aName[reset($this->aName)]] + < CONST_Search_NameOnlySearchFrequencyThreshold + ) { + $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.$this->addressArraySQL(); + } else { + $aTerms[] = 'nameaddress_vector @> '.$this->addressArraySQL(); + } + } + + $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); + if ($sCountryTerm) { + $aTerms[] = $sCountryTerm; + } + + if ($this->sHouseNumber) { + $aTerms[] = "address_rank between 16 and 27"; + } elseif (!$this->sClass || $this->iOperator == Operator::NAME) { + if ($iMinAddressRank > 0) { + $aTerms[] = "address_rank >= ".$iMinAddressRank; + } + if ($iMaxAddressRank < 30) { + $aTerms[] = "address_rank <= ".$iMaxAddressRank; + } + } + + if ($this->oNearPoint) { + $aTerms[] = $this->oNearPoint->withinSQL('centroid'); + $aOrder[] = $this->oNearPoint->distanceSQL('centroid'); + } elseif ($this->sPostcode) { + if (!sizeof($this->aAddress)) { + $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; + } else { + $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."')"; + } + } + + if ($sExcludeSQL) { + $aTerms = 'place_id not in ('.$sExcludeSQL.')'; + } + + if ($sViewboxSmall) { + $aTerms[] = 'centroid && '.$sViewboxSmall; + } + + if ($this->oNearPoint) { + $aOrder[] = $this->oNearPoint->distanceSQL('centroid'); + } + + if ($this->sHouseNumber) { + $sImportanceSQL = '- abs(26 - address_rank) + 3'; + 
} else { + $sImportanceSQL = '(CASE WHEN importance = 0 OR importance IS NULL THEN 0.75-(search_rank::float/40) ELSE importance END)'; + } + if ($sViewboxSmall) { + $sImportanceSQL .= " * CASE WHEN ST_Contains($sViewboxSmall, centroid) THEN 1 ELSE 0.5 END"; + } + if ($sViewboxLarge) { + $sImportanceSQL .= " * CASE WHEN ST_Contains($sViewboxLarge, centroid) THEN 1 ELSE 0.5 END"; + } + $aOrder[] = "$sImportanceSQL DESC"; + + if (sizeof($this->aFullNameAddress)) { + $sExactMatchSQL = ' ( '; + $sExactMatchSQL .= ' SELECT count(*) FROM ( '; + $sExactMatchSQL .= ' SELECT unnest(ARRAY['.join($this->aFullNameAddress, ",").']) '; + $sExactMatchSQL .= ' INTERSECT '; + $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; + $sExactMatchSQL .= ' ) s'; + $sExactMatchSQL .= ') as exactmatch'; + $aOrder[] = 'exactmatch DESC'; + } else { + $sExactMatchSQL = '0::int as exactmatch'; + } + + if ($this->sHouseNumber || $this->sClass) { + $iLimit = 20; + } + + if (sizeof($aTerms)) { + $sSQL = 'SELECT place_id,'.$sExactMatchSQL; + $sSQL .= ' FROM search_name'; + $sSQL .= ' WHERE '.join(' and ', $aTerms); + $sSQL .= ' ORDER BY '.join(', ', $aOrder); + $sSQL .= ' LIMIT '.$iLimit; + + if (CONST_Debug) var_dump($sSQL); + + return chksql( + $this->oDB->getAll($sSQL), + "Could not get places for search terms." 
+ ); + } + + return array(); + } + + + public function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $sExcludeSQL, $iLimit) + { + $sPlaceIDs = join(',', $aRoadPlaceIDs); + + $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; + $sSQL = 'SELECT place_id FROM placex '; + $sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')'; + $sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; + if ($sExcludeSQL) { + $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; + } + $sSQL .= " LIMIT $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + + if (sizeof($aPlaceIDs)) { + return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); + } + + $bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber); + $iHousenumber = intval($this->sHouseNumber); + if ($bIsIntHouseNumber) { + // if nothing found, search in the interpolation line table + $sSQL = 'SELECT distinct place_id FROM location_property_osmline'; + $sSQL .= ' WHERE startnumber is not NULL'; + $sSQL .= ' AND parent_place_id in ('.$sPlaceIDs.') AND ('; + if ($iHousenumber % 2 == 0) { + // If housenumber is even, look for housenumber in streets + // with interpolationtype even or all. + $sSQL .= "interpolationtype='even'"; + } else { + // Else look for housenumber with interpolationtype odd or all. 
+ $sSQL .= "interpolationtype='odd'"; + } + $sSQL .= " or interpolationtype='all') and "; + $sSQL .= $iHousenumber.">=startnumber and "; + $sSQL .= $iHousenumber."<=endnumber"; + + if ($sExcludeSQL)) { + $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; + } + $sSQL .= " limit $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); + + if (sizeof($aPlaceIDs)) { + return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); + } + } + + // If nothing found try the aux fallback table + if (CONST_Use_Aux_Location_data) { + $sSQL = 'SELECT place_id FROM location_property_aux'; + $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')'; + $sSQL .= " AND housenumber = '".$this->sHouseNumber."'"; + if ($sExcludeSQL) { + $sSQL .= " AND place_id not in ($sExcludeSQL)"; + } + $sSQL .= " limit $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + + if (sizeof($aPlaceIDs)) { + return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); + } + } + + // If nothing found then search in Tiger data (location_property_tiger) + if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber) { + $sSQL = 'SELECT distinct place_id FROM location_property_tiger'; + $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.') and ('; + if ($iHousenumber % 2 == 0) { + $sSQL .= "interpolationtype='even'"; + } else { + $sSQL .= "interpolationtype='odd'"; + } + $sSQL .= " or interpolationtype='all') and "; + $sSQL .= $iHousenumber.">=startnumber and "; + $sSQL .= $iHousenumber."<=endnumber"; + + if ($sExcludeSQL) { + $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; + } + $sSQL .= " limit $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); + + if (sizeof($aPlaceIDs)) { + return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); + } + } + + return array(); + } + + + public function queryPoiByOperator(&$oDB, $aParentIDs, $sExcludeSQL, $iLimit) 
+ { + $sPlaceIDs = join(',', $aParentIDs); + $aClassPlaceIDs = array(); + + if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NAME) { + // If they were searching for a named class (i.e. 'Kings Head pub') + // then we might have an extra match + $sSQL = 'SELECT place_id FROM placex '; + $sSQL .= " WHERE place_id in ($sPlaceIDs)"; + $sSQL .= " AND class='".$this->sClass."' "; + $sSQL .= " AND type='".$this->sType."'"; + $sSQL .= " AND linked_place_id is null"; + $sSQL .= " ORDER BY rank_search ASC "; + $sSQL .= " LIMIT $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + $aClassPlaceIDs = chksql($this->oDB->getCol($sSQL)); + } + + // NEAR and IN are handled the same + if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NEAR) { + $sClassTable = $this->poiTable(); + $sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = '$sClassTable'"; + $bCacheTable = (bool) chksql($this->oDB->getOne($sSQL)); + + $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; + if (CONST_Debug) var_dump($sSQL); + $iMaxRank = (int)chksql($this->oDB->getOne($sSQL)); + + // For state / country level searches the normal radius search doesn't work very well + $sPlaceGeom = false; + if ($iMaxRank < 9 && $bCacheTable) { + // Try and get a polygon to search in instead + $sSQL = 'SELECT geometry FROM placex'; + $sSQL .= " WHERE place_id in ($sPlaceIDs)"; + $sSQL .= " AND rank_search < $iMaxRank + 5"; + $sSQL .= " AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')"; + $sSQL .= " ORDER BY rank_search ASC "; + $sSQL .= " LIMIT 1"; + if (CONST_Debug) var_dump($sSQL); + $sPlaceGeom = chksql($this->oDB->getOne($sSQL)); + } + + if ($sPlaceGeom) { + $sPlaceIDs = false; + } else { + $iMaxRank += 5; + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank"; + if (CONST_Debug) var_dump($sSQL); + $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $sPlaceIDs = join(',', 
$aPlaceIDs); + } + + if ($sPlaceIDs || $sPlaceGeom) { + $fRange = 0.01; + if ($bCacheTable) { + // More efficient - can make the range bigger + $fRange = 0.05; + + $sOrderBySQL = ''; + if ($this->oNearPoint) { + $sOrderBySQL = $this->oNearPoint->distanceSQL('l.centroid'); + } elseif ($sPlaceIDs) { + $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)"; + } elseif ($sPlaceGeom) { + $sOrderBySQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; + } + + $sSQL = 'SELECT distinct i.place_id'; + if ($sOrderBySQL) { + $sSQL .= ', i.order_term'; + } + $sSQL .= ' from (SELECT l.place_id'; + if ($sOrderBySQL) { + $sSQL .= ','.$sOrderBySQL.' as order_term'; + } + $sSQL .= ' from '.$sClassTable.' as l'; + + if ($sPlaceIDs) { + $sSQL .= ",placex as f WHERE "; + $sSQL .= "f.place_id in ($sPlaceIDs) "; + $sSQL .= " AND ST_DWithin(l.centroid, f.centroid, $fRange)"; + } elseif ($sPlaceGeom) { + $sSQL .= " WHERE ST_Contains('$sPlaceGeom', l.centroid)"; + } + + if ($sExcludeSQL) { + $sSQL .= ' AND l.place_id not in ('.$sExcludeSQL.')'; + } + $sSQL .= 'limit 300) i '; + if ($sOrderBySQL) { + $sSQL .= 'order by order_term asc'; + } + $sSQL .= " limit $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); + } else { + if ($this->oNearPoint) { + $fRange = $this->oNearPoint->radius(); + } + + $sOrderBySQL = ''; + if ($this->oNearPoint) { + $sOrderBySQL = $this->oNearPoint->distanceSQL('l.geometry'); + } else { + $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)"; + } + + $sSQL = 'SELECT distinct l.place_id'; + if ($sOrderBySQL) { + $sSQL .= ','.$sOrderBySQL.' 
as orderterm'; + } + $sSQL .= ' FROM placex as l, placex as f'; + $sSQL .= " WHERE f.place_id in ($sPlaceIDs)"; + $sSQL .= " AND ST_DWithin(l.geometry, f.centroid, $fRange)"; + $sSQL .= " AND l.class='".$this->sClass."'"; + $sSQL .= " AND l.type='".$this->sType."'"; + if ($sExcludeSQL) { + $sSQL .= " AND l.place_id not in (".$sExcludeSQL.")"; + } + if ($sOrderBySQL) { + $sSQL .= "ORDER BY orderterm ASC"; + } + $sSQL .= " limit $iLimit"; + + if (CONST_Debug) var_dump($sSQL); + + $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); + } + } + } + + return $aClassPlaceIDs; + } }; From d72c8633531c859ebf20ed366f1d6976853ffe0d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 10:06:17 +0200 Subject: [PATCH 04/19] add function to convert array to SQL --- lib/Geocode.php | 10 ++++++++-- lib/SearchDescription.php | 29 ++++++++++------------------- lib/db.php | 5 +++++ 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index f65a485a..f3cc10da 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -411,7 +411,10 @@ class Geocode //$aPlaceIDs is an array with key: placeID and value: tiger-housenumber, if found, else -1 if (sizeof($aPlaceIDs) == 0) return array(); - $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; + $sLanguagePrefArraySQL = getArraySQL( + array_map("getDBQuoted", + $this->aLangPrefOrder) + ); // Get the details for display (is this a redundant extra step?) 
$sPlaceIDs = join(',', array_keys($aPlaceIDs)); @@ -980,7 +983,10 @@ class Geocode if (!$this->sQuery && !$this->aStructuredQuery) return array(); $sNormQuery = $this->normTerm($this->sQuery); - $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; + $sLanguagePrefArraySQL = getArraySQL( + array_map("getDBQuoted", + $this->aLangPrefOrder) + ); $sCountryCodesSQL = false; if ($this->aCountryCodes) { $sCountryCodesSQL = join(',', array_map('addQuotes', $this->aCountryCodes)); diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 99860ce0..1a994acd 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -130,20 +130,11 @@ class SearchDescription return (bool) $this->sHouseNumber; } - public function poiTable() + private function poiTable() { return 'place_classtype_'.$this->sClass.'_'.$this->sType; } - public function addressArraySQL() - { - return 'ARRAY['.join(',', $this->aAddress).']'; - } - public function nameArraySQL() - { - return 'ARRAY['.join(',', $this->aName).']'; - } - public function countryCodeSQL($sVar, $sCountryList) { if ($this->sCountryCode) { @@ -263,7 +254,7 @@ class SearchDescription $sSQL .= ', search_name s '; $sSQL .= 'WHERE s.place_id = p.parent_place_id '; $sSQL .= 'AND array_cat(s.nameaddress_vector, s.name_vector)'; - $sSQL .= ' @> '.$this->addressArraySQL().' AND '; + $sSQL .= ' @> '.getArraySQL($this->aAddress).' 
AND '; } else { $sSQL .= 'WHERE '; } @@ -312,7 +303,7 @@ class SearchDescription } if (sizeof($this->aName)) { - $aTerms[] = 'name_vector @> '.$this->nameArraySQL(); + $aTerms[] = 'name_vector @> '.getArraySQL($this->aName); } if (sizeof($this->aAddress)) { // For infrequent name terms disable index usage for address @@ -321,9 +312,9 @@ class SearchDescription && $aWordFrequencyScores[$this->aName[reset($this->aName)]] < CONST_Search_NameOnlySearchFrequencyThreshold ) { - $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.$this->addressArraySQL(); + $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress); } else { - $aTerms[] = 'nameaddress_vector @> '.$this->addressArraySQL(); + $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress); } } @@ -381,11 +372,11 @@ class SearchDescription if (sizeof($this->aFullNameAddress)) { $sExactMatchSQL = ' ( '; - $sExactMatchSQL .= ' SELECT count(*) FROM ( '; - $sExactMatchSQL .= ' SELECT unnest(ARRAY['.join($this->aFullNameAddress, ",").']) '; - $sExactMatchSQL .= ' INTERSECT '; - $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; - $sExactMatchSQL .= ' ) s'; + $sExactMatchSQL .= ' SELECT count(*) FROM ( '; + $sExactMatchSQL .= ' SELECT unnest('.getArraySQL($this->aFullNameAddress).')'; + $sExactMatchSQL .= ' INTERSECT '; + $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; + $sExactMatchSQL .= ' ) s'; $sExactMatchSQL .= ') as exactmatch'; $aOrder[] = 'exactmatch DESC'; } else { diff --git a/lib/db.php b/lib/db.php index 145c57d0..01fc344b 100644 --- a/lib/db.php +++ b/lib/db.php @@ -23,6 +23,11 @@ function getDBQuoted($s) return "'".pg_escape_string($s)."'"; } +function getArraySQL($a) +{ + return 'ARRAY['.join(',', $a).']'; +} + function getPostgresVersion(&$oDB) { $sVersionString = $oDB->getOne('select version()'); From 16268f92ccd11f0c11fe07b2a001b197af3a5b92 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 12:57:22 +0200 
Subject: [PATCH 05/19] convert getGroupedSearches to SearchDescription class --- lib/Geocode.php | 200 +++++++++------------------------- lib/SearchDescription.php | 222 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 274 insertions(+), 148 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index f3cc10da..cce38de9 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -716,8 +716,11 @@ class Geocode foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); - if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; - else $sPhraseType = ''; + if ($bStructuredPhrases) { + $sPhraseType = $aPhraseTypes[$iPhrase]; + } else { + $sPhraseType = ''; + } foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { // Too many permutations - too expensive @@ -730,159 +733,60 @@ class Geocode //echo "
$sToken"; $aNewWordsetSearches = array(); - foreach ($aWordsetSearches as $aCurrentSearch) { + foreach ($aWordsetSearches as $oCurrentSearch) { //echo ""; - //var_dump($aCurrentSearch); + //var_dump($oCurrentSearch); //echo ""; // If the token is valid if (isset($aValidTokens[' '.$sToken])) { - // TODO variable should go into aCurrentSearch - $bHavePostcode = false; + // Recheck if the original word shows up in the query. + $bWordInQuery = false; + if (isset($aSearchTerm['word']) && $aSearchTerm['word']) { + $bWordInQuery = $this->normTerm($aSearchTerm['word'])) !== false; + } foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank']++; - if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0') { - if ($aSearch['sCountryCode'] === false) { - $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']); - // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation) - if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases))) { - $aSearch['iSearchRank'] += 5; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - // If it is at the beginning, we can be almost sure that this is the wrong order - // Increase score for all searches. - if ($iToken == 0 && $iPhrase == 0) { - $iGlobalRank++; - } - } - } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') { - // We need to try the case where the postal code is the primary element (i.e. 
no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if ($aSearch['sPostcode'] === '' && - isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { - // If we have structured search or this is the first term, - // make the postcode the primary search element. - if (!$bHavePostcode && $aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { - $aNewSearch = $aSearch; - $aNewSearch['sOperator'] = 'postcode'; - $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array($aSearchTerm['word_id'] => $aSearchTerm['word']); - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; - $bHavePostcode = true; - } + $aNewSearches = $oCurrentSearch->extendWithFullTerm( + $aSearchTerm, + $bWordInQuery, + isset($aValidTokens[$sToken]) + && strpos($sToken, ' ') === false, + $sPhraseType, + $iToken == 0 && $iPhrase == 0, + $iPhrase == 0, + $iToken + 1 == sizeof($aWordset) + && $iPhrase + 1 == sizeof($aPhrases), + $iGlobalRank + ); - // If we have a structured search or this is not the first term, - // add the postcode as an addendum. 
- if ($aSearch['sOperator'] !== 'postcode' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']))) { - $aSearch['sPostcode'] = $aSearchTerm['word']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } + foreach ($aNewSearches as $oSearch) { + if ($oSearch->getRank() < $this->iMaxRank) { + $aNewWordsetSearches[] = $oSearch; } - } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { - if ($aSearch['sHouseNumber'] === '' && $aSearch['sOperator'] !== 'postcode') { - $aSearch['sHouseNumber'] = $sToken; - // sanity check: if the housenumber is not mainly made - // up of numbers, add a penalty - if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++; - // also must not appear in the middle of the address - if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - /* - // Fall back to not searching for this item (better than nothing) - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - */ - } - } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) { - // require a normalized exact match of the term - // if we have the normalizer version of the query - // available - if ($aSearch['sOperator'] === '' - && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) { - $aSearch['sClass'] = $aSearchTerm['class']; - $aSearch['sType'] = $aSearchTerm['type']; - if ($aSearchTerm['operator'] == '') { - $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 
'name' : 'near'; - $aSearch['iSearchRank'] += 2; - } else { - $aSearch['sOperator'] = 'near'; // near = in for the moment - } - - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } else { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; - } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } } // Look for partial matches. // Note that there is no point in adding country terms here - // because country are omitted in the address. + // because country is omitted in the address. if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country') { // Allow searching for a word - but at extra cost foreach ($aValidTokens[$sToken] as $aSearchTerm) { - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false) { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } elseif (isset($aValidTokens[' '.$sToken])) { // revert to the token version? 
- $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - foreach ($aValidTokens[' '.$sToken] as $aSearchTermToken) { - if (empty($aSearchTermToken['country_code']) - && empty($aSearchTermToken['lat']) - && empty($aSearchTermToken['class']) - ) { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } - } else { - $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } + $aNewSearches = $oCurrentSearch->extendWithPartialTerm( + $aSearchTerm, + $bStructuredPhrases, + $iPhrase, + $aWordFrequencyScores, + isset($aValidTokens[' '.$sToken]) ? 
$aValidTokens[' '.$sToken] : array() + ); - if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch']) - && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) { - $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 1; - if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1; - if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; - if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } - $aSearch['iNamePhrase'] = $iPhrase; - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + foreach ($aNewSearches as $oSearch) { + if ($oSearch->getRank() < $this->iMaxRank) { + $aNewWordsetSearches[] = $oSearch; } } + } - } else { - // Allow skipping a word - but at EXTREAM cost - //$aSearch = $aCurrentSearch; - //$aSearch['iSearchRank']+=100; - //$aNewWordsetSearches[] = $aSearch; } } // Sort and cut @@ -907,9 +811,12 @@ class Geocode // Re-group the searches by their score, junk anything over 20 as just not worth trying $aGroupedSearches = array(); foreach ($aNewPhraseSearches as $aSearch) { - if ($aSearch['iSearchRank'] < $this->iMaxRank) { - if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array(); - $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + $iRank = $aSearch->getRank(); + if ($iRank < $this->iMaxRank) { + if (!isset($aGroupedSearches[$iRank])) { + $aGroupedSearches[$iRank] = array(); + } + $aGroupedSearches[$iRank][] = $aSearch; } } ksort($aGroupedSearches); @@ -927,19 +834,16 @@ class Geocode // Revisit searches, drop bad searches and give penalty to unlikely combinations. 
$aGroupedSearches = array(); - foreach ($aSearches as $aSearch) { - if (!$aSearch['aName']) { - if ($aSearch['sHouseNumber']) { - continue; - } - } - if ($this->aCountryCodes && $aSearch['sCountryCode'] - && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) { + foreach ($aSearches as $oSearch) { + if (!$oSearch->isValidSearch()) { continue; } - $aSearch['iSearchRank'] += $iGlobalRank; - $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + $iRank = $oSearch->addToRank($iGlobalRank); + if (!isset($aGroupedSearches[$iRank]) { + $aGroupedSearches[$iRank] = array(); + } + $aGroupedSearches[$iRank][] = $oSearch; } ksort($aGroupedSearches); diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 1a994acd..67e03931 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -63,6 +63,12 @@ class SearchDescription return $this->iSearchRank; } + public function addToRank($iAddRank) + { + $this->iSearchRank += $iAddRank; + return $this->iSearchRank; + } + public function getPostCode() { return $this->sPostcode; @@ -179,6 +185,222 @@ class SearchDescription return $sQuery; } + public function isValidSearch(&$aCountryCodes) + { + if (!sizeof($this->aName)) { + if ($this->sHouseNumber) { + return false; + } + } + if ($aCountryCodes + && $this->sCounrtyCode + && !in_array($this->sCountryCode, $aCountryCodes) + ) { + return false; + } + + return true; + } + + /////////// Search building functions + + public function extendWithFullTerm($aSearchTerm, $bWordInQuery, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken, &$iGlobalRank) + { + $aNewSearches = array(); + + if (($sPhraseType == '' || $sPhraseType == 'country') + && !empty($aSearchTerm['country_code']) + && $aSearchTerm['country_code'] != '0' + ) { + if (!$this->sCountryCode) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->sCountryCode = $aSearchTerm['country_code']; + // Country is almost always at the end of the string + // - increase score 
for finding it anywhere else (optimisation) + if (!$bLastToken) { + $oSearch->iSearchRank += 5; + } + $aNewSearches[] = $oSearch; + + // If it is at the beginning, we can be almost sure that + // the terms are in the wrong order. Increase score for all searches. + if ($bFirstToken) { + $iGlobalRank++; + } + } + } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') + && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode' + ) { + // We need to try the case where the postal code is the primary element + // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) + // so try both. + if (!$this->sPostcode && $bWordInQuery) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($this->iOperator == Operator::NONE + && ($sPhraseType == 'postalcode' || $bFirstToken) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->iOperator = Operator::POSTCODE; + $oSearch->aAddress = array_merge($this->aAddress, $this->aName); + $oSearch->aName = + array($aSearchTerm['word_id'] => $aSearchTerm['word']); + $aNewSearches[] = $oSearch; + } + + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. 
+ if ($this->iOperator != Operator::POSTCODE + && ($sPhraseType == 'postalcode' || sizeof($this->aName)) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->sPostcode = $aSearchTerm['word']; + $aNewSearches[] = $oSearch; + } + } + } elseif (($sPhraseType == '' || $sPhraseType == 'street') + && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house' + ) { + if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->sHouseNumber = trim($aSearchTerm['word_token']); + // sanity check: if the housenumber is not mainly made + // up of numbers, add a penalty + if (preg_match_all("/[^0-9]/", $oSearch->sHouseNumber, $aMatches) > 2) { + $oSearch->iSearchRank++; + } + // also must not appear in the middle of the address + if (sizeof($this->aAddress) || sizeof($this->aAddressNonSearch)) { + $oSearch->iSearchRank++; + } + $aNewSearches[] = $oSearch; + } + } elseif ($sPhraseType == '' + && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null + ) { + // require a normalized exact match of the term + // if we have the normalizer version of the query + // available + if ($this->iOperator == Operator::NONE + && (isset($aSearchTerm['word']) && $aSearchTerm['word']) + && $bWordInQuery + ) { + $oSearch = clone this; + $oSearch->iSearchRank++; + + $iOp = Operator::NEAR; // near == in for the moment + if ($aSearchTerm['operator'] == '') { + if (sizeof($this->aName)) { + $iOp = Operator::NAME; + } + $oSearch->iSearchRank += 2; + } + + $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']); + $aNewWordsetSearches[] = $oSearch; + } + } elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { + $iWordID = $aSearchTerm['word_id']; + if (sizeof($this->aName)) { + if (($sPhraseType == '' || !$bFirstPhrase) + && $sPhraseType != 'country')) + && !$bHasPartial + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aAddress[$iWordID] = 
$iWordID; + ); + $aNewSearches[] = $oSearch; + } + else { + $this->aFullNameAddress[$iWordID] = $iWordID; + } + } else { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aName = array($iWordID => $iWordID); + $aNewSearches[] = $oSearch; + } + } + + return $aNewSearches; + } + + public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, &$aWordFrequencyScores, $aFullTokens) + { + // Only allow name terms. + if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) { + return array(); + } + + $aNewSearches = array(); + $iWordID = $aSearchTerm['word_id']; + + if ((!$bStructuredPhrases || $iPhrase > 0) + && sizeof($this->aName) + && strpos($aSearchTerm['word_token'], ' ') === false + ) { + if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + $oSearch = clone this; + $oSearch->iSearchRank++; + $oSearch->aAddress[$iWordID] = $iWordID; + $aNewSearches[] = $oSearch; + } else { + $oSearch = clone this; + $oSearch->iSearchRank++; + $oSearch->aAddressNonSearch[$iWordID] = $iWordID; + if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { + $oSearch->iSearchRank += 2; + } + if (sizeof($aFullTokens) { + $oSearch->iSearchRank++; + } + $aNewSearches[] = $oSearch; + + // revert to the token version? 
+ foreach ($aFullTokens as $aSearchTermToken) { + if (empty($aSearchTermToken['country_code']) + && empty($aSearchTermToken['lat']) + && empty($aSearchTermToken['class']) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; + $aNewSearches[] = $oSearch; + } + } + } + } + + if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) + && (!sizeof($this->aName) || $this->iNamePhrase == $iPhrase) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + if (!sizeof($this->aName)) { + $aSearch->iSearchRank += 1; + } + if (preg_match('#^[0-9]+$#', $sSerchTerm['word_token')) { + $oSearch->iSearchRank += 2; + } + if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + $oSearch->aName[$iWordID] = $iWordID; + } else { + $aSearch->aNameNonSearch[$iWordID] = $iWordID; + } + $oSearch->iNamePhrase = $iPhrase; + $aNewSearches[] = $aSearch; + } + + return $aNewSearches; + } + + /////////// Query functions + public function queryCountry(&$oDB, $sViewboxSQL) { $sSQL = 'SELECT place_id FROM placex '; From 75e35f383224fb8eab3d9a28b111499c0a670eff Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 15:26:14 +0200 Subject: [PATCH 06/19] fix syntax errors from introduction of SearchDescription --- lib/Geocode.php | 25 ++++++------ lib/SearchDescription.php | 81 +++++++++++++++++++++++---------------- lib/lib.php | 8 ---- 3 files changed, 60 insertions(+), 54 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index cce38de9..33b02d2b 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -5,6 +5,7 @@ namespace Nominatim; require_once(CONST_BasePath.'/lib/NearPoint.php'); require_once(CONST_BasePath.'/lib/PlaceLookup.php'); require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); +require_once(CONST_BasePath.'/lib/SearchDescription.php'); class Geocode { @@ -743,7 +744,7 @@ class Geocode // Recheck if the original word shows up in the query. 
$bWordInQuery = false; if (isset($aSearchTerm['word']) && $aSearchTerm['word']) { - $bWordInQuery = $this->normTerm($aSearchTerm['word'])) !== false; + $bWordInQuery = $this->normTerm($aSearchTerm['word']) !== false; } foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) { $aNewSearches = $oCurrentSearch->extendWithFullTerm( @@ -790,13 +791,13 @@ class Geocode } } // Sort and cut - usort($aNewWordsetSearches, 'bySearchRank'); + usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank')); $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); } //var_Dump('
',sizeof($aWordsetSearches)); exit; $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); - usort($aNewPhraseSearches, 'bySearchRank'); + usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank')); $aSearchHash = array(); foreach ($aNewPhraseSearches as $iSearch => $aSearch) { @@ -835,12 +836,12 @@ class Geocode // Revisit searches, drop bad searches and give penalty to unlikely combinations. $aGroupedSearches = array(); foreach ($aSearches as $oSearch) { - if (!$oSearch->isValidSearch()) { + if (!$oSearch->isValidSearch($this->aCountryCodes)) { continue; } $iRank = $oSearch->addToRank($iGlobalRank); - if (!isset($aGroupedSearches[$iRank]) { + if (!isset($aGroupedSearches[$iRank])) { $aGroupedSearches[$iRank] = array(); } $aGroupedSearches[$iRank][] = $oSearch; @@ -983,7 +984,7 @@ class Geocode $oNewSearch->setPoiSearch( Operator::TYPE, $aSearchTerm['class'], - $aSearchTerm['type'], + $aSearchTerm['type'] ); $aNewSearches[] = $oNewSearch; } @@ -1077,8 +1078,8 @@ class Geocode foreach ($aTokens as $sToken) { // Unknown single word token with a number - assume it is a house number - if (!isset($aValidTokens[' '.$sToken]) && strpos($sToken, ' ') === false && preg_match('/[0-9]/', $sToken)) { - $aValidTokens[' '.$sToken] = array(array('class' => 'place', 'type' => 'house')); + if (!isset($aValidTokens[' '.$sToken]) && strpos($sToken, ' ') === false && preg_match('/^[0-9]+$/', $sToken)) { + $aValidTokens[' '.$sToken] = array(array('class' => 'place', 'type' => 'house', 'word_token' => ' '.$sToken)); } } @@ -1173,7 +1174,7 @@ class Geocode $this->oDB, $sCountryCodesSQL, $bBoundingBoxSearch ? $this->sViewboxSmallSQL : '', - $sViewboxCentreSQL, + $this->sViewboxCentreSQL, $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); @@ -1210,13 +1211,13 @@ class Geocode $aResult = $oSearch->queryHouseNumber( $this->oDB, $aPlaceIDs, - $this->aExcludePlaceIDs ? 
join(',', $this->aExcludePlaceIDs) : '' + $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); if (sizeof($aResult)) { $searchedHousenumber = $aResult['iHouseNumber']; - $aPlaceIDs = $aResults['aPlaceIDs']; + $aPlaceIDs = $aResult['aPlaceIDs']; } elseif (!$oSearch->looksLikeFullAddress()) { $aPlaceIDs = array(); } @@ -1227,7 +1228,7 @@ class Geocode $aPlaceIDs = $oSearch->queryPoiByOperator( $this->oDB, $aPlaceIDs, - $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '' + $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); } diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 67e03931..d18e7eab 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -5,7 +5,7 @@ namespace Nominatim; /** * Operators describing special searches. */ -abstract final class Operator +abstract class Operator { /// No operator selected. const NONE = 0; @@ -103,7 +103,7 @@ class SearchDescription public function isCountrySearch() { return $this->sCountryCode && sizeof($this->aName) == 0 - && !$this->iOperator && !$this->oNear; + && !$this->iOperator && !$this->oNearPoint; } /** @@ -111,7 +111,7 @@ class SearchDescription */ public function isNearSearch() { - return (bool) $this->oNear; + return (bool) $this->oNearPoint; } public function isPoiSearch() @@ -147,7 +147,7 @@ class SearchDescription return $sVar.' = \''.$this->sCountryCode."'"; } if ($sCountryList) { - return $sVar.' in ('.$this->sCountryCode.')'; + return $sVar.' 
in ('.$sCountryList.')'; } return ''; @@ -193,7 +193,7 @@ class SearchDescription } } if ($aCountryCodes - && $this->sCounrtyCode + && $this->sCountryCode && !in_array($this->sCountryCode, $aCountryCodes) ) { return false; @@ -289,7 +289,7 @@ class SearchDescription && (isset($aSearchTerm['word']) && $aSearchTerm['word']) && $bWordInQuery ) { - $oSearch = clone this; + $oSearch = clone $this; $oSearch->iSearchRank++; $iOp = Operator::NEAR; // near == in for the moment @@ -307,13 +307,12 @@ class SearchDescription $iWordID = $aSearchTerm['word_id']; if (sizeof($this->aName)) { if (($sPhraseType == '' || !$bFirstPhrase) - && $sPhraseType != 'country')) + && $sPhraseType != 'country' && !$bHasPartial ) { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddress[$iWordID] = $iWordID; - ); $aNewSearches[] = $oSearch; } else { @@ -345,18 +344,18 @@ class SearchDescription && strpos($aSearchTerm['word_token'], ' ') === false ) { if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { - $oSearch = clone this; + $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } else { - $oSearch = clone this; + $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddressNonSearch[$iWordID] = $iWordID; if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { $oSearch->iSearchRank += 2; } - if (sizeof($aFullTokens) { + if (sizeof($aFullTokens)) { $oSearch->iSearchRank++; } $aNewSearches[] = $oSearch; @@ -382,18 +381,18 @@ class SearchDescription $oSearch = clone $this; $oSearch->iSearchRank++; if (!sizeof($this->aName)) { - $aSearch->iSearchRank += 1; + $oSearch->iSearchRank += 1; } - if (preg_match('#^[0-9]+$#', $sSerchTerm['word_token')) { + if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { $oSearch->iSearchRank += 2; } if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { $oSearch->aName[$iWordID] = $iWordID; } else { - $aSearch->aNameNonSearch[$iWordID] = $iWordID; + 
$oSearch->aNameNonSearch[$iWordID] = $iWordID; } $oSearch->iNamePhrase = $iPhrase; - $aNewSearches[] = $aSearch; + $aNewSearches[] = $oSearch; } return $aNewSearches; @@ -406,7 +405,7 @@ class SearchDescription $sSQL = 'SELECT place_id FROM placex '; $sSQL .= "WHERE country_code='".$this->sCountryCode."'"; $sSQL .= ' AND rank_search = 4'; - if ($ViewboxSQL) { + if ($sViewboxSQL) { $sSQL .= " AND ST_Intersects($sViewboxSQL, geometry)"; } $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@ -448,7 +447,7 @@ class SearchDescription } $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); - return chksql($this->oDB->getCol($sSQL)); + return chksql($oDB->getCol($sSQL)); } if ($this->oNearPoint) { @@ -462,7 +461,7 @@ class SearchDescription $sSQL .= ' ORDER BY '.$this->oNearPoint->distanceSQL('centroid')." ASC"; $sSQL .= " LIMIT $iLimit"; if (CONST_Debug) var_dump($sSQL); - return chksql($this->oDB->getCol($sSQL)); + return chksql($oDB->getCol($sSQL)); } return array(); @@ -490,7 +489,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - return chksql($this->oDB->getCol($sSQL)); + return chksql($oDB->getCol($sSQL)); } public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $sExcludeSQL, $sViewboxSmall, $sViewboxLarge, $iLimit) @@ -540,7 +539,7 @@ class SearchDescription } } - $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); + $sCountryTerm = $this->countryCodeSQL('country_code', $sCountryList); if ($sCountryTerm) { $aTerms[] = $sCountryTerm; } @@ -568,7 +567,7 @@ class SearchDescription } if ($sExcludeSQL) { - $aTerms = 'place_id not in ('.$sExcludeSQL.')'; + $aTerms[] = 'place_id not in ('.$sExcludeSQL.')'; } if ($sViewboxSmall) { @@ -619,7 +618,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); return chksql( - $this->oDB->getAll($sSQL), + $oDB->getAll($sSQL), "Could not get places for search terms." 
); } @@ -643,7 +642,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aPlaceIDs = chksql($oDB->getCol($sSQL)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); @@ -668,14 +667,14 @@ class SearchDescription $sSQL .= $iHousenumber.">=startnumber and "; $sSQL .= $iHousenumber."<=endnumber"; - if ($sExcludeSQL)) { + if ($sExcludeSQL) { $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; } $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); + $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); @@ -694,7 +693,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aPlaceIDs = chksql($oDB->getCol($sSQL)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); @@ -721,7 +720,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); + $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); @@ -750,18 +749,18 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aClassPlaceIDs = chksql($oDB->getCol($sSQL)); } // NEAR and IN are handled the same if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NEAR) { $sClassTable = $this->poiTable(); $sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = '$sClassTable'"; - $bCacheTable = (bool) chksql($this->oDB->getOne($sSQL)); + $bCacheTable = (bool) chksql($oDB->getOne($sSQL)); $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; if (CONST_Debug) var_dump($sSQL); - $iMaxRank = (int)chksql($this->oDB->getOne($sSQL)); + $iMaxRank = 
(int)chksql($oDB->getOne($sSQL)); // For state / country level searches the normal radius search doesn't work very well $sPlaceGeom = false; @@ -774,7 +773,7 @@ class SearchDescription $sSQL .= " ORDER BY rank_search ASC "; $sSQL .= " LIMIT 1"; if (CONST_Debug) var_dump($sSQL); - $sPlaceGeom = chksql($this->oDB->getOne($sSQL)); + $sPlaceGeom = chksql($oDB->getOne($sSQL)); } if ($sPlaceGeom) { @@ -784,7 +783,7 @@ class SearchDescription $sSQL = 'SELECT place_id FROM placex'; $sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank"; if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aPlaceIDs = chksql($oDB->getCol($sSQL)); $sPlaceIDs = join(',', $aPlaceIDs); } @@ -832,7 +831,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); + $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); } else { if ($this->oNearPoint) { $fRange = $this->oNearPoint->radius(); @@ -864,11 +863,25 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); + $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); } } } return $aClassPlaceIDs; } + + + /////////// Sort functions + + static function bySearchRank($a, $b) + { + if ($a->iSearchRank == $b->iSearchRank) { + return $a->iOperator + strlen($a->sHouseNumber) + - $b->iOperator - strlen($b->sHouseNumber); + } + + return $a->iSearchRank < $b->iSearchRank ? 
-1 : 1; + } + }; diff --git a/lib/lib.php b/lib/lib.php index 48ed0ffb..f92985ba 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -51,14 +51,6 @@ function getDatabaseDate(&$oDB) } -function bySearchRank($a, $b) -{ - if ($a['iSearchRank'] == $b['iSearchRank']) - return strlen($a['sOperator']) + strlen($a['sHouseNumber']) - strlen($b['sOperator']) - strlen($b['sHouseNumber']); - return ($a['iSearchRank'] < $b['iSearchRank']?-1:1); -} - - function byImportance($a, $b) { if ($a['importance'] != $b['importance']) From fd08d419629c51cb2c7b1e90814d596819d20640 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 16:03:30 +0200 Subject: [PATCH 07/19] move Search dump function into SearchDescription class --- lib/SearchDescription.php | 50 ++++++++++++++++++++++++++++++++++ lib/lib.php | 57 +++------------------------------------ 2 files changed, 53 insertions(+), 54 deletions(-) diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index d18e7eab..a8285299 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -19,6 +19,27 @@ abstract class Operator const NAME = 4; /// Search for postcodes. const POSTCODE = 5; + + private $aConstantNames = null; + + public static function toString($iOperator) + { + if ($iOperator == Operator::NONE) { + return ''; + } + + if ($aConstantNames === null) { + $oReflector = new \ReflectionClass ('Nominatim\Operator'); + $aConstants = $oReflector->getConstants(); + + $aConstantNames = array(); + foreach ($aConstants as $sName => $iValue) { + $aConstantNames[$iValue] = $sName; + } + } + + return $aConstantNames[$iOperator]; + } } /** @@ -884,4 +905,33 @@ class SearchDescription return $a->iSearchRank < $b->iSearchRank ? 
-1 : 1; } + //////////// Debugging functions + + function dumpAsHtmlTableRow(&$aWordIDs) + { + $kf = function($k) use (&$aWordIDs) { return $aWordIDs[$k]; }; + + echo ""; + echo "$this->iSearchRank"; + echo "".join(', ', array_map($kf, $this->aName)).""; + echo "".join(', ', array_map($kf, $this->aNameNonSearch)).""; + echo "".join(', ', array_map($kf, $this->aAddress)).""; + echo "".join(', ', array_map($kf, $this->aAddressNonSearch)).""; + echo "".$this->sCountryCode.""; + echo "".Operator::toString($this->iOperator).""; + echo "".$this->sClass.""; + echo "".$this->sType.""; + echo "".$this->sPostcode.""; + echo "".$this->sHouseNumber.""; + + if ($this->oNearPoint) { + echo "".$this->oNearPoint->lat().""; + echo "".$this->oNearPoint->lon().""; + echo "".$this->oNearPoint->radius().""; + } else { + echo ""; + } + + echo ""; + } }; diff --git a/lib/lib.php b/lib/lib.php index f92985ba..969d58a3 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -481,7 +481,8 @@ function _debugDumpGroupedSearches($aData, $aTokens) foreach ($aTokens as $sToken => $aWords) { if ($aWords) { foreach ($aWords as $aToken) { - $aWordsIDs[$aToken['word_id']] = $sToken.'('.$aToken['word_id'].')'; + $aWordsIDs[$aToken['word_id']] = + '#'.$sToken.'('.$aToken['word_id'].')#'; } } } @@ -493,59 +494,7 @@ function _debugDumpGroupedSearches($aData, $aTokens) echo "LatLonRadius"; foreach ($aData as $iRank => $aRankedSet) { foreach ($aRankedSet as $aRow) { - echo ""; - echo "$iRank"; - - echo ""; - $sSep = ''; - foreach ($aRow['aName'] as $iWordID) { - echo $sSep.'#'.$aWordsIDs[$iWordID].'#'; - $sSep = ', '; - } - echo ""; - - echo ""; - $sSep = ''; - foreach ($aRow['aNameNonSearch'] as $iWordID) { - echo $sSep.'#'.$aWordsIDs[$iWordID].'#'; - $sSep = ', '; - } - echo ""; - - echo ""; - $sSep = ''; - foreach ($aRow['aAddress'] as $iWordID) { - echo $sSep.'#'.$aWordsIDs[$iWordID].'#'; - $sSep = ', '; - } - echo ""; - - echo ""; - $sSep = ''; - foreach ($aRow['aAddressNonSearch'] as $iWordID) { - echo 
$sSep.'#'.$aWordsIDs[$iWordID].'#'; - $sSep = ', '; - } - echo ""; - - echo "".$aRow['sCountryCode'].""; - - echo "".$aRow['sOperator'].""; - echo "".$aRow['sClass'].""; - echo "".$aRow['sType'].""; - - echo "".$aRow['sPostcode'].""; - echo "".$aRow['sHouseNumber'].""; - - if ($aRow['oNear']) { - echo "".$aRow['oNear']->lat().""; - echo "".$aRow['oNear']->lon().""; - echo "".$aRow['oNear']->radius().""; - } else { - echo ""; - } - - echo ""; + $aRow->dumpAsHtmlTableRow($aWordsIDs); } } echo ""; From 795153b213121c6ec8cc725451d4f20c307b4174 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 16:42:04 +0200 Subject: [PATCH 08/19] fix more syntax issues --- lib/Geocode.php | 15 +++++++++------ lib/SearchDescription.php | 16 ++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 33b02d2b..2f265111 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -741,12 +741,15 @@ class Geocode // If the token is valid if (isset($aValidTokens[' '.$sToken])) { - // Recheck if the original word shows up in the query. - $bWordInQuery = false; - if (isset($aSearchTerm['word']) && $aSearchTerm['word']) { - $bWordInQuery = $this->normTerm($aSearchTerm['word']) !== false; - } foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) { + // Recheck if the original word shows up in the query. 
+ $bWordInQuery = false; + if (isset($aSearchTerm['word']) && $aSearchTerm['word']) { + $bWordInQuery = strpos( + $sNormQuery, + $this->normTerm($aSearchTerm['word']) + ) !== false; + } $aNewSearches = $oCurrentSearch->extendWithFullTerm( $aSearchTerm, $bWordInQuery, @@ -1180,7 +1183,7 @@ class Geocode ); } elseif ($oSearch->isOperator(Operator::POSTCODE)) { $aPlaceIDs = $oSearch->queryPostcode( - $oDB, + $this->oDB, $sCountryCodesSQL, $this->iLimit ); diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index a8285299..b7b4498d 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -20,7 +20,7 @@ abstract class Operator /// Search for postcodes. const POSTCODE = 5; - private $aConstantNames = null; + private static $aConstantNames = null; public static function toString($iOperator) { @@ -28,17 +28,17 @@ abstract class Operator return ''; } - if ($aConstantNames === null) { + if (Operator::$aConstantNames === null) { $oReflector = new \ReflectionClass ('Nominatim\Operator'); $aConstants = $oReflector->getConstants(); - $aConstantNames = array(); + Operator::$aConstantNames = array(); foreach ($aConstants as $sName => $iValue) { - $aConstantNames[$iValue] = $sName; + Operator::$aConstantNames[$iValue] = $sName; } } - return $aConstantNames[$iOperator]; + return Operator::$aConstantNames[$iOperator]; } } @@ -322,7 +322,7 @@ class SearchDescription } $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']); - $aNewWordsetSearches[] = $oSearch; + $aNewSearches[] = $oSearch; } } elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { $iWordID = $aSearchTerm['word_id']; @@ -501,10 +501,10 @@ class SearchDescription $sSQL .= 'WHERE '; } - $sSQL .= "p.postcode = '".pg_escape_string(reset($this->$aName))."'"; + $sSQL .= "p.postcode = '".pg_escape_string(reset($this->aName))."'"; $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); if ($sCountryTerm) { - $sSQL .= ' AND '.$sCountyTerm; + $sSQL 
.= ' AND '.$sCountryTerm; } $sSQL .= " LIMIT $iLimit"; From 8e0ffde3e0a403d289b91ad720444fd94f3ae210 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 17:00:59 +0200 Subject: [PATCH 09/19] fix CodeSniffer violations --- lib/Geocode.php | 9 ++--- lib/SearchDescription.php | 85 ++++++++------------------------------- phpcs.xml | 4 +- utils/setup.php | 1 - utils/update.php | 18 ++++++--- 5 files changed, 34 insertions(+), 83 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 2f265111..58bc3bfc 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -368,7 +368,7 @@ class Geocode $this->aAddressRankList = array(); $this->aStructuredQuery = array(); - $this->sAllowedTypesSQLList = False; + $this->sAllowedTypesSQLList = false; $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false); $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false); @@ -413,8 +413,7 @@ class Geocode if (sizeof($aPlaceIDs) == 0) return array(); $sLanguagePrefArraySQL = getArraySQL( - array_map("getDBQuoted", - $this->aLangPrefOrder) + array_map("getDBQuoted", $this->aLangPrefOrder) ); // Get the details for display (is this a redundant extra step?) 
@@ -789,7 +788,6 @@ class Geocode $aNewWordsetSearches[] = $oSearch; } } - } } } @@ -892,8 +890,7 @@ class Geocode $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = getArraySQL( - array_map("getDBQuoted", - $this->aLangPrefOrder) + array_map("getDBQuoted", $this->aLangPrefOrder) ); $sCountryCodesSQL = false; if ($this->aCountryCodes) { diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index b7b4498d..d84c8bf8 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -1,47 +1,9 @@ getConstants(); - - Operator::$aConstantNames = array(); - foreach ($aConstants as $sName => $iValue) { - Operator::$aConstantNames[$iValue] = $sName; - } - } - - return Operator::$aConstantNames[$iOperator]; - } -} - /** * Description of a single interpretation of a search query. */ @@ -79,6 +41,7 @@ class SearchDescription /// Index of phrase currently processed private $iNamePhrase = -1; + public function getRank() { return $this->iSearchRank; @@ -95,9 +58,6 @@ class SearchDescription return $this->sPostcode; } - /** - * Set the geographic search radius. - */ public function setNear(&$oNearPoint) { $this->oNearPoint = $oNearPoint; @@ -110,26 +70,17 @@ class SearchDescription $this->sType = $sType; } - /** - * Check if name or address for the search are specified. - */ public function isNamedSearch() { return sizeof($this->aName) > 0 || sizeof($this->aAddress) > 0; } - /** - * Check if only a country is requested. - */ public function isCountrySearch() { return $this->sCountryCode && sizeof($this->aName) == 0 && !$this->iOperator && !$this->oNearPoint; } - /** - * Check if a search near a geographic location is requested. - */ public function isNearSearch() { return (bool) $this->oNearPoint; @@ -179,13 +130,6 @@ class SearchDescription return $this->iOperator != Operator::NONE; } - /** - * Extract special terms from the query, amend the search - * and return the shortended query. 
- * - * Only the first special term found will be used but all will - * be removed from the query. - */ public function extractKeyValuePairs($sQuery) { // Search for terms of kind [=]. @@ -225,6 +169,7 @@ class SearchDescription /////////// Search building functions + public function extendWithFullTerm($aSearchTerm, $bWordInQuery, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken, &$iGlobalRank) { $aNewSearches = array(); @@ -335,8 +280,7 @@ class SearchDescription $oSearch->iSearchRank++; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; - } - else { + } else { $this->aFullNameAddress[$iWordID] = $iWordID; } } else { @@ -393,7 +337,7 @@ class SearchDescription $aNewSearches[] = $oSearch; } } - } + } } if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) @@ -421,6 +365,7 @@ class SearchDescription /////////// Query functions + public function queryCountry(&$oDB, $sViewboxSQL) { $sSQL = 'SELECT place_id FROM placex '; @@ -490,7 +435,7 @@ class SearchDescription public function queryPostcode(&$oDB, $sCountryList, $iLimit) { - $sSQL = 'SELECT p.place_id FROM location_postcode p '; + $sSQL = 'SELECT p.place_id FROM location_postcode p '; if (sizeof($this->aAddress)) { $sSQL .= ', search_name s '; @@ -592,7 +537,7 @@ class SearchDescription } if ($sViewboxSmall) { - $aTerms[] = 'centroid && '.$sViewboxSmall; + $aTerms[] = 'centroid && '.$sViewboxSmall; } if ($this->oNearPoint) { @@ -895,7 +840,8 @@ class SearchDescription /////////// Sort functions - static function bySearchRank($a, $b) + + public static function bySearchRank($a, $b) { if ($a->iSearchRank == $b->iSearchRank) { return $a->iOperator + strlen($a->sHouseNumber) @@ -907,9 +853,12 @@ class SearchDescription //////////// Debugging functions - function dumpAsHtmlTableRow(&$aWordIDs) + + public function dumpAsHtmlTableRow(&$aWordIDs) { - $kf = function($k) use (&$aWordIDs) { return $aWordIDs[$k]; }; + $kf = function ($k) use (&$aWordIDs) { + return 
$aWordIDs[$k]; + }; echo ""; echo "$this->iSearchRank"; @@ -934,4 +883,4 @@ class SearchDescription echo ""; } -}; +} diff --git a/phpcs.xml b/phpcs.xml index 9f705d83..2f4bc118 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -10,8 +10,8 @@ - - + + diff --git a/utils/setup.php b/utils/setup.php index 86630196..89177914 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -540,7 +540,6 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $sSQL .= " FROM us_postcode WHERE postcode NOT IN"; $sSQL .= " (SELECT postcode FROM location_postcode"; $sSQL .= " WHERE country_code = 'us')"; - } else { $sSQL .= "TRUNCATE TABLE us_postcode"; } diff --git a/utils/update.php b/utils/update.php index ca700f0b..d729519a 100755 --- a/utils/update.php +++ b/utils/update.php @@ -77,8 +77,7 @@ if ($aResult['init-updates']) { if ($sDatabaseDate === false) { fail("Cannot determine date of database."); } - $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', - strtotime($sDatabaseDate) - (3*60*60)); + $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60)); // get the appropriate state id $aOutput = 0; @@ -288,7 +287,7 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { if ($iResult == 3) { echo 'No new updates. Sleeping for '.CONST_Replication_Recheck_Interval." 
sec.\n"; sleep(CONST_Replication_Recheck_Interval); - } else if ($iResult != 0) { + } elseif ($iResult != 0) { echo 'ERROR: updates failed.'; exit($iResult); } else { @@ -325,7 +324,11 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { // write the update logs $iFileSize = filesize($sImportFile); - $sSQL = "INSERT INTO import_osmosis_log (batchend, batchseq, batchsize, starttime, endtime, event) values ('$sBatchEnd',$iEndSequence,$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','import')"; + $sSQL = 'INSERT INTO import_osmosis_log'; + $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)'; + $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'"; + $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','"; + $sSQL .= date('Y-m-d H:i:s')."','import')"; var_Dump($sSQL); chksql($oDB->query($sSQL)); @@ -348,7 +351,11 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { exit($iErrorLevel); } - $sSQL = "INSERT INTO import_osmosis_log (batchend, batchseq, batchsize, starttime, endtime, event) values ('$sBatchEnd',$iEndSequence,$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')"; + $sSQL = 'INSERT INTO import_osmosis_log'; + $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)'; + $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'"; + $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','"; + $sSQL .= date('Y-m-d H:i:s')."','index')"; var_Dump($sSQL); $oDB->query($sSQL); echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." 
minutes\n"; @@ -362,4 +369,3 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { if (!$aResult['import-osmosis-all']) exit(0); } } - From 4bff2814a9527e4d4f7645e9eedadf6bfeba698e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 17:13:41 +0200 Subject: [PATCH 10/19] add missing include --- lib/SearchDescription.php | 4 ++-- lib/SpecialSearchOperator.php | 44 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 lib/SpecialSearchOperator.php diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index d84c8bf8..533c0ab4 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -1,9 +1,9 @@ getConstants(); + + Operator::$aConstantNames = array(); + foreach ($aConstants as $sName => $iValue) { + Operator::$aConstantNames[$iValue] = $sName; + } + } + + return Operator::$aConstantNames[$iOperator]; + } +} From 614a6ab861fc991da390cdeca137e96ee4dcde2c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 17:36:38 +0200 Subject: [PATCH 11/19] don't trust words from word table to be sanatized --- lib/SearchDescription.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 533c0ab4..42e5af30 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -201,7 +201,9 @@ class SearchDescription // We need to try the case where the postal code is the primary element // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) // so try both. - if (!$this->sPostcode && $bWordInQuery) { + if (!$this->sPostcode && $bWordInQuery + && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word'] + ) { // If we have structured search or this is the first term, // make the postcode the primary search element. 
if ($this->iOperator == Operator::NONE @@ -239,6 +241,9 @@ class SearchDescription if (preg_match_all("/[^0-9]/", $oSearch->sHouseNumber, $aMatches) > 2) { $oSearch->iSearchRank++; } + if (!isset($aSearchTerm['word_id'])) { + $oSearch->iSearchRank++; + } // also must not appear in the middle of the address if (sizeof($this->aAddress) || sizeof($this->aAddressNonSearch)) { $oSearch->iSearchRank++; @@ -446,7 +451,7 @@ class SearchDescription $sSQL .= 'WHERE '; } - $sSQL .= "p.postcode = '".pg_escape_string(reset($this->aName))."'"; + $sSQL .= "p.postcode = '".reset($this->aName)."'"; $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); if ($sCountryTerm) { $sSQL .= ' AND '.$sCountryTerm; From 30511fd3ab9d731b0afec202386fc2425a0b1b9f Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 21:23:31 +0200 Subject: [PATCH 12/19] replace NearPoint with a more generic context object The NearPoint is actually common to all SearchDescriptions and there is other context data as well. like viewbox, that needs to be available to the search object but is common. 
--- lib/Geocode.php | 24 ++---- lib/NearPoint.php | 158 -------------------------------------- lib/ReverseGeocode.php | 11 ++- lib/SearchContext.php | 73 ++++++++++++++++++ lib/SearchDescription.php | 68 +++++++--------- lib/lib.php | 80 ++++++++++++++++++- 6 files changed, 194 insertions(+), 220 deletions(-) delete mode 100644 lib/NearPoint.php create mode 100644 lib/SearchContext.php diff --git a/lib/Geocode.php b/lib/Geocode.php index 58bc3bfc..a6baa96b 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -2,10 +2,10 @@ namespace Nominatim; -require_once(CONST_BasePath.'/lib/NearPoint.php'); require_once(CONST_BasePath.'/lib/PlaceLookup.php'); require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); require_once(CONST_BasePath.'/lib/SearchDescription.php'); +require_once(CONST_BasePath.'/lib/SearchContext.php'); class Geocode { @@ -888,6 +888,8 @@ class Geocode { if (!$this->sQuery && !$this->aStructuredQuery) return array(); + $oCtx = new SearchContext(); + $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = getArraySQL( array_map("getDBQuoted", $this->aLangPrefOrder) @@ -926,20 +928,12 @@ class Geocode } // Do we have anything that looks like a lat/lon pair? 
- $oNearPoint = false; - if ($aLooksLike = NearPoint::extractFromQuery($sQuery)) { - $oNearPoint = $aLooksLike['pt']; - $sQuery = $aLooksLike['query']; - } + $sQuery = $oCtx->setNearPointFromQuery($sQuery); $aSearchResults = array(); if ($sQuery || $this->aStructuredQuery) { // Start with a single blank search - $aSearches = array(new SearchDescription()); - - if ($oNearPoint) { - $aSearches[0]->setNear($oNearPoint); - } + $aSearches = array(new SearchDescription($oCtx)); if ($sQuery) { $sQuery = $aSearches[0]->extractKeyValuePairs($sQuery); @@ -1166,7 +1160,7 @@ class Geocode } } elseif (!$oSearch->isNamedSearch()) { // looking for a POI in a geographic area - if (!$bBoundingBoxSearch && !$oSearch->isNearSearch()) { + if (!$bBoundingBoxSearch && !$oCtx->hasNearPoint()) { continue; } @@ -1319,11 +1313,7 @@ class Geocode $oReverse = new ReverseGeocode($this->oDB); $oReverse->setZoom(18); - $aLookup = $oReverse->lookup( - $oNearPoint->lat(), - $oNearPoint->lon(), - false - ); + $aLookup = $oReverse->lookupPoint($oCtx->sqlNear, false); if (CONST_Debug) var_dump("Reverse search", $aLookup); diff --git a/lib/NearPoint.php b/lib/NearPoint.php deleted file mode 100644 index 30845b76..00000000 --- a/lib/NearPoint.php +++ /dev/null @@ -1,158 +0,0 @@ -fLat = (float)$lat; - $this->fLon = (float)$lon; - $this->fRadius = (float)$radius; - $this->sSQL = 'ST_SetSRID(ST_Point('.$this->fLon.','.$this->fLat.'),4326)'; - } - - public function lat() - { - return $this->fLat; - } - - public function lon() - { - return $this->fLon; - } - - public function radius() - { - return $this->fRadius; - } - - public function distanceSQL($sObj) - { - return 'ST_Distance('.$this->sSQL.", $sObj)"; - } - - public function withinSQL($sObj) - { - return sprintf('ST_DWithin(%s, %s, %F)', $sObj, $this->sSQL, $this->fRadius); - } - - /** - * Check that the coordinates are valid WSG84 coordinates. - * - * @return bool True if the coordinates are correctly bounded. 
- */ - public function isValid() - { - return ($this->fLat <= 90.1 - && $this->fLat >= -90.1 - && $this->fLon <= 180.1 - && $this->fLon >= -180.1); - } - - /** - * Extract a coordinate point from a query string. - * - * If a coordinate is found an array of a new NearPoint and the - * remaining query is returned or false otherwise. - * - * @param string $sQuery Query to scan. - * - * @return array|false If a coordinate was found, an array with - * `pt` as the NearPoint coordinates and `query` - * with the remaining query string. False otherwiese. - */ - public static function extractFromQuery($sQuery) - { - // Do we have anything that looks like a lat/lon pair? - // returns array(lat,lon,query_with_lat_lon_removed) - // or null - $sFound = null; - $fQueryLat = null; - $fQueryLon = null; - - if (preg_match('/\\s*([NS])[ ]+([0-9]+[0-9.]*)[° ]+([0-9.]+)?[′\']*[, ]+([EW])[ ]+([0-9]+)[° ]+([0-9]+[0-9.]*)[′\']*\\s*/', $sQuery, $aData)) { - /* 1 2 3 4 5 6 - * degrees decimal minutes - * N 40 26.767, W 79 58.933 - * N 40°26.767′, W 79°58.933′ - */ - $sFound = $aData[0]; - $fQueryLat = ($aData[1]=='N'?1:-1) * ($aData[2] + $aData[3]/60); - $fQueryLon = ($aData[4]=='E'?1:-1) * ($aData[5] + $aData[6]/60); - } elseif (preg_match('/\\s*([0-9]+)[° ]+([0-9]+[0-9.]*)?[′\']*[ ]+([NS])[, ]+([0-9]+)[° ]+([0-9]+[0-9.]*)?[′\' ]+([EW])\\s*/', $sQuery, $aData)) { - /* 1 2 3 4 5 6 - * degrees decimal minutes - * 40 26.767 N, 79 58.933 W - * 40° 26.767′ N 79° 58.933′ W - */ - $sFound = $aData[0]; - $fQueryLat = ($aData[3]=='N'?1:-1) * ($aData[1] + $aData[2]/60); - $fQueryLon = ($aData[6]=='E'?1:-1) * ($aData[4] + $aData[5]/60); - } elseif (preg_match('/\\s*([NS])[ ]([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″"]*[, ]+([EW])[ ]([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″"]*\\s*/', $sQuery, $aData)) { - /* 1 2 3 4 5 6 7 8 - * degrees decimal seconds - * N 40 26 46 W 79 58 56 - * N 40° 26′ 46″, W 79° 58′ 56″ - */ - $sFound = $aData[0]; - $fQueryLat = ($aData[1]=='N'?1:-1) * ($aData[2] + $aData[3]/60 + 
$aData[4]/3600); - $fQueryLon = ($aData[5]=='E'?1:-1) * ($aData[6] + $aData[7]/60 + $aData[8]/3600); - } elseif (preg_match('/\\s*([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″" ]+([NS])[, ]+([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″" ]+([EW])\\s*/', $sQuery, $aData)) { - /* 1 2 3 4 5 6 7 8 - * degrees decimal seconds - * 40 26 46 N 79 58 56 W - * 40° 26′ 46″ N, 79° 58′ 56″ W - */ - $sFound = $aData[0]; - $fQueryLat = ($aData[4]=='N'?1:-1) * ($aData[1] + $aData[2]/60 + $aData[3]/3600); - $fQueryLon = ($aData[8]=='E'?1:-1) * ($aData[5] + $aData[6]/60 + $aData[7]/3600); - } elseif (preg_match('/\\s*([NS])[ ]([0-9]+[0-9]*\\.[0-9]+)[°]*[, ]+([EW])[ ]([0-9]+[0-9]*\\.[0-9]+)[°]*\\s*/', $sQuery, $aData)) { - /* 1 2 3 4 - * degrees decimal - * N 40.446° W 79.982° - */ - $sFound = $aData[0]; - $fQueryLat = ($aData[1]=='N'?1:-1) * ($aData[2]); - $fQueryLon = ($aData[3]=='E'?1:-1) * ($aData[4]); - } elseif (preg_match('/\\s*([0-9]+[0-9]*\\.[0-9]+)[° ]+([NS])[, ]+([0-9]+[0-9]*\\.[0-9]+)[° ]+([EW])\\s*/', $sQuery, $aData)) { - /* 1 2 3 4 - * degrees decimal - * 40.446° N 79.982° W - */ - $sFound = $aData[0]; - $fQueryLat = ($aData[2]=='N'?1:-1) * ($aData[1]); - $fQueryLon = ($aData[4]=='E'?1:-1) * ($aData[3]); - } elseif (preg_match('/(\\s*\\[|^\\s*|\\s*)(-?[0-9]+[0-9]*\\.[0-9]+)[, ]+(-?[0-9]+[0-9]*\\.[0-9]+)(\\]\\s*|\\s*$|\\s*)/', $sQuery, $aData)) { - /* 1 2 3 4 - * degrees decimal - * 12.34, 56.78 - * 12.34 56.78 - * [12.456,-78.90] - */ - $sFound = $aData[0]; - $fQueryLat = $aData[2]; - $fQueryLon = $aData[3]; - } else { - return false; - } - - $oPt = new NearPoint($fQueryLat, $fQueryLon); - - if (!$oPt->isValid()) return false; - - $sQuery = trim(str_replace($sFound, ' ', $sQuery)); - - return array('pt' => $oPt, 'query' => $sQuery); - } -} diff --git a/lib/ReverseGeocode.php b/lib/ReverseGeocode.php index 1de0893c..9b43a3e3 100644 --- a/lib/ReverseGeocode.php +++ b/lib/ReverseGeocode.php @@ -66,15 +66,22 @@ class ReverseGeocode ); } + public function lookup($fLat, $fLon, 
$bDoInterpolation = true) + { + return $this->lookupPoint( + 'ST_SetSRID(ST_Point('.$fLon.','.$fLat.'),4326)', + $bDoInterpolation + ); + } + /* lookup() * returns { place_id =>, type => '(osm|tiger)' } * fails if no place was found */ - public function lookup($fLat, $fLon, $bDoInterpolation = true) + public function lookupPoint($sPointSQL, $bDoInterpolation = true) { - $sPointSQL = 'ST_SetSRID(ST_Point('.$fLon.','.$fLat.'),4326)'; $iMaxRank = $this->iMaxRank; // Find the nearest point diff --git a/lib/SearchContext.php b/lib/SearchContext.php new file mode 100644 index 00000000..a6b63586 --- /dev/null +++ b/lib/SearchContext.php @@ -0,0 +1,73 @@ +fNearRadius !== false; + } + + public function nearRadius() + { + return $this->fNearRadius; + } + + public function setNearPoint($fLat, $fLon, $fRadius = 0.1) + { + $this->fNearRadius = $fRadius; + $this->sqlNear = 'ST_SetSRID(ST_Point('.$fLon.','.$fLat.'),4326)'; + } + + /** + * Extract a coordinate point from a query string. + * + * @param string $sQuery Query to scan. + * + * @return The remaining query string. 
+ */ + public function setNearPointFromQuery($sQuery) + { + $aResult = parseLatLon($sQuery); + + if ($aResult !== false + && $aResult[1] <= 90.1 + && $aResult[1] >= -90.1 + && $aResult[2] <= 180.1 + && $aResult[2] >= -180.1 + ) { + $this->setNearPoint($aResult[1], $aResult[2]); + $sQuery = trim(str_replace($aResult[0], ' ', $sQuery)); + } + + return $sQuery; + } + + public function distanceSQL($sObj) + { + return 'ST_Distance('.$this->sqlNear.", $sObj)"; + } + + public function withinSQL($sObj) + { + return sprintf('ST_DWithin(%s, %s, %F)', $sObj, $this->sqlNear, $this->fNearRadius); + } +} diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 42e5af30..7073186b 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -3,6 +3,7 @@ namespace Nominatim; require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php'); +require_once(CONST_BasePath.'/lib/SearchContext.php'); /** * Description of a single interpretation of a search query. @@ -33,15 +34,20 @@ class SearchDescription private $sHouseNumber = ''; /// Postcode for the object. private $sPostcode = ''; - /// Geographic search area. - private $oNearPoint = false; + /// Global search constraints. + private $oContext; // Temporary values used while creating the search description. - /// Index of phrase currently processed + /// Index of phrase currently processed. 
private $iNamePhrase = -1; + public function __construct($oContext) + { + $this->oContext = $oContext; + } + public function getRank() { return $this->iSearchRank; @@ -58,11 +64,6 @@ class SearchDescription return $this->sPostcode; } - public function setNear(&$oNearPoint) - { - $this->oNearPoint = $oNearPoint; - } - public function setPoiSearch($iOperator, $sClass, $sType) { $this->iOperator = $iOperator; @@ -78,12 +79,7 @@ class SearchDescription public function isCountrySearch() { return $this->sCountryCode && sizeof($this->aName) == 0 - && !$this->iOperator && !$this->oNearPoint; - } - - public function isNearSearch() - { - return (bool) $this->oNearPoint; + && !$this->iOperator && !$this->oContext->hasNearPoint(); } public function isPoiSearch() @@ -400,8 +396,8 @@ class SearchDescription if ($sCountryList) { $sSQL .= ' JOIN placex USING (place_id)'; } - if ($this->oNearPoint) { - $sSQL .= ' WHERE '.$this->oNearPoint->withinSQL('ct.centroid'); + if ($this->oContext->hasNearPoint()) { + $sSQL .= ' WHERE '.$this->oContext->withinSQL('ct.centroid'); } else { $sSQL .= " WHERE ST_Contains($sViewboxSQL, ct.centroid)"; } @@ -413,23 +409,23 @@ class SearchDescription } if ($sViewboxCentreSQL) { $sSQL .= " ORDER BY ST_Distance($sViewboxCentreSQL, ct.centroid) ASC"; - } elseif ($this->oNearPoint) { - $sSQL .= ' ORDER BY '.$this->oNearPoint->distanceSQL('ct.centroid').' ASC'; + } elseif ($this->oContext->hasNearPoint()) { + $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('ct.centroid').' 
ASC'; } $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); return chksql($oDB->getCol($sSQL)); } - if ($this->oNearPoint) { + if ($this->oContext->hasNearPoint()) { $sSQL = 'SELECT place_id FROM placex WHERE '; $sSQL .= 'class=\''.$this->sClass."' and type='".$this->sType."'"; - $sSQL .= ' AND '.$this->oNearPoint->withinSQL('geometry'); + $sSQL .= ' AND '.$this->oContext->withinSQL('geometry'); $sSQL .= ' AND linked_place_id is null'; if ($sCountryList) { $sSQL .= " AND country_code in ($sCountryList)"; } - $sSQL .= ' ORDER BY '.$this->oNearPoint->distanceSQL('centroid')." ASC"; + $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid')." ASC"; $sSQL .= " LIMIT $iLimit"; if (CONST_Debug) var_dump($sSQL); return chksql($oDB->getCol($sSQL)); @@ -526,9 +522,9 @@ class SearchDescription } } - if ($this->oNearPoint) { - $aTerms[] = $this->oNearPoint->withinSQL('centroid'); - $aOrder[] = $this->oNearPoint->distanceSQL('centroid'); + if ($this->oContext->hasNearPoint()) { + $aTerms[] = $this->oContext->withinSQL('centroid'); + $aOrder[] = $this->oContext->distanceSQL('centroid'); } elseif ($this->sPostcode) { if (!sizeof($this->aAddress)) { $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; @@ -545,8 +541,8 @@ class SearchDescription $aTerms[] = 'centroid && '.$sViewboxSmall; } - if ($this->oNearPoint) { - $aOrder[] = $this->oNearPoint->distanceSQL('centroid'); + if ($this->oContext->hasNearPoint()) { + $aOrder[] = $this->oContext->distanceSQL('centroid'); } if ($this->sHouseNumber) { @@ -765,8 +761,8 @@ class SearchDescription $fRange = 0.05; $sOrderBySQL = ''; - if ($this->oNearPoint) { - $sOrderBySQL = $this->oNearPoint->distanceSQL('l.centroid'); + if ($this->oContext->hasNearPoint()) { + $sOrderBySQL = $this->oContext->distanceSQL('l.centroid'); } elseif ($sPlaceIDs) { $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)"; } elseif ($sPlaceGeom) 
{ @@ -804,13 +800,13 @@ class SearchDescription $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); } else { - if ($this->oNearPoint) { - $fRange = $this->oNearPoint->radius(); + if ($this->oContext->hasNearPoint()) { + $fRange = $this->oContext->nearRadius(); } $sOrderBySQL = ''; - if ($this->oNearPoint) { - $sOrderBySQL = $this->oNearPoint->distanceSQL('l.geometry'); + if ($this->oContext->hasNearPoint()) { + $sOrderBySQL = $this->oContext->distanceSQL('l.geometry'); } else { $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)"; } @@ -878,14 +874,6 @@ class SearchDescription echo "".$this->sPostcode.""; echo "".$this->sHouseNumber.""; - if ($this->oNearPoint) { - echo "".$this->oNearPoint->lat().""; - echo "".$this->oNearPoint->lon().""; - echo "".$this->oNearPoint->radius().""; - } else { - echo ""; - } - echo ""; } } diff --git a/lib/lib.php b/lib/lib.php index 969d58a3..b5fbee3e 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -489,9 +489,8 @@ function _debugDumpGroupedSearches($aData, $aTokens) } echo ""; echo ""; - echo ""; - echo ""; - echo ""; + echo ""; + echo ""; foreach ($aData as $iRank => $aRankedSet) { foreach ($aRankedSet as $aRow) { $aRow->dumpAsHtmlTableRow($aWordsIDs); @@ -546,6 +545,81 @@ function addQuotes($s) return "'".$s."'"; } +function parseLatLon($sQuery) +{ + $sFound = null; + $fQueryLat = null; + $fQueryLon = null; + + if (preg_match('/\\s*([NS])[ ]+([0-9]+[0-9.]*)[° ]+([0-9.]+)?[′\']*[, ]+([EW])[ ]+([0-9]+)[° ]+([0-9]+[0-9.]*)[′\']*\\s*/', $sQuery, $aData)) { + /* 1 2 3 4 5 6 + * degrees decimal minutes + * N 40 26.767, W 79 58.933 + * N 40°26.767′, W 79°58.933′ + */ + $sFound = $aData[0]; + $fQueryLat = ($aData[1]=='N'?1:-1) * ($aData[2] + $aData[3]/60); + $fQueryLon = ($aData[4]=='E'?1:-1) * ($aData[5] + $aData[6]/60); + } elseif (preg_match('/\\s*([0-9]+)[° ]+([0-9]+[0-9.]*)?[′\']*[ ]+([NS])[, ]+([0-9]+)[° ]+([0-9]+[0-9.]*)?[′\' ]+([EW])\\s*/', $sQuery, $aData)) { + /* 1 2 3 4 5 6 + * degrees decimal minutes + 
* 40 26.767 N, 79 58.933 W + * 40° 26.767′ N 79° 58.933′ W + */ + $sFound = $aData[0]; + $fQueryLat = ($aData[3]=='N'?1:-1) * ($aData[1] + $aData[2]/60); + $fQueryLon = ($aData[6]=='E'?1:-1) * ($aData[4] + $aData[5]/60); + } elseif (preg_match('/\\s*([NS])[ ]([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″"]*[, ]+([EW])[ ]([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″"]*\\s*/', $sQuery, $aData)) { + /* 1 2 3 4 5 6 7 8 + * degrees decimal seconds + * N 40 26 46 W 79 58 56 + * N 40° 26′ 46″, W 79° 58′ 56″ + */ + $sFound = $aData[0]; + $fQueryLat = ($aData[1]=='N'?1:-1) * ($aData[2] + $aData[3]/60 + $aData[4]/3600); + $fQueryLon = ($aData[5]=='E'?1:-1) * ($aData[6] + $aData[7]/60 + $aData[8]/3600); + } elseif (preg_match('/\\s*([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″" ]+([NS])[, ]+([0-9]+)[° ]+([0-9]+)[′\' ]+([0-9]+)[″" ]+([EW])\\s*/', $sQuery, $aData)) { + /* 1 2 3 4 5 6 7 8 + * degrees decimal seconds + * 40 26 46 N 79 58 56 W + * 40° 26′ 46″ N, 79° 58′ 56″ W + */ + $sFound = $aData[0]; + $fQueryLat = ($aData[4]=='N'?1:-1) * ($aData[1] + $aData[2]/60 + $aData[3]/3600); + $fQueryLon = ($aData[8]=='E'?1:-1) * ($aData[5] + $aData[6]/60 + $aData[7]/3600); + } elseif (preg_match('/\\s*([NS])[ ]([0-9]+[0-9]*\\.[0-9]+)[°]*[, ]+([EW])[ ]([0-9]+[0-9]*\\.[0-9]+)[°]*\\s*/', $sQuery, $aData)) { + /* 1 2 3 4 + * degrees decimal + * N 40.446° W 79.982° + */ + $sFound = $aData[0]; + $fQueryLat = ($aData[1]=='N'?1:-1) * ($aData[2]); + $fQueryLon = ($aData[3]=='E'?1:-1) * ($aData[4]); + } elseif (preg_match('/\\s*([0-9]+[0-9]*\\.[0-9]+)[° ]+([NS])[, ]+([0-9]+[0-9]*\\.[0-9]+)[° ]+([EW])\\s*/', $sQuery, $aData)) { + /* 1 2 3 4 + * degrees decimal + * 40.446° N 79.982° W + */ + $sFound = $aData[0]; + $fQueryLat = ($aData[2]=='N'?1:-1) * ($aData[1]); + $fQueryLon = ($aData[4]=='E'?1:-1) * ($aData[3]); + } elseif (preg_match('/(\\s*\\[|^\\s*|\\s*)(-?[0-9]+[0-9]*\\.[0-9]+)[, ]+(-?[0-9]+[0-9]*\\.[0-9]+)(\\]\\s*|\\s*$|\\s*)/', $sQuery, $aData)) { + /* 1 2 3 4 + * degrees decimal + * 12.34, 56.78 + * 12.34 
56.78 + * [12.456,-78.90] + */ + $sFound = $aData[0]; + $fQueryLat = $aData[2]; + $fQueryLon = $aData[3]; + } else { + return false; + } + + return array($sFound, $fQueryLat, $fQueryLon); +} + function geometryText2Points($geometry_as_text, $fRadius) { From 86c085813044e52a4897d6a85e4ae7700bd40f91 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 22:44:01 +0200 Subject: [PATCH 13/19] move viewbox sql to new SearchContext --- lib/Geocode.php | 117 +++++++++----------------------------- lib/SearchContext.php | 72 ++++++++++++++++++++++- lib/SearchDescription.php | 30 +++++----- 3 files changed, 110 insertions(+), 109 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index a6baa96b..1138824c 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -37,9 +37,8 @@ class Geocode protected $bBoundedSearch = false; protected $aViewBox = false; - protected $sViewboxCentreSQL = false; - protected $sViewboxSmallSQL = false; - protected $sViewboxLargeSQL = false; + protected $aRoutePoints = false; + protected $aRouteWidth = false; protected $iMaxRank = 20; protected $iMinAddressRank = 0; @@ -184,26 +183,6 @@ class Geocode $this->iMaxAddressRank = $iMax; } - public function setRoute($aRoutePoints, $fRouteWidth) - { - $this->aViewBox = false; - - $this->sViewboxCentreSQL = "ST_SetSRID('LINESTRING("; - $sSep = ''; - foreach ($aRoutePoints as $aPoint) { - $fPoint = (float)$aPoint; - $this->sViewboxCentreSQL .= $sSep.$fPoint; - $sSep = ($sSep == ' ') ? ',' : ' '; - } - $this->sViewboxCentreSQL .= ")'::geometry,4326)"; - - $this->sViewboxSmallSQL = 'ST_BUFFER('.$this->sViewboxCentreSQL; - $this->sViewboxSmallSQL .= ','.($fRouteWidth/69).')'; - - $this->sViewboxLargeSQL = 'ST_BUFFER('.$this->sViewboxCentreSQL; - $this->sViewboxLargeSQL .= ','.($fRouteWidth/30).')'; - } - public function setViewbox($aViewbox) { $this->aViewBox = array_map('floatval', $aViewbox); @@ -218,29 +197,6 @@ class Geocode ) { userError("Bad parameter 'viewbox'. 
Not a box."); } - - $fHeight = $this->aViewBox[0] - $this->aViewBox[2]; - $fWidth = $this->aViewBox[1] - $this->aViewBox[3]; - $aBigViewBox[0] = $this->aViewBox[0] + $fHeight; - $aBigViewBox[2] = $this->aViewBox[2] - $fHeight; - $aBigViewBox[1] = $this->aViewBox[1] + $fWidth; - $aBigViewBox[3] = $this->aViewBox[3] - $fWidth; - - $this->sViewboxCentreSQL = false; - $this->sViewboxSmallSQL = sprintf( - 'ST_SetSRID(ST_MakeBox2D(ST_Point(%F,%F),ST_Point(%F,%F)),4326)', - $this->aViewBox[0], - $this->aViewBox[1], - $this->aViewBox[2], - $this->aViewBox[3] - ); - $this->sViewboxLargeSQL = sprintf( - 'ST_SetSRID(ST_MakeBox2D(ST_Point(%F,%F),ST_Point(%F,%F)),4326)', - $aBigViewBox[0], - $aBigViewBox[1], - $aBigViewBox[2], - $aBigViewBox[3] - ); } public function setQuery($sQueryString) @@ -319,7 +275,8 @@ class Geocode $aRoute = $oParams->getStringList('route'); $fRouteWidth = $oParams->getFloat('routewidth'); if ($aRoute && $fRouteWidth) { - $this->setRoute($aRoute, $fRouteWidth); + $this->aRoutePoints = $aRoute; + $this->aRouteWidth = $fRouteWidth; } } } @@ -407,7 +364,7 @@ class Geocode return false; } - public function getDetails($aPlaceIDs) + public function getDetails($aPlaceIDs, $oCtx) { //$aPlaceIDs is an array with key: placeID and value: tiger-housenumber, if found, else -1 if (sizeof($aPlaceIDs) == 0) return array(); @@ -419,16 +376,8 @@ class Geocode // Get the details for display (is this a redundant extra step?) 
$sPlaceIDs = join(',', array_keys($aPlaceIDs)); - $sImportanceSQL = ''; - $sImportanceSQLGeom = ''; - if ($this->sViewboxSmallSQL) { - $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; - $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, geometry) THEN 1 ELSE 0.75 END * "; - } - if ($this->sViewboxLargeSQL) { - $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; - $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, geometry) THEN 1 ELSE 0.75 END * "; - } + $sImportanceSQL = $oCtx->viewboxImportanceSQL('ST_Collect(centroid)'); + $sImportanceSQLGeom = $oCtx->viewboxImportanceSQL('geometry'); $sSQL = "SELECT "; $sSQL .= " osm_type,"; @@ -448,7 +397,7 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "hstore_to_json(name)::text AS names,"; $sSQL .= " avg(ST_X(centroid)) AS lon, "; $sSQL .= " avg(ST_Y(centroid)) AS lat, "; - $sSQL .= " ".$sImportanceSQL."COALESCE(importance,0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " COALESCE(importance,0.75-(rank_search::float/40)) $sImportanceSQL AS importance, "; $sSQL .= " ( "; $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; $sSQL .= " FROM "; @@ -507,7 +456,7 @@ class Geocode if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; - $sSQL .= $sImportanceSQLGeom."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " (0.75-(rank_search::float/40)) $sImportanceSQLGeom AS importance, "; $sSQL .= " ("; $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; $sSQL .= " FROM "; @@ -555,7 +504,7 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; $sSQL .= " avg(st_x(centroid)) AS lon, "; $sSQL .= " avg(st_y(centroid)) AS lat,"; - $sSQL .= " 
".$sImportanceSQL."-1.15 AS importance, "; + $sSQL .= " -1.15".$sImportanceSQL." AS importance, "; $sSQL .= " ("; $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; $sSQL .= " FROM "; @@ -605,7 +554,7 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "null AS names, "; $sSQL .= " AVG(st_x(centroid)) AS lon, "; $sSQL .= " AVG(st_y(centroid)) AS lat, "; - $sSQL .= " ".$sImportanceSQL."-0.1 AS importance, "; // slightly smaller than the importance for normal houses with rank 30, which is 0 + $sSQL .= " -0.1".$sImportanceSQL." AS importance, "; // slightly smaller than the importance for normal houses with rank 30, which is 0 $sSQL .= " ("; $sSQL .= " SELECT "; $sSQL .= " MAX(p.importance*(p.rank_address+2)) "; @@ -664,7 +613,7 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "null AS names, "; $sSQL .= " avg(ST_X(centroid)) AS lon, "; $sSQL .= " avg(ST_Y(centroid)) AS lat, "; - $sSQL .= " ".$sImportanceSQL."-1.10 AS importance, "; + $sSQL .= " -1.10".$sImportanceSQL." 
AS importance, "; $sSQL .= " ( "; $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; $sSQL .= " FROM "; @@ -890,6 +839,17 @@ class Geocode $oCtx = new SearchContext(); + if ($this->aRoutePoints) { + $oCtx->setViewboxFromRoute( + $this->oDB, + $this->aRoutePoints, + $this->aRouteWidth, + $this->bBoundedSearch + ); + } else if ($this->aViewBox) { + $oCtx->setViewboxFromBox($this->aViewBox, $this->bBoundedSearch); + } + $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = getArraySQL( array_map("getDBQuoted", $this->aLangPrefOrder) @@ -911,22 +871,6 @@ class Geocode $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/', '\1louisiana\2', $sQuery); } - $bBoundingBoxSearch = $this->bBoundedSearch && $this->sViewboxSmallSQL; - if ($this->sViewboxCentreSQL) { - // For complex viewboxes (routes) precompute the bounding geometry - $sGeom = chksql( - $this->oDB->getOne("select ".$this->sViewboxSmallSQL), - "Could not get small viewbox" - ); - $this->sViewboxSmallSQL = "'".$sGeom."'::geometry"; - - $sGeom = chksql( - $this->oDB->getOne("select ".$this->sViewboxLargeSQL), - "Could not get large viewbox" - ); - $this->sViewboxLargeSQL = "'".$sGeom."'::geometry"; - } - // Do we have anything that looks like a lat/lon pair? $sQuery = $oCtx->setNearPointFromQuery($sQuery); @@ -1153,22 +1097,17 @@ class Geocode if ($oSearch->isCountrySearch()) { // Just looking for a country - look it up if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { - $aPlaceIDs = $oSearch->queryCountry( - $this->oDB, - $bBoundingBoxSearch ? $this->sViewboxSmallSQL : '' - ); + $aPlaceIDs = $oSearch->queryCountry($this->oDB); } } elseif (!$oSearch->isNamedSearch()) { // looking for a POI in a geographic area - if (!$bBoundingBoxSearch && !$oCtx->hasNearPoint()) { + if (!$oCtx->isBoundedSearch()) { continue; } $aPlaceIDs = $oSearch->queryNearbyPoi( $this->oDB, $sCountryCodesSQL, - $bBoundingBoxSearch ? 
$this->sViewboxSmallSQL : '', - $this->sViewboxCentreSQL, $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); @@ -1188,8 +1127,6 @@ class Geocode $this->iMinAddressRank, $this->iMaxAddressRank, $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', - $bBoundingBoxSearch ? $this->sViewboxSmallSQL : '', - $bBoundingBoxSearch ? $this->sViewboxLargeSQL : '', $this->iLimit ); @@ -1306,7 +1243,7 @@ class Geocode // Did we find anything? if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs)) { - $aSearchResults = $this->getDetails($aResultPlaceIDs); + $aSearchResults = $this->getDetails($aResultPlaceIDs, $oCtx); } } else { // Just interpret as a reverse geocode @@ -1318,7 +1255,7 @@ class Geocode if (CONST_Debug) var_dump("Reverse search", $aLookup); if ($aLookup['place_id']) { - $aSearchResults = $this->getDetails(array($aLookup['place_id'] => -1)); + $aSearchResults = $this->getDetails(array($aLookup['place_id'] => -1), $oCtx); $aResultPlaceIDs[$aLookup['place_id']] = -1; } else { $aSearchResults = array(); diff --git a/lib/SearchContext.php b/lib/SearchContext.php index a6b63586..7552ae99 100644 --- a/lib/SearchContext.php +++ b/lib/SearchContext.php @@ -16,10 +16,12 @@ require_once(CONST_BasePath.'/lib/lib.php'); class SearchContext { private $fNearRadius = false; - - // cached SQL + public $bViewboxBounded = false; public $sqlNear = ''; + public $sqlViewboxSmall = ''; + public $sqlViewboxLarge = ''; + public $sqlViewboxCentre = ''; public function hasNearPoint() { @@ -37,6 +39,58 @@ class SearchContext $this->sqlNear = 'ST_SetSRID(ST_Point('.$fLon.','.$fLat.'),4326)'; } + public function isBoundedSearch() + { + return $this->hasNearPoint() || ($this->sqlViewboxSmall && $this->bViewboxBounded); + + } + + public function setViewboxFromBox(&$aViewBox, $bBounded) + { + $this->bViewboxBounded = $bBounded; + $this->sqlViewboxCentre = ''; + + $this->sqlViewboxSmall = sprintf( + 
'ST_SetSRID(ST_MakeBox2D(ST_Point(%F,%F),ST_Point(%F,%F)),4326)', + $aViewBox[0], + $aViewBox[1], + $aViewBox[2], + $aViewBox[3] + ); + + $fHeight = $aViewBox[0] - $aViewBox[2]; + $fWidth = $aViewBox[1] - $aViewBox[3]; + + $this->sqlViewboxLarge = sprintf( + 'ST_SetSRID(ST_MakeBox2D(ST_Point(%F,%F),ST_Point(%F,%F)),4326)', + max($aViewBox[0], $aViewBox[2]) + $fHeight, + max($aViewBox[1], $aViewBox[3]) + $fWidth, + min($aViewBox[0], $aViewBox[2]) - $fHeight, + min($aViewBox[1], $aViewBox[3]) - $fWidth + ); + } + + public function setViewboxFromRoute(&$oDB, $aRoutePoints, $fRouteWidth, $bBounded) + { + $this->bViewboxBounded = $bBounded; + $this->sqlViewboxCentre = "ST_SetSRID('LINESTRING("; + $sSep = ''; + foreach ($aRoutePoints as $aPoint) { + $fPoint = (float)$aPoint; + $this->sqlViewboxCentre .= $sSep.$fPoint; + $sSep = ($sSep == ' ') ? ',' : ' '; + } + $this->sqlViewboxCentre .= ")'::geometry,4326)"; + + $sSQL = 'ST_BUFFER('.$this->sqlViewboxCentre.','.($fRouteWidth/69).')'; + $sGeom = chksql($oDB->getOne("select ".$sSQL), "Could not get small viewbox"); + $this->sqlViewboxSmall = "'".$sGeom."'::geometry"; + + $sSQL = 'ST_BUFFER('.$this->sqlViewboxCentre.','.($fRouteWidth/30).')'; + $sGeom = chksql($oDB->getOne("select ".$sSQL), "Could not get large viewbox"); + $this->sqlViewboxLarge = "'".$sGeom."'::geometry"; + } + /** * Extract a coordinate point from a query string. 
* @@ -70,4 +124,18 @@ class SearchContext { return sprintf('ST_DWithin(%s, %s, %F)', $sObj, $this->sqlNear, $this->fNearRadius); } + + public function viewboxImportanceSQL($sObj) + { + $sSQL = ''; + + if ($this->sqlViewboxSmall) { + $sSQL = " * CASE WHEN ST_Contains($this->sqlViewboxSmall, $sObj) THEN 1 ELSE 0.5 END"; + } + if ($this->sqlViewboxLarge) { + $sSQL = " * CASE WHEN ST_Contains($this->sqlViewboxLarge, $sObj) THEN 1 ELSE 0.5 END"; + } + + return $sSQL; + } } diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 7073186b..e6cdca63 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -367,13 +367,13 @@ class SearchDescription /////////// Query functions - public function queryCountry(&$oDB, $sViewboxSQL) + public function queryCountry(&$oDB) { $sSQL = 'SELECT place_id FROM placex '; $sSQL .= "WHERE country_code='".$this->sCountryCode."'"; $sSQL .= ' AND rank_search = 4'; - if ($sViewboxSQL) { - $sSQL .= " AND ST_Intersects($sViewboxSQL, geometry)"; + if ($this->oContext->bViewboxBounded) { + $sSQL .= ' AND ST_Intersects('.$this->oContext->sqlViewboxSmall.', geometry)'; } $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@ -382,7 +382,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNearbyPoi(&$oDB, $sCountryList, $sViewboxSQL, $sViewboxCentreSQL, $sExcludeSQL, $iLimit) + public function queryNearbyPoi(&$oDB, $sCountryList, $sExcludeSQL, $iLimit) { if (!$this->sClass) { return array(); @@ -398,8 +398,8 @@ class SearchDescription } if ($this->oContext->hasNearPoint()) { $sSQL .= ' WHERE '.$this->oContext->withinSQL('ct.centroid'); - } else { - $sSQL .= " WHERE ST_Contains($sViewboxSQL, ct.centroid)"; + } else if ($this->oContext->bViewboxBounded) { + $sSQL .= ' WHERE ST_Contains('.$this->oContext->sqlViewboxSmall.', ct.centroid)'; } if ($sCountryList) { $sSQL .= " AND country_code in ($sCountryList)"; @@ -407,8 +407,9 @@ class SearchDescription if ($sExcludeSQL) { $sSQL .= ' 
AND place_id not in ('.$sExcludeSQL.')'; } - if ($sViewboxCentreSQL) { - $sSQL .= " ORDER BY ST_Distance($sViewboxCentreSQL, ct.centroid) ASC"; + if ($this->oContext->sqlViewboxCentre) { + $sSQL .= ' ORDER BY ST_Distance('; + $sSQL .= $this->oContext->sqlViewboxCentre.', ct.centroid) ASC'; } elseif ($this->oContext->hasNearPoint()) { $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('ct.centroid').' ASC'; } @@ -459,7 +460,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $sExcludeSQL, $sViewboxSmall, $sViewboxLarge, $iLimit) + public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $sExcludeSQL, $iLimit) { $aTerms = array(); $aOrder = array(); @@ -537,8 +538,8 @@ class SearchDescription $aTerms[] = 'place_id not in ('.$sExcludeSQL.')'; } - if ($sViewboxSmall) { - $aTerms[] = 'centroid && '.$sViewboxSmall; + if ($this->oContext->bViewboxBounded) { + $aTerms[] = 'centroid && '.$this->oContext->sqlViewboxSmall; } if ($this->oContext->hasNearPoint()) { @@ -550,12 +551,7 @@ class SearchDescription } else { $sImportanceSQL = '(CASE WHEN importance = 0 OR importance IS NULL THEN 0.75-(search_rank::float/40) ELSE importance END)'; } - if ($sViewboxSmall) { - $sImportanceSQL .= " * CASE WHEN ST_Contains($sViewboxSmall, centroid) THEN 1 ELSE 0.5 END"; - } - if ($sViewboxLarge) { - $sImportanceSQL .= " * CASE WHEN ST_Contains($sViewboxLarge, centroid) THEN 1 ELSE 0.5 END"; - } + $sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid'); $aOrder[] = "$sImportanceSQL DESC"; if (sizeof($this->aFullNameAddress)) { From 907133a38c7cba6868f8d0bf7222f7fd57698c31 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 23:15:06 +0200 Subject: [PATCH 14/19] move excluded place list to SearchContext --- lib/Geocode.php | 7 +++---- lib/SearchContext.php | 15 ++++++++++++++ 
lib/SearchDescription.php | 43 ++++++++++++++------------------------- 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 1138824c..b978a72c 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -849,6 +849,9 @@ class Geocode } else if ($this->aViewBox) { $oCtx->setViewboxFromBox($this->aViewBox, $this->bBoundedSearch); } + if ($this->aExcludePlaceIDs) { + $oCtx->setExcludeList($this->aExcludePlaceIDs); + } $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = getArraySQL( @@ -1108,7 +1111,6 @@ class Geocode $aPlaceIDs = $oSearch->queryNearbyPoi( $this->oDB, $sCountryCodesSQL, - $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); } elseif ($oSearch->isOperator(Operator::POSTCODE)) { @@ -1126,7 +1128,6 @@ class Geocode $sCountryCodesSQL, $this->iMinAddressRank, $this->iMaxAddressRank, - $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); @@ -1142,7 +1143,6 @@ class Geocode $aResult = $oSearch->queryHouseNumber( $this->oDB, $aPlaceIDs, - $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); @@ -1159,7 +1159,6 @@ class Geocode $aPlaceIDs = $oSearch->queryPoiByOperator( $this->oDB, $aPlaceIDs, - $this->aExcludePlaceIDs ? join(',', $this->aExcludePlaceIDs) : '', $this->iLimit ); } diff --git a/lib/SearchContext.php b/lib/SearchContext.php index 7552ae99..1b48fd84 100644 --- a/lib/SearchContext.php +++ b/lib/SearchContext.php @@ -22,6 +22,7 @@ class SearchContext public $sqlViewboxSmall = ''; public $sqlViewboxLarge = ''; public $sqlViewboxCentre = ''; + private $sqlExcludeList = ''; public function hasNearPoint() { @@ -91,6 +92,11 @@ class SearchContext $this->sqlViewboxLarge = "'".$sGeom."'::geometry"; } + public function setExcludeList($aExcluded) + { + $this->sqlExcludeList = ' not in ('.join(',', $aExcluded).')'; + } + /** * Extract a coordinate point from a query string. 
* @@ -138,4 +144,13 @@ class SearchContext return $sSQL; } + + public function excludeSQL($sVariable) + { + if ($this->sqlExcludeList) { + return $sVariable.$this->sqlExcludeList; + } + + return ''; + } } diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index e6cdca63..0a1959fd 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -382,7 +382,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNearbyPoi(&$oDB, $sCountryList, $sExcludeSQL, $iLimit) + public function queryNearbyPoi(&$oDB, $sCountryList, $iLimit) { if (!$this->sClass) { return array(); @@ -404,9 +404,7 @@ class SearchDescription if ($sCountryList) { $sSQL .= " AND country_code in ($sCountryList)"; } - if ($sExcludeSQL) { - $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; - } + $sSQL .= $this->oContext->excludeSQL(' AND place_id'); if ($this->oContext->sqlViewboxCentre) { $sSQL .= ' ORDER BY ST_Distance('; $sSQL .= $this->oContext->sqlViewboxCentre.', ct.centroid) ASC'; @@ -453,6 +451,7 @@ class SearchDescription if ($sCountryTerm) { $sSQL .= ' AND '.$sCountryTerm; } + $sSQL .= $this->oContext->excludeSQL(' AND p.place_id'); $sSQL .= " LIMIT $iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -460,7 +459,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $sExcludeSQL, $iLimit) + public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $iLimit) { $aTerms = array(); $aOrder = array(); @@ -534,8 +533,9 @@ class SearchDescription } } + $sExcludeSQL = $this->oContext->excludeSQL('place_id'); if ($sExcludeSQL) { - $aTerms[] = 'place_id not in ('.$sExcludeSQL.')'; + $aTerms[] = $sExcludeSQL; } if ($this->oContext->bViewboxBounded) { @@ -590,7 +590,7 @@ class SearchDescription } - public function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $sExcludeSQL, 
$iLimit) + public function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit) { $sPlaceIDs = join(',', $aRoadPlaceIDs); @@ -598,9 +598,7 @@ class SearchDescription $sSQL = 'SELECT place_id FROM placex '; $sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')'; $sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; - if ($sExcludeSQL) { - $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; - } + $sSQL .= $this->oContext->excludeSQL(' AND place_id'); $sSQL .= " LIMIT $iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -629,10 +627,7 @@ class SearchDescription $sSQL .= " or interpolationtype='all') and "; $sSQL .= $iHousenumber.">=startnumber and "; $sSQL .= $iHousenumber."<=endnumber"; - - if ($sExcludeSQL) { - $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; - } + $sSQL .= $this->oContext->excludeSQL(' AND place_id'); $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -649,9 +644,7 @@ class SearchDescription $sSQL = 'SELECT place_id FROM location_property_aux'; $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')'; $sSQL .= " AND housenumber = '".$this->sHouseNumber."'"; - if ($sExcludeSQL) { - $sSQL .= " AND place_id not in ($sExcludeSQL)"; - } + $sSQL .= $this->oContext->excludeSQL(' AND place_id'); $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -675,10 +668,7 @@ class SearchDescription $sSQL .= " or interpolationtype='all') and "; $sSQL .= $iHousenumber.">=startnumber and "; $sSQL .= $iHousenumber."<=endnumber"; - - if ($sExcludeSQL) { - $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; - } + $sSQL .= $this->oContext->excludeSQL(' AND place_id'); $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -694,7 +684,7 @@ class SearchDescription } - public function queryPoiByOperator(&$oDB, $aParentIDs, $sExcludeSQL, $iLimit) + public function queryPoiByOperator(&$oDB, $aParentIDs, $iLimit) { $sPlaceIDs = join(',', $aParentIDs); $aClassPlaceIDs = array(); @@ -707,6 +697,7 @@ class SearchDescription $sSQL .= " AND 
class='".$this->sClass."' "; $sSQL .= " AND type='".$this->sType."'"; $sSQL .= " AND linked_place_id is null"; + $sSQL .= $this->oContext->excludeSQL(' AND place_id'); $sSQL .= " ORDER BY rank_search ASC "; $sSQL .= " LIMIT $iLimit"; @@ -783,9 +774,7 @@ class SearchDescription $sSQL .= " WHERE ST_Contains('$sPlaceGeom', l.centroid)"; } - if ($sExcludeSQL) { - $sSQL .= ' AND l.place_id not in ('.$sExcludeSQL.')'; - } + $sSQL .= $this->oContext->excludeSQL(' AND l.place_id'); $sSQL .= 'limit 300) i '; if ($sOrderBySQL) { $sSQL .= 'order by order_term asc'; @@ -816,9 +805,7 @@ class SearchDescription $sSQL .= " AND ST_DWithin(l.geometry, f.centroid, $fRange)"; $sSQL .= " AND l.class='".$this->sClass."'"; $sSQL .= " AND l.type='".$this->sType."'"; - if ($sExcludeSQL) { - $sSQL .= " AND l.place_id not in (".$sExcludeSQL.")"; - } + $sSQL .= $this->oContext->excludeSQL(' AND l.place_id'); if ($sOrderBySQL) { $sSQL .= "ORDER BY orderterm ASC"; } From 55629a48913d6e091247105cf52d6492574691f7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Oct 2017 23:33:54 +0200 Subject: [PATCH 15/19] move country list to SearchContext --- lib/Geocode.php | 22 ++++++---------------- lib/SearchContext.php | 6 ++++++ lib/SearchDescription.php | 30 +++++++++++++----------------- 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index b978a72c..b27f69e8 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -852,15 +852,14 @@ class Geocode if ($this->aExcludePlaceIDs) { $oCtx->setExcludeList($this->aExcludePlaceIDs); } + if ($this->aCountryCodes) { + $oCtx->setCountryList($this->aCountryCodes); + } $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = getArraySQL( array_map("getDBQuoted", $this->aLangPrefOrder) ); - $sCountryCodesSQL = false; - if ($this->aCountryCodes) { - $sCountryCodesSQL = join(',', array_map('addQuotes', $this->aCountryCodes)); - } $sQuery = $this->sQuery; if (!preg_match('//u', $sQuery)) { @@ 
-1107,25 +1106,16 @@ class Geocode if (!$oCtx->isBoundedSearch()) { continue; } - - $aPlaceIDs = $oSearch->queryNearbyPoi( - $this->oDB, - $sCountryCodesSQL, - $this->iLimit - ); + $aPlaceIDs = $oSearch->queryNearbyPoi($this->oDB, $this->iLimit); } elseif ($oSearch->isOperator(Operator::POSTCODE)) { - $aPlaceIDs = $oSearch->queryPostcode( - $this->oDB, - $sCountryCodesSQL, - $this->iLimit - ); + // looking for postcode + $aPlaceIDs = $oSearch->queryPostcode($this->oDB, $this->iLimit); } else { // Ordinary search: // First search for places according to name and address. $aNamedPlaceIDs = $oSearch->queryNamedPlace( $this->oDB, $aWordFrequencyScores, - $sCountryCodesSQL, $this->iMinAddressRank, $this->iMaxAddressRank, $this->iLimit diff --git a/lib/SearchContext.php b/lib/SearchContext.php index 1b48fd84..f5eab95a 100644 --- a/lib/SearchContext.php +++ b/lib/SearchContext.php @@ -22,6 +22,7 @@ class SearchContext public $sqlViewboxSmall = ''; public $sqlViewboxLarge = ''; public $sqlViewboxCentre = ''; + public $sqlCountryList = ''; private $sqlExcludeList = ''; public function hasNearPoint() @@ -97,6 +98,11 @@ class SearchContext $this->sqlExcludeList = ' not in ('.join(',', $aExcluded).')'; } + public function setCountryList($aCountries) + { + $this->sqlCountryList = '('.join(',', array_map('addQuotes', $aCountries)).')'; + } + /** * Extract a coordinate point from a query string. * diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 0a1959fd..39f7ffca 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -109,13 +109,13 @@ class SearchDescription return 'place_classtype_'.$this->sClass.'_'.$this->sType; } - public function countryCodeSQL($sVar, $sCountryList) + public function countryCodeSQL($sVar) { if ($this->sCountryCode) { return $sVar.' = \''.$this->sCountryCode."'"; } - if ($sCountryList) { - return $sVar.' in ('.$sCountryList.')'; + if ($this->oContext->sqlCountryList) { + return $sVar.' 
in '.$this->oContext->sqlCountryList; } return ''; @@ -382,7 +382,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNearbyPoi(&$oDB, $sCountryList, $iLimit) + public function queryNearbyPoi(&$oDB, $iLimit) { if (!$this->sClass) { return array(); @@ -393,7 +393,7 @@ class SearchDescription $sSQL = 'SELECT count(*) FROM pg_tables WHERE tablename = \''.$sPoiTable."'"; if (chksql($oDB->getOne($sSQL))) { $sSQL = 'SELECT place_id FROM '.$sPoiTable.' ct'; - if ($sCountryList) { + if ($this->oContext->sqlCountryList) { $sSQL .= ' JOIN placex USING (place_id)'; } if ($this->oContext->hasNearPoint()) { @@ -401,8 +401,8 @@ class SearchDescription } else if ($this->oContext->bViewboxBounded) { $sSQL .= ' WHERE ST_Contains('.$this->oContext->sqlViewboxSmall.', ct.centroid)'; } - if ($sCountryList) { - $sSQL .= " AND country_code in ($sCountryList)"; + if ($this->oContext->sqlCountryList) { + $sSQL .= ' AND country_code in '.$this->oContext->sqlCountryList; } $sSQL .= $this->oContext->excludeSQL(' AND place_id'); if ($this->oContext->sqlViewboxCentre) { @@ -421,8 +421,8 @@ class SearchDescription $sSQL .= 'class=\''.$this->sClass."' and type='".$this->sType."'"; $sSQL .= ' AND '.$this->oContext->withinSQL('geometry'); $sSQL .= ' AND linked_place_id is null'; - if ($sCountryList) { - $sSQL .= " AND country_code in ($sCountryList)"; + if ($this->oContext->sqlCountryList) { + $sSQL .= ' AND country_code in '.$this->oContext->sqlCountryList; } $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid')." 
ASC"; $sSQL .= " LIMIT $iLimit"; @@ -433,7 +433,7 @@ class SearchDescription return array(); } - public function queryPostcode(&$oDB, $sCountryList, $iLimit) + public function queryPostcode(&$oDB, $iLimit) { $sSQL = 'SELECT p.place_id FROM location_postcode p '; @@ -447,10 +447,7 @@ class SearchDescription } $sSQL .= "p.postcode = '".reset($this->aName)."'"; - $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); - if ($sCountryTerm) { - $sSQL .= ' AND '.$sCountryTerm; - } + $sSQL .= $this->countryCodeSQL(' AND p.country_code'); $sSQL .= $this->oContext->excludeSQL(' AND p.place_id'); $sSQL .= " LIMIT $iLimit"; @@ -459,7 +456,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $iLimit) + public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) { $aTerms = array(); $aOrder = array(); @@ -506,7 +503,7 @@ class SearchDescription } } - $sCountryTerm = $this->countryCodeSQL('country_code', $sCountryList); + $sCountryTerm = $this->countryCodeSQL('country_code'); if ($sCountryTerm) { $aTerms[] = $sCountryTerm; } @@ -589,7 +586,6 @@ class SearchDescription return array(); } - public function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit) { $sPlaceIDs = join(',', $aRoadPlaceIDs); From 2c62a8dbbc2f4d16bbec67d5539a83b223159fbd Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 9 Oct 2017 22:11:46 +0200 Subject: [PATCH 16/19] adapt phpunit tests to new SearchContext class --- test/php/Nominatim/NearPointTest.php | 95 ------------------------ test/php/Nominatim/NominatimTest.php | 61 ++++++++++++++- test/php/Nominatim/SearchContextTest.php | 52 +++++++++++++ 3 files changed, 112 insertions(+), 96 deletions(-) delete mode 100644 test/php/Nominatim/NearPointTest.php create mode 100644 test/php/Nominatim/SearchContextTest.php diff --git a/test/php/Nominatim/NearPointTest.php 
b/test/php/Nominatim/NearPointTest.php deleted file mode 100644 index bee7d3eb..00000000 --- a/test/php/Nominatim/NearPointTest.php +++ /dev/null @@ -1,95 +0,0 @@ -assertFalse(NearPoint::extractFromQuery('')); - $this->assertFalse(NearPoint::extractFromQuery('abc')); - $this->assertFalse(NearPoint::extractFromQuery('12 34')); - $this->assertFalse(NearPoint::extractFromQuery('200.1 89.9')); // because latitude > 180 - - // coordinates expected - $this->assertNotNull(NearPoint::extractFromQuery('0.0 -0.0')); - - $aRes = NearPoint::extractFromQuery(' abc 12.456 -78.90 def '); - $this->assertEquals($aRes['pt']->lat(), 12.456); - $this->assertEquals($aRes['pt']->lon(), -78.90); - $this->assertEquals($aRes['pt']->radius(), 0.1); - $this->assertEquals($aRes['query'], 'abc def'); - - $aRes = NearPoint::extractFromQuery(' [12.456,-78.90] '); - $this->assertEquals($aRes['pt']->lat(), 12.456); - $this->assertEquals($aRes['pt']->lon(), -78.90); - $this->assertEquals($aRes['pt']->radius(), 0.1); - $this->assertEquals($aRes['query'], ''); - - $aRes = NearPoint::extractFromQuery(' -12.456,-78.90 '); - $this->assertEquals($aRes['pt']->lat(), -12.456); - $this->assertEquals($aRes['pt']->lon(), -78.90); - - // http://en.wikipedia.org/wiki/Geographic_coordinate_conversion - // these all represent the same location - $aQueries = array( - '40 26.767 N 79 58.933 W', - '40° 26.767′ N 79° 58.933′ W', - "40° 26.767' N 79° 58.933' W", - 'N 40 26.767, W 79 58.933', - 'N 40°26.767′, W 79°58.933′', - "N 40°26.767', W 79°58.933'", - - '40 26 46 N 79 58 56 W', - '40° 26′ 46″ N 79° 58′ 56″ W', - 'N 40 26 46 W 79 58 56', - 'N 40° 26′ 46″, W 79° 58′ 56″', - 'N 40° 26\' 46", W 79° 58\' 56"', - - '40.446 -79.982', - '40.446,-79.982', - '40.446° N 79.982° W', - 'N 40.446° W 79.982°', - - '[40.446 -79.982]', - ' 40.446 , -79.982 ', - ); - - - foreach ($aQueries as $sQuery) { - $aRes = NearPoint::extractFromQuery($sQuery); - $this->assertEquals(40.446, $aRes['pt']->lat(), 'degrees decimal ' . 
$sQuery, 0.01); - $this->assertEquals(-79.982, $aRes['pt']->lon(), 'degrees decimal ' . $sQuery, 0.01); - $this->assertEquals('', $aRes['query']); - } - } - - public function testWithinSQL() - { - $np = new NearPoint(0.1, 23, 1); - - $this->assertEquals( - 'ST_DWithin(foo, ST_SetSRID(ST_Point(23,0.1),4326), 1.000000)', - $np->withinSQL('foo') - ); - } - - public function testDistanceSQL() - { - $np = new NearPoint(0.1, 23, 1); - - $this->assertEquals( - 'ST_Distance(ST_SetSRID(ST_Point(23,0.1),4326), foo)', - $np->distanceSQL('foo') - ); - } -} diff --git a/test/php/Nominatim/NominatimTest.php b/test/php/Nominatim/NominatimTest.php index a651fe40..33bb6d32 100644 --- a/test/php/Nominatim/NominatimTest.php +++ b/test/php/Nominatim/NominatimTest.php @@ -2,7 +2,7 @@ namespace Nominatim; -require '../../lib/lib.php'; +require_once '../../lib/lib.php'; class NominatimTest extends \PHPUnit_Framework_TestCase { @@ -203,4 +203,63 @@ class NominatimTest extends \PHPUnit_Framework_TestCase geometryText2Points('MULTIPOLYGON(((30 20, 45 40, 10 40, 30 20)),((15 5, 40 10, 10 20, 5 10, 15 5)))', $fRadius) ); } + + public function testParseLatLon() + { + // no coordinates expected + $this->assertFalse(parseLatLon('')); + $this->assertFalse(parseLatLon('abc')); + $this->assertFalse(parseLatLon('12 34')); + + // coordinates expected + $this->assertNotNull(parseLatLon('0.0 -0.0')); + + $aRes = parseLatLon(' abc 12.456 -78.90 def '); + $this->assertEquals($aRes[1], 12.456); + $this->assertEquals($aRes[2], -78.90); + $this->assertEquals($aRes[0], ' 12.456 -78.90 '); + + $aRes = parseLatLon(' [12.456,-78.90] '); + $this->assertEquals($aRes[1], 12.456); + $this->assertEquals($aRes[2], -78.90); + $this->assertEquals($aRes[0], ' [12.456,-78.90] '); + + $aRes = parseLatLon(' -12.456,-78.90 '); + $this->assertEquals($aRes[1], -12.456); + $this->assertEquals($aRes[2], -78.90); + $this->assertEquals($aRes[0], ' -12.456,-78.90 '); + + // 
http://en.wikipedia.org/wiki/Geographic_coordinate_conversion + // these all represent the same location + $aQueries = array( + '40 26.767 N 79 58.933 W', + '40° 26.767′ N 79° 58.933′ W', + "40° 26.767' N 79° 58.933' W", + 'N 40 26.767, W 79 58.933', + 'N 40°26.767′, W 79°58.933′', + "N 40°26.767', W 79°58.933'", + + '40 26 46 N 79 58 56 W', + '40° 26′ 46″ N 79° 58′ 56″ W', + 'N 40 26 46 W 79 58 56', + 'N 40° 26′ 46″, W 79° 58′ 56″', + 'N 40° 26\' 46", W 79° 58\' 56"', + + '40.446 -79.982', + '40.446,-79.982', + '40.446° N 79.982° W', + 'N 40.446° W 79.982°', + + '[40.446 -79.982]', + ' 40.446 , -79.982 ', + ); + + + foreach ($aQueries as $sQuery) { + $aRes = parseLatLon($sQuery); + $this->assertEquals(40.446, $aRes[1], 'degrees decimal ' . $sQuery, 0.01); + $this->assertEquals(-79.982, $aRes[2], 'degrees decimal ' . $sQuery, 0.01); + $this->assertEquals($sQuery, $aRes[0]); + } + } } diff --git a/test/php/Nominatim/SearchContextTest.php b/test/php/Nominatim/SearchContextTest.php new file mode 100644 index 00000000..a40d0716 --- /dev/null +++ b/test/php/Nominatim/SearchContextTest.php @@ -0,0 +1,52 @@ +oCtx = new SearchContext(); + } + + public function testHasNearPoint() + { + $this->assertFalse($this->oCtx->hasNearPoint()); + $this->oCtx->setNearPoint(0, 0); + $this->assertTrue($this->oCtx->hasNearPoint()); + } + + public function testNearRadius() + { + $this->oCtx->setNearPoint(1, 1); + $this->assertEquals(0.1, $this->oCtx->nearRadius()); + $this->oCtx->setNearPoint(1, 1, 0.338); + $this->assertEquals(0.338, $this->oCtx->nearRadius()); + } + + public function testWithinSQL() + { + $this->oCtx->setNearPoint(0.1, 23, 1); + + $this->assertEquals( + 'ST_DWithin(foo, ST_SetSRID(ST_Point(23,0.1),4326), 1.000000)', + $this->oCtx->withinSQL('foo') + ); + } + + public function testDistanceSQL() + { + $this->oCtx->setNearPoint(0.1, 23, 1); + + $this->assertEquals( + 'ST_Distance(ST_SetSRID(ST_Point(23,0.1),4326), foo)', + $this->oCtx->distanceSQL('foo') + ); + } +} From 
9a5d5d9aec4cf785c8190c37a3136cf09aca6902 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 9 Oct 2017 22:55:50 +0200 Subject: [PATCH 17/19] move complete search query code into SearchDescription --- lib/Geocode.php | 107 ++++++-------------------------- lib/SearchDescription.php | 126 +++++++++++++++++++++++++++----------- 2 files changed, 107 insertions(+), 126 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index b27f69e8..27c33ad6 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -1026,10 +1026,6 @@ class Geocode // Any words that have failed completely? // TODO: suggestions - // Start the search process - // array with: placeid => -1 | tiger-housenumber - $aResultPlaceIDs = array(); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery); if ($this->bReverseInPlan) { @@ -1084,104 +1080,38 @@ class Geocode if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); + // Start the search process + // array with: placeid => -1 | tiger-housenumber + $aResultPlaceIDs = array(); $iGroupLoop = 0; $iQueryLoop = 0; foreach ($aGroupedSearches as $iGroupedRank => $aSearches) { $iGroupLoop++; foreach ($aSearches as $oSearch) { $iQueryLoop++; - $searchedHousenumber = -1; - - if (CONST_Debug) echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; - if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens); - - $aPlaceIDs = array(); - if ($oSearch->isCountrySearch()) { - // Just looking for a country - look it up - if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { - $aPlaceIDs = $oSearch->queryCountry($this->oDB); - } - } elseif (!$oSearch->isNamedSearch()) { - // looking for a POI in a geographic area - if (!$oCtx->isBoundedSearch()) { - continue; - } - $aPlaceIDs = $oSearch->queryNearbyPoi($this->oDB, $this->iLimit); - } elseif ($oSearch->isOperator(Operator::POSTCODE)) { - // looking for postcode - $aPlaceIDs = $oSearch->queryPostcode($this->oDB, $this->iLimit); - } else { - // Ordinary search: - // First search for places according to name and address. - $aNamedPlaceIDs = $oSearch->queryNamedPlace( - $this->oDB, - $aWordFrequencyScores, - $this->iMinAddressRank, - $this->iMaxAddressRank, - $this->iLimit - ); - - if (sizeof($aNamedPlaceIDs)) { - foreach ($aNamedPlaceIDs as $aRow) { - $aPlaceIDs[] = $aRow['place_id']; - $this->exactMatchCache[$aRow['place_id']] = $aRow['exactmatch']; - } - } - - //now search for housenumber, if housenumber provided - if ($oSearch->hasHouseNumber() && sizeof($aPlaceIDs)) { - $aResult = $oSearch->queryHouseNumber( - $this->oDB, - $aPlaceIDs, - $this->iLimit - ); - - if (sizeof($aResult)) { - $searchedHousenumber = $aResult['iHouseNumber']; - $aPlaceIDs = $aResult['aPlaceIDs']; - } elseif (!$oSearch->looksLikeFullAddress()) { - $aPlaceIDs = array(); - } - } - - // finally get POIs if requested - if ($oSearch->isPoiSearch() && sizeof($aPlaceIDs)) { - $aPlaceIDs = $oSearch->queryPoiByOperator( - $this->oDB, - $aPlaceIDs, - $this->iLimit - ); - } - } if (CONST_Debug) { - echo "
Place IDs: "; - var_Dump($aPlaceIDs); + echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; + _debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens); } - if (sizeof($aPlaceIDs) && $oSearch->getPostcode()) { - $sSQL = 'SELECT place_id FROM placex'; - $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; - $sSQL .= " AND postcode = '".$oSearch->getPostcode()."'"; - if (CONST_Debug) var_dump($sSQL); - $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); - if ($aFilteredPlaceIDs) { - $aPlaceIDs = $aFilteredPlaceIDs; - if (CONST_Debug) { - echo "
Place IDs after postcode filtering: "; - var_Dump($aPlaceIDs); - } - } - } + $aRes = $oSearch->query( + $this->oDB, + $aWordFrequencyScores, + $this->exactMatchCache, + $this->iMinAddressRank, + $this->iMaxAddressRank, + $this->iLimit + ); - foreach ($aPlaceIDs as $iPlaceID) { + foreach ($aRes['IDs'] as $iPlaceID) { // array for placeID => -1 | Tiger housenumber - $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber; + $aResultPlaceIDs[$iPlaceID] = $aRes['houseNumber']; } if ($iQueryLoop > 20) break; } - if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs) && ($this->iMinAddressRank != 0 || $this->iMaxAddressRank != 30)) { + if (sizeof($aResultPlaceIDs) && ($this->iMinAddressRank != 0 || $this->iMaxAddressRank != 30)) { // Need to verify passes rank limits before dropping out of the loop (yuk!) // reduces the number of place ids, like a filter // rank_address is 30 for interpolated housenumbers @@ -1224,14 +1154,13 @@ class Geocode $aResultPlaceIDs = $tempIDs; } - //exit; - if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs)) break; + if (sizeof($aResultPlaceIDs)) break; if ($iGroupLoop > 4) break; if ($iQueryLoop > 30) break; } // Did we find anything? 
- if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs)) { + if (sizeof($aResultPlaceIDs)) { $aSearchResults = $this->getDetails($aResultPlaceIDs, $oCtx); } } else { diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 39f7ffca..f3afaff2 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -59,11 +59,6 @@ class SearchDescription return $this->iSearchRank; } - public function getPostCode() - { - return $this->sPostcode; - } - public function setPoiSearch($iOperator, $sClass, $sType) { $this->iOperator = $iOperator; @@ -71,22 +66,6 @@ class SearchDescription $this->sType = $sType; } - public function isNamedSearch() - { - return sizeof($this->aName) > 0 || sizeof($this->aAddress) > 0; - } - - public function isCountrySearch() - { - return $this->sCountryCode && sizeof($this->aName) == 0 - && !$this->iOperator && !$this->oContext->hasNearPoint(); - } - - public function isPoiSearch() - { - return (bool) $this->sClass; - } - public function looksLikeFullAddress() { return sizeof($this->aName) @@ -94,16 +73,6 @@ class SearchDescription && preg_match('/[0-9]+/', $this->sHouseNumber); } - public function isOperator($iType) - { - return $this->iOperator == $iType; - } - - public function hasHouseNumber() - { - return (bool) $this->sHouseNumber; - } - private function poiTable() { return 'place_classtype_'.$this->sClass.'_'.$this->sType; @@ -366,8 +335,91 @@ class SearchDescription /////////// Query functions + public function query(&$oDB, &$aWordFrequencyScores, &$aExactMatchCache, $iMinRank, $iMaxRank, $iLimit) + { + $aPlaceIDs = array(); + $iHousenumber = -1; - public function queryCountry(&$oDB) + if ($this->sCountryCode + && !sizeof($this->aName) + && !$this->iOperator + && !$this->sClass + && !$this->oContext->hasNearPoint() + ) { + // Just looking for a country - look it up + if (4 >= $iMinRank && 4 <= $iMaxRank) { + $aPlaceIDs = $this->queryCountry($oDB); + } + } elseif (!sizeof($this->aName) && !sizeof($this->aAddress)) { 
+ // Neither name nor address? Then we must be + // looking for a POI in a geographic area. + if ($this->oContext->isBoundedSearch()) { + $aPlaceIDs = $this->queryNearbyPoi($oDB, $iLimit); + } + } elseif ($this->iOperator == Operator::POSTCODE) { + // looking for postcode + $aPlaceIDs = $this->queryPostcode($oDB, $iLimit); + } else { + // Ordinary search: + // First search for places according to name and address. + $aNamedPlaceIDs = $this->queryNamedPlace( + $oDB, + $aWordFrequencyScores, + $iMinRank, + $iMaxRank, + $iLimit + ); + + if (sizeof($aNamedPlaceIDs)) { + foreach ($aNamedPlaceIDs as $aRow) { + $aPlaceIDs[] = $aRow['place_id']; + $aExactMatchCache[$aRow['place_id']] = $aRow['exactmatch']; + } + } + + //now search for housenumber, if housenumber provided + if ($this->sHouseNumber && sizeof($aPlaceIDs)) { + $aResult = $this->queryHouseNumber($oDB, $aPlaceIDs, $iLimit); + + if (sizeof($aResult)) { + $iHousenumber = $aResult['iHouseNumber']; + $aPlaceIDs = $aResult['aPlaceIDs']; + } elseif (!$this->looksLikeFullAddress()) { + $aPlaceIDs = array(); + } + } + + // finally get POIs if requested + if ($this->sClass && sizeof($aPlaceIDs)) { + $aPlaceIDs = $this->queryPoiByOperator($oDB, $aPlaceIDs, $iLimit); + } + } + + if (CONST_Debug) { + echo "
Place IDs: "; + var_Dump($aPlaceIDs); + } + + if (sizeof($aPlaceIDs) && $this->sPostcode) { + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; + $sSQL .= " AND postcode = '".$this->sPostcode."'"; + if (CONST_Debug) var_dump($sSQL); + $aFilteredPlaceIDs = chksql($oDB->getCol($sSQL)); + if ($aFilteredPlaceIDs) { + $aPlaceIDs = $aFilteredPlaceIDs; + if (CONST_Debug) { + echo "
Place IDs after postcode filtering: "; + var_Dump($aPlaceIDs); + } + } + } + + return array('IDs' => $aPlaceIDs, 'houseNumber' => $iHousenumber); + } + + + private function queryCountry(&$oDB) { $sSQL = 'SELECT place_id FROM placex '; $sSQL .= "WHERE country_code='".$this->sCountryCode."'"; @@ -382,7 +434,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNearbyPoi(&$oDB, $iLimit) + private function queryNearbyPoi(&$oDB, $iLimit) { if (!$this->sClass) { return array(); @@ -433,7 +485,7 @@ class SearchDescription return array(); } - public function queryPostcode(&$oDB, $iLimit) + private function queryPostcode(&$oDB, $iLimit) { $sSQL = 'SELECT p.place_id FROM location_postcode p '; @@ -456,7 +508,7 @@ class SearchDescription return chksql($oDB->getCol($sSQL)); } - public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) + private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) { $aTerms = array(); $aOrder = array(); @@ -586,7 +638,7 @@ class SearchDescription return array(); } - public function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit) + private function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit) { $sPlaceIDs = join(',', $aRoadPlaceIDs); @@ -680,7 +732,7 @@ class SearchDescription } - public function queryPoiByOperator(&$oDB, $aParentIDs, $iLimit) + private function queryPoiByOperator(&$oDB, $aParentIDs, $iLimit) { $sPlaceIDs = join(',', $aParentIDs); $aClassPlaceIDs = array(); From c02bf4986f4c6a820325727fa4b56e54a63c40d6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 9 Oct 2017 23:12:13 +0200 Subject: [PATCH 18/19] coding style and some documentation --- lib/Geocode.php | 2 +- lib/SearchContext.php | 12 ++++++++++-- lib/SearchDescription.php | 36 ++++++++++++++++++------------------ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 27c33ad6..e02aae94 
100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -846,7 +846,7 @@ class Geocode $this->aRouteWidth, $this->bBoundedSearch ); - } else if ($this->aViewBox) { + } elseif ($this->aViewBox) { $oCtx->setViewboxFromBox($this->aViewBox, $this->bBoundedSearch); } if ($this->aExcludePlaceIDs) { diff --git a/lib/SearchContext.php b/lib/SearchContext.php index f5eab95a..9bab8658 100644 --- a/lib/SearchContext.php +++ b/lib/SearchContext.php @@ -6,7 +6,7 @@ require_once(CONST_BasePath.'/lib/lib.php'); /** - * Collects search constraints that are independent of the + * Collection of search constraints that are independent of the * actual interpretation of the search query. * * The search context is shared between all SearchDescriptions. This @@ -15,16 +15,25 @@ require_once(CONST_BasePath.'/lib/lib.php'); */ class SearchContext { + /// Search radius around a given Near reference point. private $fNearRadius = false; + /// True if search must be restricted to viewbox only. public $bViewboxBounded = false; + /// Reference point for search (as SQL). public $sqlNear = ''; + /// Viewbox selected for search (as SQL). public $sqlViewboxSmall = ''; + /// Viewbox with a larger buffer around (as SQL). public $sqlViewboxLarge = ''; + /// Reference along a route (as SQL). public $sqlViewboxCentre = ''; + /// List of countries to restrict search to (as SQL). public $sqlCountryList = ''; + /// List of place IDs to exclude (as SQL). 
private $sqlExcludeList = ''; + public function hasNearPoint() { return $this->fNearRadius !== false; @@ -44,7 +53,6 @@ class SearchContext public function isBoundedSearch() { return $this->hasNearPoint() || ($this->sqlViewboxSmall && $this->bViewboxBounded); - } public function setViewboxFromBox(&$aViewBox, $bBounded) diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index f3afaff2..c287c898 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -73,23 +73,6 @@ class SearchDescription && preg_match('/[0-9]+/', $this->sHouseNumber); } - private function poiTable() - { - return 'place_classtype_'.$this->sClass.'_'.$this->sType; - } - - public function countryCodeSQL($sVar) - { - if ($this->sCountryCode) { - return $sVar.' = \''.$this->sCountryCode."'"; - } - if ($this->oContext->sqlCountryList) { - return $sVar.' in '.$this->oContext->sqlCountryList; - } - - return ''; - } - public function hasOperator() { return $this->iOperator != Operator::NONE; @@ -335,6 +318,7 @@ class SearchDescription /////////// Query functions + public function query(&$oDB, &$aWordFrequencyScores, &$aExactMatchCache, $iMinRank, $iMaxRank, $iLimit) { $aPlaceIDs = array(); @@ -450,7 +434,7 @@ class SearchDescription } if ($this->oContext->hasNearPoint()) { $sSQL .= ' WHERE '.$this->oContext->withinSQL('ct.centroid'); - } else if ($this->oContext->bViewboxBounded) { + } elseif ($this->oContext->bViewboxBounded) { $sSQL .= ' WHERE ST_Contains('.$this->oContext->sqlViewboxSmall.', ct.centroid)'; } if ($this->oContext->sqlCountryList) { @@ -869,6 +853,22 @@ class SearchDescription return $aClassPlaceIDs; } + private function poiTable() + { + return 'place_classtype_'.$this->sClass.'_'.$this->sType; + } + + private function countryCodeSQL($sVar) + { + if ($this->sCountryCode) { + return $sVar.' = \''.$this->sCountryCode."'"; + } + if ($this->oContext->sqlCountryList) { + return $sVar.' 
in '.$this->oContext->sqlCountryList; + } + + return ''; + } /////////// Sort functions From c8780da19c32c2ce5f6ea02d86a9db2d96e27195 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 10 Oct 2017 00:15:56 +0200 Subject: [PATCH 19/19] documentation for SearchContext and SearchDescription --- lib/SearchContext.php | 104 ++++++++++++++++++++++++++++++++++- lib/SearchDescription.php | 110 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 2 deletions(-) diff --git a/lib/SearchContext.php b/lib/SearchContext.php index 9bab8658..134b138f 100644 --- a/lib/SearchContext.php +++ b/lib/SearchContext.php @@ -34,27 +34,67 @@ class SearchContext private $sqlExcludeList = ''; + /** + * Check if a reference point is defined. + * + * @return bool True if a reference point is defined. + */ public function hasNearPoint() { return $this->fNearRadius !== false; } + /** + * Get radius around reference point. + * + * @return float Search radius around reference point. + */ public function nearRadius() { return $this->fNearRadius; } + /** + * Set search reference point in WGS84. + * + * If set, then only places around this point will be taken into account. + * + * @param float $fLat Latitude of point. + * @param float $fLon Longitude of point. + * @param float $fRadius Search radius around point. + * + * @return void + */ public function setNearPoint($fLat, $fLon, $fRadius = 0.1) { $this->fNearRadius = $fRadius; $this->sqlNear = 'ST_SetSRID(ST_Point('.$fLon.','.$fLat.'),4326)'; } + /** + * Check if the search is geographically restricted. + * + * Searches are restricted if a reference point is given or if + * a bounded viewbox is set. + * + * @return bool True, if the search is geographically bounded. + */ public function isBoundedSearch() { return $this->hasNearPoint() || ($this->sqlViewboxSmall && $this->bViewboxBounded); } + /** + * Set rectangular viewbox.
+ * + * The viewbox may be bounded which means that no search results + * must be outside the viewbox. + * + * @param float[4] $aViewBox Coordinates of the viewbox. + * @param bool $bBounded True if the viewbox is bounded. + * + * @return void + */ public function setViewboxFromBox(&$aViewBox, $bBounded) { $this->bViewboxBounded = $bBounded; @@ -80,6 +120,19 @@ class SearchContext ); } + /** + * Set viewbox along a route. + * + * The viewbox may be bounded which means that no search results + * must be outside the viewbox. + * + * @param object $oDB DB connection to use for computing the box. + * @param string[] $aRoutePoints List of x,y coordinates along a route. + * @param float $fRouteWidth Buffer around the route to use. + * @param bool $bBounded True if the viewbox is bounded. + * + * @return void + */ public function setViewboxFromRoute(&$oDB, $aRoutePoints, $fRouteWidth, $bBounded) { $this->bViewboxBounded = $bBounded; @@ -101,22 +154,36 @@ class SearchContext $this->sqlViewboxLarge = "'".$sGeom."'::geometry"; } + /** + * Set list of excluded place IDs. + * + * @param integer[] $aExcluded List of IDs. + * + * @return void + */ public function setExcludeList($aExcluded) { $this->sqlExcludeList = ' not in ('.join(',', $aExcluded).')'; } + /** + * Set list of countries to restrict search to. + * + * @param string[] $aCountries List of two-letter lower-case country codes. + * + * @return void + */ public function setCountryList($aCountries) { $this->sqlCountryList = '('.join(',', array_map('addQuotes', $aCountries)).')'; } /** - * Extract a coordinate point from a query string. + * Extract a reference point from a query string. * * @param string $sQuery Query to scan. * - * @return The remaining query string. + * @return string The remaining query string. */ public function setNearPointFromQuery($sQuery) { @@ -135,16 +202,41 @@ class SearchContext return $sQuery; } + /** + * Get an SQL snippet for computing the distance from the reference point.
+ * + * @param string $sObj SQL variable name to compute the distance from. + * + * @return string An SQL string. + */ public function distanceSQL($sObj) { return 'ST_Distance('.$this->sqlNear.", $sObj)"; } + /** + * Get an SQL snippet for checking if something is within range of the + * reference point. + * + * @param string $sObj SQL variable name to compute if it is within range. + * + * @return string An SQL string. + */ public function withinSQL($sObj) { return sprintf('ST_DWithin(%s, %s, %F)', $sObj, $this->sqlNear, $this->fNearRadius); } + /** + * Get an SQL snippet of the importance factor of the viewbox. + * + * The importance factor is computed by checking if an object is within + * the viewbox and/or the extended version of the viewbox. + * + * @param string $sObj SQL variable name of object to weight the importance + * + * @return string SQL snippet of the factor with a leading multiply sign. + */ public function viewboxImportanceSQL($sObj) { $sSQL = ''; @@ -159,6 +251,14 @@ class SearchContext return $sSQL; } + /** + * SQL snippet checking if a place ID should be excluded. + * + * @param string $sVariable SQL variable name of place ID to check, + * potentially prefixed with more SQL. + * + * @return string SQL snippet. + */ public function excludeSQL($sVariable) { if ($this->sqlExcludeList) { diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index c287c898..1f3765ab 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -43,22 +43,55 @@ class SearchDescription private $iNamePhrase = -1; + /** + * Create an empty search description. + * + * @param object $oContext Global context to use. Will be inherited by + * all derived search objects. + */ public function __construct($oContext) { $this->oContext = $oContext; } + /** + * Get current search rank. + * + * The higher the search rank the lower the likelihood that the + * search is a correct interpretation of the search query. + * + * @return integer Search rank.
+ */ public function getRank() { return $this->iSearchRank; } + /** + * Increase the search rank. + * + * @param integer $iAddRank Number of ranks to increase. + * + * @return integer The updated search rank. + */ public function addToRank($iAddRank) { $this->iSearchRank += $iAddRank; return $this->iSearchRank; } + /** + * Make this search a POI search. + * + * In a POI search, objects are not (only) searched by their name + * but also by the primary OSM key/value pair (class and type in Nominatim). + * + * @param integer $iOperator Type of POI search + * @param string $sClass Class (or OSM tag key) of POI. + * @param string $sType Type (or OSM tag value) of POI. + * + * @return void + */ public function setPoiSearch($iOperator, $sClass, $sType) { $this->iOperator = $iOperator; @@ -66,6 +99,11 @@ class SearchDescription $this->sType = $sType; } + /** + * Check if this might be a full address search. + * + * @return bool True if the search contains name, address and housenumber. + */ public function looksLikeFullAddress() { return sizeof($this->aName) @@ -73,11 +111,27 @@ class SearchDescription && preg_match('/[0-9]+/', $this->sHouseNumber); } + /** + * Check if any operator is set. + * + * @return bool True, if this is a special search operation. + */ public function hasOperator() { return $this->iOperator != Operator::NONE; } + /** + * Extract key/value pairs from a query. + * + * Key/value pairs are recognised if they are of the form [key=value]. + * If multiple terms of this kind are found then all terms are removed + * but only the first is used for search. + * + * @param string $sQuery Original query string. + * + * @return string The query string with the special search patterns removed. + */ public function extractKeyValuePairs($sQuery) { // Search for terms of kind [key=value]. @@ -98,6 +152,13 @@ class SearchDescription return $sQuery; } + /** + * Check if the combination of parameters is sensible. + * + * @param string[] $aCountryCodes List of country codes.
+ * + * @return bool True, if the search looks valid. + */ public function isValidSearch(&$aCountryCodes) { if (!sizeof($this->aName)) { @@ -118,6 +179,25 @@ class SearchDescription /////////// Search building functions + /** + * Derive new searches by adding a full term to the existing search. + * + * @param mixed[] $aSearchTerm Description of the token. + * @param bool $bWordInQuery True, if the normalised version of the word + * is contained in the query. + * @param bool $bHasPartial True if there are also tokens of partial terms + * with the same name. + * @param string $sPhraseType Type of phrase the token is contained in. + * @param bool $bFirstToken True if the token is at the beginning of the + * query. + * @param bool $bFirstPhrase True if the token is in the first phrase of + * the query. + * @param bool $bLastToken True if the token is at the end of the query. + * @param integer $iGlobalRank Changeable ranking of all searches in the + * batch. + * + * @return SearchDescription[] List of derived search descriptions. + */ public function extendWithFullTerm($aSearchTerm, $bWordInQuery, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken, &$iGlobalRank) { $aNewSearches = array(); @@ -247,6 +327,19 @@ class SearchDescription return $aNewSearches; } + /** + * Derive new searches by adding a partial term to the existing search. + * + * @param mixed[] $aSearchTerm Description of the token. + * @param bool $bStructuredPhrases True if the search is structured. + * @param integer $iPhrase Number of the phrase the token is in. + * @param mixed[] $aWordFrequencyScores Number of times each token appears + * overall in a planet database. + * @param array[] $aFullTokens List of full term tokens with the + * same name. + * + * @return SearchDescription[] List of derived search descriptions. + */ public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, &$aWordFrequencyScores, $aFullTokens) { // Only allow name terms.
@@ -319,6 +412,23 @@ class SearchDescription /////////// Query functions + /** + * Query database for places that match this search. + * + * @param object $oDB Database connection to use. + * @param mixed[] $aWordFrequencyScores Number of times each token appears + * overall in a planet database. + * @param mixed[] $aExactMatchCache Saves number of exact matches. + * @param integer $iMinRank Minimum address rank to restrict + * search to. + * @param integer $iMaxRank Maximum address rank to restrict + * search to. + * @param integer $iLimit Maximum number of results. + * + * @return mixed[] An array with two fields: IDs contains the list of + * matching place IDs and houseNumber the houseNumber + * if applicable or -1 if not. + */ public function query(&$oDB, &$aWordFrequencyScores, &$aExactMatchCache, $iMinRank, $iMaxRank, $iLimit) { $aPlaceIDs = array();
rankName TokensName NotAddress TokensAddress Notcountryoperatorclasstypepostcodehouse#LatLonRadius
Address TokensAddress Notcountryoperatorclasstypepostcodehousenumber