Can't filter on rank_search before it has been assigned

This commit is contained in:
Brian Quinion
2012-05-11 19:23:44 +02:00
parent b9a3e52a67
commit 0ab9c90708
3 changed files with 66 additions and 29 deletions

View File

@@ -443,7 +443,7 @@ DECLARE
place_centre GEOMETRY; place_centre GEOMETRY;
nearcountry RECORD; nearcountry RECORD;
BEGIN BEGIN
place_centre := ST_Centroid(place); place_centre := ST_PointOnSurface(place);
--RAISE WARNING 'start: %', ST_AsText(place_centre); --RAISE WARNING 'start: %', ST_AsText(place_centre);
@@ -909,11 +909,7 @@ BEGIN
NEW.place_id := nextval('seq_place'); NEW.place_id := nextval('seq_place');
NEW.indexed_status := 1; --STATUS_NEW NEW.indexed_status := 1; --STATUS_NEW
IF NEW.rank_search >= 4 THEN NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
ELSE
NEW.country_code := NULL;
END IF;
NEW.partition := get_partition(NEW.geometry, NEW.country_code); NEW.partition := get_partition(NEW.geometry, NEW.country_code);
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry); NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
@@ -997,9 +993,11 @@ BEGIN
IF NEW.type in ('continent') THEN IF NEW.type in ('continent') THEN
NEW.rank_search := 2; NEW.rank_search := 2;
NEW.rank_address := NEW.rank_search; NEW.rank_address := NEW.rank_search;
NEW.country_code := NULL;
ELSEIF NEW.type in ('sea') THEN ELSEIF NEW.type in ('sea') THEN
NEW.rank_search := 2; NEW.rank_search := 2;
NEW.rank_address := 0; NEW.rank_address := 0;
NEW.country_code := NULL;
ELSEIF NEW.type in ('country') THEN ELSEIF NEW.type in ('country') THEN
NEW.rank_search := 4; NEW.rank_search := 4;
NEW.rank_address := NEW.rank_search; NEW.rank_address := NEW.rank_search;
@@ -1114,7 +1112,12 @@ BEGIN
END IF; END IF;
IF (NEW.extratags -> 'capital') = 'yes' THEN IF (NEW.extratags -> 'capital') = 'yes' THEN
NEW.rank_search := NEW.rank_search -1; NEW.rank_search := NEW.rank_search - 1;
END IF;
-- a country code make no sense below rank 4 (country)
IF NEW.rank_address < 4 THEN
NEW.country_code := NULL;
END IF; END IF;
-- Block import below rank 22 -- Block import below rank 22
@@ -1122,7 +1125,7 @@ BEGIN
-- RETURN NULL; -- RETURN NULL;
-- END IF; -- END IF;
RETURN NEW; -- The following is not needed until doing diff updates, and slows the main index process down RETURN NEW; -- @DIFFUPDATES@ The following is not needed until doing diff updates, and slows the main index process down
IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN
-- Performance: We just can't handle re-indexing for country level changes -- Performance: We just can't handle re-indexing for country level changes
@@ -1311,12 +1314,12 @@ BEGIN
END LOOP; END LOOP;
NEW.importance := null; NEW.importance := null;
select language||':'||title,importance from get_wikipedia_match(NEW.extratags) INTO NEW.wikipedia,NEW.importance; select language||':'||title,importance from get_wikipedia_match(NEW.extratags, NEW.country_code) INTO NEW.wikipedia,NEW.importance;
IF NEW.importance IS NULL THEN IF NEW.importance IS NULL THEN
select language||':'||title,importance from wikipedia_article where osm_type = NEW.osm_type and osm_id = NEW.osm_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance; select language||':'||title,importance from wikipedia_article where osm_type = NEW.osm_type and osm_id = NEW.osm_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
END IF; END IF;
--RAISE WARNING '% %', NEW.place_id, NEW.rank_search; --RAISE WARNING 'before low level% %', NEW.place_id, NEW.rank_search;
-- For low level elements we inherit from our parent road -- For low level elements we inherit from our parent road
IF (NEW.rank_search > 27 OR (NEW.type = 'postcode' AND NEW.rank_search = 25)) THEN IF (NEW.rank_search > 27 OR (NEW.type = 'postcode' AND NEW.rank_search = 25)) THEN
@@ -1488,10 +1491,11 @@ BEGIN
-- see if we have any special relation members -- see if we have any special relation members
select members from planet_osm_rels where id = NEW.osm_id INTO relation_members; select members from planet_osm_rels where id = NEW.osm_id INTO relation_members;
-- RAISE WARNING 'get_osm_rel_members, label';
FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['label']) as member LOOP FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['label']) as member LOOP
select * from placex where osm_type = upper(substring(relMember.member,1,1)) select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1)
and osm_id = substring(relMember.member,2,10000)::integer order by rank_search desc limit 1 into linkedPlacex; and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 into linkedPlacex;
-- If we don't already have one use this as the centre point of the geometry -- If we don't already have one use this as the centre point of the geometry
IF NEW.centroid IS NULL THEN IF NEW.centroid IS NULL THEN
@@ -1514,14 +1518,15 @@ BEGIN
FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['admin_center','admin_centre']) as member LOOP FOR relMember IN select get_osm_rel_members(relation_members,ARRAY['admin_center','admin_centre']) as member LOOP
select * from placex where osm_type = upper(substring(relMember.member,1,1)) select * from placex where osm_type = upper(substring(relMember.member,1,1))::char(1)
and osm_id = substring(relMember.member,2,10000)::integer order by rank_search desc limit 1 into linkedPlacex; and osm_id = substring(relMember.member,2,10000)::bigint order by rank_search desc limit 1 into linkedPlacex;
-- For an admin centre we also want a name match - still not perfect, for example 'new york, new york' -- For an admin centre we also want a name match - still not perfect, for example 'new york, new york'
-- But that can be fixed by explicitly setting the label in the data -- But that can be fixed by explicitly setting the label in the data
IF make_standard_name(NEW.name->'name') = make_standard_name(linkedPlacex.name->'name') IF make_standard_name(NEW.name->'name') = make_standard_name(linkedPlacex.name->'name')
AND NEW.rank_search = linkedPlacex.rank_search THEN AND NEW.rank_search = linkedPlacex.rank_search THEN
-- If we don't already have one use this as the centre point of the geometry -- If we don't already have one use this as the centre point of the geometry
IF NEW.centroid IS NULL THEN IF NEW.centroid IS NULL THEN
NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry)); NEW.centroid := coalesce(linkedPlacex.centroid,st_centroid(linkedPlacex.geometry));
@@ -1552,7 +1557,7 @@ BEGIN
make_standard_name(name->'name') = make_standard_name(NEW.name->'name') make_standard_name(name->'name') = make_standard_name(NEW.name->'name')
AND placex.rank_search = NEW.rank_search AND placex.rank_search = NEW.rank_search
AND placex.place_id != NEW.place_id AND placex.place_id != NEW.place_id
AND placex.osm_type = 'N' AND placex.rank_search < 26 AND placex.osm_type = 'N'::char(1) AND placex.rank_search < 26
AND st_contains(NEW.geometry, placex.geometry) AND st_contains(NEW.geometry, placex.geometry)
LOOP LOOP
@@ -1582,11 +1587,11 @@ BEGIN
-- Did we gain a wikipedia tag in the process? then we need to recalculate our importance -- Did we gain a wikipedia tag in the process? then we need to recalculate our importance
IF NEW.importance is null THEN IF NEW.importance is null THEN
select language||':'||title,importance from get_wikipedia_match(NEW.extratags) INTO NEW.wikipedia,NEW.importance; select language||':'||title,importance from get_wikipedia_match(NEW.extratags, NEW.country_code) INTO NEW.wikipedia,NEW.importance;
END IF; END IF;
-- Still null? how about looking it up by the node id -- Still null? how about looking it up by the node id
IF NEW.importance IS NULL THEN IF NEW.importance IS NULL THEN
select language||':'||title,importance from wikipedia_article where osm_type = 'N' and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance; select language||':'||title,importance from wikipedia_article where osm_type = 'N'::char(1) and osm_id = linked_node_id order by importance desc limit 1 INTO NEW.wikipedia,NEW.importance;
END IF; END IF;
END IF; END IF;
@@ -2756,7 +2761,7 @@ create type wikipedia_article_match as (
importance FLOAT importance FLOAT
); );
CREATE OR REPLACE FUNCTION get_wikipedia_match(extratags HSTORE) RETURNS wikipedia_article_match CREATE OR REPLACE FUNCTION get_wikipedia_match(extratags HSTORE, country_code varchar(2)) RETURNS wikipedia_article_match
AS $$ AS $$
DECLARE DECLARE
langs TEXT[]; langs TEXT[];
@@ -2766,18 +2771,22 @@ DECLARE
wiki_article_language TEXT; wiki_article_language TEXT;
result wikipedia_article_match; result wikipedia_article_match;
BEGIN BEGIN
langs := ARRAY['','ar','bg','ca','cs','da','de','en','es','eo','eu','fa','fr','ko','hi','hr','id','it','he','lt','hu','ms','nl','ja','no','pl','pt','kk','ro','ru','sk','sl','sr','fi','sv','tr','uk','vi','vo','war','zh']; langs := ARRAY['english','country','ar','bg','ca','cs','da','de','en','es','eo','eu','fa','fr','ko','hi','hr','id','it','he','lt','hu','ms','nl','ja','no','pl','pt','kk','ro','ru','sk','sl','sr','fi','sv','tr','uk','vi','vo','war','zh'];
i := 1; i := 1;
WHILE langs[i] IS NOT NULL LOOP WHILE langs[i] IS NOT NULL LOOP
wiki_article := extratags->(case when langs[i] = '' THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END); wiki_article := extratags->(case when langs[i] in ('english','country') THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END);
IF wiki_article is not null THEN IF wiki_article is not null THEN
wiki_article := replace(regexp_replace(wiki_article,E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_'); wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/wiki/',E'\\2:');
wiki_article_title := split_part(wiki_article, ':', 2); wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/w/index.php\\?title=',E'\\2:');
wiki_article := regexp_replace(wiki_article,E'^(.*?)/([a-z]{2,3})/wiki/',E'\\2:');
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3})[=:]',E'\\2:');
wiki_article := replace(wiki_article,' ','_');
wiki_article_title := trim(split_part(wiki_article, ':', 2));
IF wiki_article_title IS NULL OR wiki_article_title = '' THEN IF wiki_article_title IS NULL OR wiki_article_title = '' THEN
wiki_article_title := wiki_article; wiki_article_title := trim(wiki_article);
wiki_article_language := CASE WHEN langs[i] = '' THEN 'en' ELSE langs[i] END; wiki_article_language := CASE WHEN langs[i] = 'english' THEN 'en' WHEN langs[i] = 'country' THEN get_country_language_code(country_code) ELSE langs[i] END;
ELSE ELSE
wiki_article_language := lower(split_part(wiki_article, ':', 1)); wiki_article_language := lower(trim(split_part(wiki_article, ':', 1)));
END IF; END IF;
select wikipedia_article.language,wikipedia_article.title,wikipedia_article.importance select wikipedia_article.language,wikipedia_article.title,wikipedia_article.importance

View File

@@ -19,7 +19,7 @@
if (isset($aCMDResult['parse-tiger'])) if (isset($aCMDResult['parse-tiger']))
{ {
$bDidSomething = true; $bDidSomething = true;
foreach(glob($aCMDResult['parse-tiger'].'/??_*', GLOB_ONLYDIR) as $sStateFolder) foreach(glob($aCMDResult['parse-tiger'].'/??_*', GLOB_ONLYDIR) as $sStateFolder)
{ {
preg_match('#([0-9]{2})_(.*)#',basename($sStateFolder), $aMatch); preg_match('#([0-9]{2})_(.*)#',basename($sStateFolder), $aMatch);
@@ -97,7 +97,7 @@
mkdir($sTempDir); mkdir($sTempDir);
$bDidSomething = true; $bDidSomething = true;
foreach(glob($aCMDResult['parse-tiger-2011'].'/tl_2011_?????_edges.zip', 0) as $sImportFile) foreach(glob($aCMDResult['parse-tiger-2011'].'/tl_2011_?????_edges.zip', 0) as $sImportFile)
{ {
set_time_limit(30); set_time_limit(30);

View File

@@ -219,7 +219,7 @@
if (file_exists($sWikiArticlesFile)) if (file_exists($sWikiArticlesFile))
{ {
echo "Importing wikipedia articles..."; echo "Importing wikipedia articles...";
pgsqlRunRestoreData($sWikiArticlesFile); pgsqlRunDropAndRestore($sWikiArticlesFile);
echo "...done\n"; echo "...done\n";
} }
else else
@@ -229,7 +229,7 @@
if (file_exists($sWikiRedirectsFile)) if (file_exists($sWikiRedirectsFile))
{ {
echo "Importing wikipedia redirects..."; echo "Importing wikipedia redirects...";
pgsqlRunRestoreData($sWikiRedirectsFile); pgsqlRunDropAndRestore($sWikiRedirectsFile);
echo "...done\n"; echo "...done\n";
} }
else else
@@ -558,3 +558,31 @@
proc_close($hProcess); proc_close($hProcess);
} }
/**
 * Restore a custom-format dump with pg_restore, dropping existing objects first.
 *
 * Builds a `pg_restore -Fc --clean` command line from the port and database
 * name found in CONST_Database_DSN, runs it, and echoes whatever the process
 * writes to stdout. The process's stderr is appended to /dev/null, so
 * diagnostics printed by pg_restore are not shown.
 *
 * @param string $sDumpFile Path of the dump file handed to pg_restore.
 *
 * @return void Calls fail() if the pg_restore process cannot be started.
 */
function pgsqlRunDropAndRestore($sDumpFile)
{
    // Convert database DSN to pg_restore parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;

    // Quote every interpolated value: the command is run through a shell, so
    // an unquoted path or database name containing spaces or metacharacters
    // would be split or interpreted instead of passed through verbatim.
    $sCMD = 'pg_restore'
        .' -p '.escapeshellarg((string)$aDSNInfo['port'])
        .' -d '.escapeshellarg((string)$aDSNInfo['database'])
        .' -Fc --clean '.escapeshellarg((string)$sDumpFile);

    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pg_restore');

    // pg_restore reads the dump from the file argument, not from stdin.
    fclose($ahPipes[0]);

    // TODO: error checking. The exit status from proc_close() is deliberately
    // not enforced here; NOTE(review): a --clean restore presumably exits
    // non-zero when the objects to drop do not exist yet (first import) -
    // confirm against pg_restore before turning this into a hard failure.
    while (!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    proc_close($hProcess);
}