mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
cache postcode normalization
This commit is contained in:
@@ -78,6 +78,18 @@ class PostcodeFormatter:
|
|||||||
self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})
|
self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})
|
||||||
|
|
||||||
|
|
||||||
|
def get_matcher(self, country_code):
|
||||||
|
""" Return the CountryPostcodeMatcher for the given country.
|
||||||
|
Returns None if the country doesn't have a postcode and the
|
||||||
|
default matcher if there is no specific matcher configured for
|
||||||
|
the country.
|
||||||
|
"""
|
||||||
|
if country_code in self.country_without_postcode:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return self.country_matcher.get(country_code, self.default_matcher)
|
||||||
|
|
||||||
|
|
||||||
def match(self, country_code, postcode):
|
def match(self, country_code, postcode):
|
||||||
""" Match the given postcode against the postcode pattern for this
|
""" Match the given postcode against the postcode pattern for this
|
||||||
matcher. Returns a `re.Match` object if the country has a pattern
|
matcher. Returns a `re.Match` object if the country has a pattern
|
||||||
|
|||||||
@@ -37,16 +37,27 @@ class _CountryPostcodesCollector:
|
|||||||
""" Collector for postcodes of a single country.
|
""" Collector for postcodes of a single country.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, country):
|
def __init__(self, country, matcher):
|
||||||
self.country = country
|
self.country = country
|
||||||
|
self.matcher = matcher
|
||||||
self.collected = defaultdict(PointsCentroid)
|
self.collected = defaultdict(PointsCentroid)
|
||||||
|
self.normalization_cache = None
|
||||||
|
|
||||||
|
|
||||||
def add(self, postcode, x, y):
|
def add(self, postcode, x, y):
|
||||||
""" Add the given postcode to the collection cache. If the postcode
|
""" Add the given postcode to the collection cache. If the postcode
|
||||||
already existed, it is overwritten with the new centroid.
|
already existed, it is overwritten with the new centroid.
|
||||||
"""
|
"""
|
||||||
self.collected[postcode] += (x, y)
|
if self.matcher is not None:
|
||||||
|
if self.normalization_cache and self.normalization_cache[0] == postcode:
|
||||||
|
normalized = self.normalization_cache[1]
|
||||||
|
else:
|
||||||
|
match = self.matcher.match(postcode)
|
||||||
|
normalized = self.matcher.normalize(match) if match else None
|
||||||
|
self.normalization_cache = (postcode, normalized)
|
||||||
|
|
||||||
|
if normalized:
|
||||||
|
self.collected[normalized] += (x, y)
|
||||||
|
|
||||||
|
|
||||||
def commit(self, conn, analyzer, project_dir):
|
def commit(self, conn, analyzer, project_dir):
|
||||||
@@ -193,18 +204,16 @@ def update_postcodes(dsn, project_dir, tokenizer):
|
|||||||
if collector is None or country != collector.country:
|
if collector is None or country != collector.country:
|
||||||
if collector is not None:
|
if collector is not None:
|
||||||
collector.commit(conn, analyzer, project_dir)
|
collector.commit(conn, analyzer, project_dir)
|
||||||
collector = _CountryPostcodesCollector(country)
|
collector = _CountryPostcodesCollector(country, matcher.get_matcher(country))
|
||||||
todo_countries.discard(country)
|
todo_countries.discard(country)
|
||||||
match = matcher.match(country, postcode)
|
collector.add(postcode, x, y)
|
||||||
if match:
|
|
||||||
collector.add(matcher.normalize(country, match), x, y)
|
|
||||||
|
|
||||||
if collector is not None:
|
if collector is not None:
|
||||||
collector.commit(conn, analyzer, project_dir)
|
collector.commit(conn, analyzer, project_dir)
|
||||||
|
|
||||||
# Now handle any countries that are only in the postcode table.
|
# Now handle any countries that are only in the postcode table.
|
||||||
for country in todo_countries:
|
for country in todo_countries:
|
||||||
_CountryPostcodesCollector(country).commit(conn, analyzer, project_dir)
|
_CountryPostcodesCollector(country, matcher.get_matcher(country)).commit(conn, analyzer, project_dir)
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user