mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 10:27:57 +00:00
cache postcode normalization
This commit is contained in:
@@ -78,6 +78,18 @@ class PostcodeFormatter:
|
||||
self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})
|
||||
|
||||
|
||||
def get_matcher(self, country_code):
|
||||
""" Return the CountryPostcodeMatcher for the given country.
|
||||
Returns None if the country doesn't have a postcode and the
|
||||
default matcher if there is no specific matcher configured for
|
||||
the country.
|
||||
"""
|
||||
if country_code in self.country_without_postcode:
|
||||
return None
|
||||
|
||||
return self.country_matcher.get(country_code, self.default_matcher)
|
||||
|
||||
|
||||
def match(self, country_code, postcode):
|
||||
""" Match the given postcode against the postcode pattern for this
|
||||
matcher. Returns a `re.Match` object if the country has a pattern
|
||||
|
||||
@@ -37,16 +37,27 @@ class _CountryPostcodesCollector:
|
||||
""" Collector for postcodes of a single country.
|
||||
"""
|
||||
|
||||
def __init__(self, country):
|
||||
def __init__(self, country, matcher):
|
||||
self.country = country
|
||||
self.matcher = matcher
|
||||
self.collected = defaultdict(PointsCentroid)
|
||||
self.normalization_cache = None
|
||||
|
||||
|
||||
def add(self, postcode, x, y):
|
||||
""" Add the given postcode to the collection cache. If the postcode
|
||||
already existed, it is overwritten with the new centroid.
|
||||
"""
|
||||
self.collected[postcode] += (x, y)
|
||||
if self.matcher is not None:
|
||||
if self.normalization_cache and self.normalization_cache[0] == postcode:
|
||||
normalized = self.normalization_cache[1]
|
||||
else:
|
||||
match = self.matcher.match(postcode)
|
||||
normalized = self.matcher.normalize(match) if match else None
|
||||
self.normalization_cache = (postcode, normalized)
|
||||
|
||||
if normalized:
|
||||
self.collected[normalized] += (x, y)
|
||||
|
||||
|
||||
def commit(self, conn, analyzer, project_dir):
|
||||
@@ -193,18 +204,16 @@ def update_postcodes(dsn, project_dir, tokenizer):
|
||||
if collector is None or country != collector.country:
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
collector = _CountryPostcodesCollector(country)
|
||||
collector = _CountryPostcodesCollector(country, matcher.get_matcher(country))
|
||||
todo_countries.discard(country)
|
||||
match = matcher.match(country, postcode)
|
||||
if match:
|
||||
collector.add(matcher.normalize(country, match), x, y)
|
||||
collector.add(postcode, x, y)
|
||||
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
|
||||
# Now handle any countries that are only in the postcode table.
|
||||
for country in todo_countries:
|
||||
_CountryPostcodesCollector(country).commit(conn, analyzer, project_dir)
|
||||
_CountryPostcodesCollector(country, matcher.get_matcher(country)).commit(conn, analyzer, project_dir)
|
||||
|
||||
conn.commit()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user