mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
85 lines
2.7 KiB
Python
85 lines
2.7 KiB
Python
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Simple dict-based implementation of a trie structure.
"""
|
|
from typing import TypeVar, Generic, Tuple, Optional, List, Dict
|
|
from collections import defaultdict
|
|
|
|
T = TypeVar('T')


class SimpleTrie(Generic[T]):
    """ A simple read-only trie structure.

        This structure supports exactly one lookup operation,
        which is longest-prefix lookup.

        The trie is filled from the (key, value) pairs handed to the
        constructor and compacted afterwards. 'None' is used internally
        as the marker for "no value stored here", so a key that is added
        with the value None can never be found by a lookup.
    """

    def __init__(self, data: Optional[List[Tuple[str, T]]] = None) -> None:
        # Subtrees keyed by the next character of the word. The defaultdict
        # creates missing nodes on the fly while the trie is being filled.
        self._tree: Dict[str, 'SimpleTrie[T]'] = defaultdict(SimpleTrie[T])
        # Value of the word that ends at this node, None if there is none.
        self._value: Optional[T] = None
        # Characters that must be matched before value and subtrees of
        # this node apply. Only set by compaction.
        self._prefix = ''

        if data:
            for key, value in data:
                self._add(key, 0, value)

            self._make_compact()

    def _add(self, word: str, pos: int, value: T) -> None:
        """ (Internal) Add a sub-word to the trie.

            The word is added from index 'pos'. If the sub-word to add
            is empty, then the trie saves the given value. A value that
            was saved earlier for the same word is overwritten.
        """
        if pos < len(word):
            self._tree[word[pos]]._add(word, pos + 1, value)
        else:
            self._value = value

    def _make_compact(self) -> None:
        """ (Internal) Compress tree where there is exactly one subtree
            and no value.

            Compression works recursively starting at the leaf.
        """
        for subtree in self._tree.values():
            subtree._make_compact()

        if len(self._tree) == 1 and self._value is None:
            # A node gets its prefix only here, so it must still be empty.
            assert not self._prefix
            # Pull the only child up into this node: its key character and
            # its own prefix become the prefix of this node.
            key, child = next(iter(self._tree.items()))
            self._prefix = key + child._prefix
            self._tree = child._tree
            self._value = child._value

    def longest_prefix(self, word: str, start: int = 0) -> Tuple[Optional[T], int]:
        """ Return the longest prefix match for the given word starting at
            the position 'start'.

            The function returns a tuple with the value for the longest match and
            the position of the word after the match. If no match was found at
            all, the function returns (None, start).
        """
        cur = self
        pos = start
        result: Tuple[Optional[T], int] = None, start

        while True:
            if cur._prefix:
                # A compacted node only matches when its full prefix follows.
                if not word.startswith(cur._prefix, pos):
                    return result
                pos += len(cur._prefix)

            # Compare against None explicitly, so that falsy values
            # (0, '', empty containers) count as a match as well.
            if cur._value is not None:
                result = cur._value, pos

            # Test membership first: indexing the defaultdict with an
            # unknown character would insert a new empty node.
            if pos >= len(word) or word[pos] not in cur._tree:
                return result

            cur = cur._tree[word[pos]]
            pos += 1
|