mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
update to modern mkdocstrings python handler
This commit is contained in:
@@ -25,10 +25,10 @@ endforeach()
|
|||||||
ADD_CUSTOM_TARGET(doc
|
ADD_CUSTOM_TARGET(doc
|
||||||
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
|
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
|
||||||
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
|
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
|
||||||
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
|
COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
|
||||||
)
|
)
|
||||||
|
|
||||||
ADD_CUSTOM_TARGET(serve-doc
|
ADD_CUSTOM_TARGET(serve-doc
|
||||||
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
|
COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
|
||||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -178,64 +178,72 @@ The following is a list of sanitizers that are shipped with Nominatim.
|
|||||||
::: nominatim.tokenizer.sanitizers.split_name_list
|
::: nominatim.tokenizer.sanitizers.split_name_list
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
##### strip-brace-terms
|
##### strip-brace-terms
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.strip_brace_terms
|
::: nominatim.tokenizer.sanitizers.strip_brace_terms
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
##### tag-analyzer-by-language
|
##### tag-analyzer-by-language
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
|
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
##### clean-housenumbers
|
##### clean-housenumbers
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.clean_housenumbers
|
::: nominatim.tokenizer.sanitizers.clean_housenumbers
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
##### clean-postcodes
|
##### clean-postcodes
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.clean_postcodes
|
::: nominatim.tokenizer.sanitizers.clean_postcodes
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
##### clean-tiger-tags
|
##### clean-tiger-tags
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
|
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
#### delete-tags
|
#### delete-tags
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.delete_tags
|
::: nominatim.tokenizer.sanitizers.delete_tags
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
#### tag-japanese
|
#### tag-japanese
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.tag_japanese
|
::: nominatim.tokenizer.sanitizers.tag_japanese
|
||||||
selection:
|
selection:
|
||||||
members: False
|
members: False
|
||||||
rendering:
|
options:
|
||||||
heading_level: 6
|
heading_level: 6
|
||||||
|
docstring_section_style: spacy
|
||||||
|
|
||||||
#### Token Analysis
|
#### Token Analysis
|
||||||
|
|
||||||
|
|||||||
@@ -47,8 +47,8 @@ depending on your choice of webserver framework:
|
|||||||
The documentation is built with mkdocs:
|
The documentation is built with mkdocs:
|
||||||
|
|
||||||
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
|
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
|
||||||
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
|
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
|
||||||
* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
|
* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
|
||||||
|
|
||||||
### Installing prerequisites on Ubuntu/Debian
|
### Installing prerequisites on Ubuntu/Debian
|
||||||
|
|
||||||
|
|||||||
@@ -53,21 +53,18 @@ the function.
|
|||||||
### Sanitizer configuration
|
### Sanitizer configuration
|
||||||
|
|
||||||
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
|
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
|
||||||
rendering:
|
options:
|
||||||
show_source: no
|
heading_level: 3
|
||||||
heading_level: 6
|
|
||||||
|
|
||||||
### The main filter function of the sanitizer
|
### The main filter function of the sanitizer
|
||||||
|
|
||||||
The filter function receives a single object of type `ProcessInfo`
|
The filter function receives a single object of type `ProcessInfo`
|
||||||
which has three members:
|
which has three members:
|
||||||
|
|
||||||
* `place`: read-only information about the place being processed.
|
* `place: PlaceInfo`: read-only information about the place being processed.
|
||||||
See PlaceInfo below.
|
See PlaceInfo below.
|
||||||
* `names`: The current list of names for the place. Each name is a
|
* `names: List[PlaceName]`: The current list of names for the place.
|
||||||
PlaceName object.
|
* `address: List[PlaceName]`: The current list of address names for the place.
|
||||||
* `address`: The current list of address names for the place. Each name
|
|
||||||
is a PlaceName object.
|
|
||||||
|
|
||||||
While the `place` member is provided for information only, the `names` and
|
While the `place` member is provided for information only, the `names` and
|
||||||
`address` lists are meant to be manipulated by the sanitizer. It may add and
|
`address` lists are meant to be manipulated by the sanitizer. It may add and
|
||||||
@@ -77,17 +74,15 @@ adding extra attributes) or completely replace the list with a different one.
|
|||||||
#### PlaceInfo - information about the place
|
#### PlaceInfo - information about the place
|
||||||
|
|
||||||
::: nominatim.data.place_info.PlaceInfo
|
::: nominatim.data.place_info.PlaceInfo
|
||||||
rendering:
|
options:
|
||||||
show_source: no
|
heading_level: 3
|
||||||
heading_level: 6
|
|
||||||
|
|
||||||
|
|
||||||
#### PlaceName - extended naming information
|
#### PlaceName - extended naming information
|
||||||
|
|
||||||
::: nominatim.data.place_name.PlaceName
|
::: nominatim.data.place_name.PlaceName
|
||||||
rendering:
|
options:
|
||||||
show_source: no
|
heading_level: 3
|
||||||
heading_level: 6
|
|
||||||
|
|
||||||
|
|
||||||
### Example: Filter for US street prefixes
|
### Example: Filter for US street prefixes
|
||||||
@@ -145,15 +140,13 @@ They can be found in the directory
|
|||||||
## Custom token analysis module
|
## Custom token analysis module
|
||||||
|
|
||||||
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
|
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
|
||||||
rendering:
|
options:
|
||||||
show_source: no
|
heading_level: 3
|
||||||
heading_level: 6
|
|
||||||
|
|
||||||
|
|
||||||
::: nominatim.tokenizer.token_analysis.base.Analyzer
|
::: nominatim.tokenizer.token_analysis.base.Analyzer
|
||||||
rendering:
|
options:
|
||||||
show_source: no
|
heading_level: 3
|
||||||
heading_level: 6
|
|
||||||
|
|
||||||
### Example: Creating acronym variants for long names
|
### Example: Creating acronym variants for long names
|
||||||
|
|
||||||
|
|||||||
@@ -134,14 +134,14 @@ All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
|
|||||||
and implement the abstract functions defined there.
|
and implement the abstract functions defined there.
|
||||||
|
|
||||||
::: nominatim.tokenizer.base.AbstractTokenizer
|
::: nominatim.tokenizer.base.AbstractTokenizer
|
||||||
rendering:
|
options:
|
||||||
heading_level: 4
|
heading_level: 3
|
||||||
|
|
||||||
### Python Analyzer Class
|
### Python Analyzer Class
|
||||||
|
|
||||||
::: nominatim.tokenizer.base.AbstractAnalyzer
|
::: nominatim.tokenizer.base.AbstractAnalyzer
|
||||||
rendering:
|
options:
|
||||||
heading_level: 4
|
heading_level: 3
|
||||||
|
|
||||||
### PL/pgSQL Functions
|
### PL/pgSQL Functions
|
||||||
|
|
||||||
|
|||||||
@@ -59,7 +59,8 @@ plugins:
|
|||||||
- search
|
- search
|
||||||
- mkdocstrings:
|
- mkdocstrings:
|
||||||
handlers:
|
handlers:
|
||||||
python-legacy:
|
python:
|
||||||
rendering:
|
paths: ["${PROJECT_SOURCE_DIR}"]
|
||||||
show_source: false
|
options:
|
||||||
show_signature_annotations: false
|
show_source: False
|
||||||
|
show_bases: False
|
||||||
|
|||||||
@@ -53,8 +53,8 @@ class AbstractAnalyzer(ABC):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The function returns the list of all tuples that could be
|
The function returns the list of all tuples that could be
|
||||||
found for the given words. Each list entry is a tuple of
|
found for the given words. Each list entry is a tuple of
|
||||||
(original word, word token, word id).
|
(original word, word token, word id).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -118,7 +118,7 @@ class AbstractAnalyzer(ABC):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A JSON-serialisable structure that will be handed into
|
A JSON-serialisable structure that will be handed into
|
||||||
the database via the `token_info` field.
|
the database via the `token_info` field.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -144,8 +144,6 @@ class AbstractTokenizer(ABC):
|
|||||||
tables should be skipped. This option is only required for
|
tables should be skipped. This option is only required for
|
||||||
migration purposes and can be safely ignored by custom
|
migration purposes and can be safely ignored by custom
|
||||||
tokenizers.
|
tokenizers.
|
||||||
|
|
||||||
TODO: can we move the init_db parameter somewhere else?
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -197,8 +195,8 @@ class AbstractTokenizer(ABC):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
If an issue was found, return an error message with the
|
If an issue was found, return an error message with the
|
||||||
description of the issue as well as hints for the user on
|
description of the issue as well as hints for the user on
|
||||||
how to resolve the issue. If everything is okay, return `None`.
|
how to resolve the issue. If everything is okay, return `None`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -236,8 +234,12 @@ class AbstractTokenizer(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
|
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
|
||||||
""" Return a list of the `num` most frequent full words
|
""" Return a list of the most frequent full words in the database.
|
||||||
in the database.
|
|
||||||
|
Arguments:
|
||||||
|
conn: Open connection to the database which may be used to
|
||||||
|
retrieve the words.
|
||||||
|
num: Maximum number of words to return.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -41,9 +41,9 @@ class SanitizerConfig(_BaseUserDict):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
If the parameter value is a simple string, it is returned as a
|
If the parameter value is a simple string, it is returned as a
|
||||||
one-item list. If the parameter value does not exist, the given
|
one-item list. If the parameter value does not exist, the given
|
||||||
default is returned. If the parameter value is a list, it is
|
default is returned. If the parameter value is a list, it is
|
||||||
checked to contain only strings before being returned.
|
checked to contain only strings before being returned.
|
||||||
"""
|
"""
|
||||||
values = self.data.get(param, None)
|
values = self.data.get(param, None)
|
||||||
|
|
||||||
@@ -94,10 +94,10 @@ class SanitizerConfig(_BaseUserDict):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A regular expression pattern which can be used to
|
A regular expression pattern which can be used to
|
||||||
split a string. The regular expression makes sure that the
|
split a string. The regular expression makes sure that the
|
||||||
resulting names are stripped and that repeated delimiters
|
resulting names are stripped and that repeated delimiters
|
||||||
are ignored. It may still create empty fields on occasion. The
|
are ignored. It may still create empty fields on occasion. The
|
||||||
code needs to filter those.
|
code needs to filter those.
|
||||||
"""
|
"""
|
||||||
delimiter_set = set(self.data.get('delimiters', default))
|
delimiter_set = set(self.data.get('delimiters', default))
|
||||||
if not delimiter_set:
|
if not delimiter_set:
|
||||||
@@ -133,8 +133,8 @@ class SanitizerConfig(_BaseUserDict):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A filter function that takes a target string as the argument and
|
A filter function that takes a target string as the argument and
|
||||||
returns True if it fully matches any of the regular expressions
|
returns True if it fully matches any of the regular expressions
|
||||||
otherwise returns False.
|
otherwise returns False.
|
||||||
"""
|
"""
|
||||||
filters = self.get_string_list(param) or default
|
filters = self.get_string_list(param) or default
|
||||||
|
|
||||||
|
|||||||
@@ -28,8 +28,8 @@ class Analyzer(Protocol):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
ID string with a canonical form of the name. The string may
|
ID string with a canonical form of the name. The string may
|
||||||
be empty, when the analyzer cannot analyze the name at all,
|
be empty, when the analyzer cannot analyze the name at all,
|
||||||
for example because the character set in use does not match.
|
for example because the character set in use does not match.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def compute_variants(self, canonical_id: str) -> List[str]:
|
def compute_variants(self, canonical_id: str) -> List[str]:
|
||||||
@@ -42,13 +42,13 @@ class Analyzer(Protocol):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A list of possible spelling variants. All strings must have
|
A list of possible spelling variants. All strings must have
|
||||||
been transformed with the global normalizer and
|
been transformed with the global normalizer and
|
||||||
transliterator ICU rules. Otherwise they cannot be matched
|
transliterator ICU rules. Otherwise they cannot be matched
|
||||||
against the input by the query frontend.
|
against the input by the query frontend.
|
||||||
The list may be empty, when there are no useful
|
The list may be empty, when there are no useful
|
||||||
spelling variants. This may happen when an analyzer only
|
spelling variants. This may happen when an analyzer only
|
||||||
usually outputs additional variants to the canonical spelling
|
usually outputs additional variants to the canonical spelling
|
||||||
and there are no such variants.
|
and there are no such variants.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -74,8 +74,8 @@ class AnalysisModule(Protocol):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A data object with configuration data. This will be handed
|
A data object with configuration data. This will be handed
|
||||||
as is into the `create()` function and may be
|
as is into the `create()` function and may be
|
||||||
used freely by the analysis module as needed.
|
used freely by the analysis module as needed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer:
|
def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer:
|
||||||
@@ -92,5 +92,5 @@ class AnalysisModule(Protocol):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A new analyzer instance. This must be an object that implements
|
A new analyzer instance. This must be an object that implements
|
||||||
the Analyzer protocol.
|
the Analyzer protocol.
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user