Merge pull request #2958 from lonvia/streaming-json-writer

Introduce streaming json writer for JSON output
This commit is contained in:
Sarah Hoffmann
2023-01-25 17:36:23 +01:00
committed by GitHub
6 changed files with 266 additions and 16 deletions

View File

@@ -110,7 +110,7 @@ jobs:
run: pip3 install falcon sanic sanic-testing sanic-cors starlette
- name: Install latest pylint/mypy
run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests typing-extensions asgi_lifespan sqlalchemy2-stubs
run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson typing-extensions asgi_lifespan sqlalchemy2-stubs
- name: PHP linting
run: phpcs --report-width=120 .

View File

@@ -64,6 +64,7 @@ sudo apt install php-cgi phpunit php-codesniffer \
pip3 install --user behave mkdocs mkdocstrings pytest pylint \
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \
types-ujson \
sanic-testing httpx asgi-lifespan
```

View File

@@ -7,12 +7,9 @@
"""
Output formatters for API version v1.
"""
from typing import Dict, Any
from collections import OrderedDict
import json
from nominatim.api.result_formatting import FormatDispatcher
from nominatim.api import StatusResult
from nominatim.utils.json_writer import JsonWriter
dispatch = FormatDispatcher()
@@ -26,13 +23,15 @@ def _format_status_text(result: StatusResult) -> str:
@dispatch.format_func(StatusResult, 'json')
def _format_status_json(result: StatusResult) -> str:
    """ Render a status result as a compact JSON object string.

        The keys 'status', 'message' and 'software_version' are always
        written. 'data_updated' (as ISO formatted string) and
        'database_version' are only written when the corresponding
        attribute of the result is not None.

        The output is produced solely with the streaming JsonWriter.
        No intermediate dictionary is built and json.dumps() is not
        applied to the writer object, which it would not be able to
        serialize.
    """
    out = JsonWriter()

    out.start_object()\
         .keyval('status', result.status)\
         .keyval('message', result.message)\
         .keyval_not_none('data_updated', result.data_updated,
                          lambda v: v.isoformat())\
         .keyval('software_version', str(result.software_version))\
         .keyval_not_none('database_version', result.database_version, str)\
       .end_object()

    # Calling the writer flushes the pending closing bracket and returns
    # the rendered text.
    return out()

View File

@@ -0,0 +1,144 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Streaming JSON encoder.
"""
from typing import Any, TypeVar, Optional, Callable
import io
try:
import ujson as json
except ModuleNotFoundError:
import json # type: ignore[no-redef]
T = TypeVar('T') # pylint: disable=invalid-name
class JsonWriter:
    """ JSON encoder that renders the output incrementally into an
        in-memory text buffer. This is a very simple writer which
        produces JSON in a form that is as compact as possible.

        The writer does not check for syntactic correctness. It is the
        responsibility of the caller to call the write functions in an
        order that produces correct JSON. The assertions in the code are
        debugging aids only and disappear when Python runs with -O.

        All write functions return the writer object itself so that
        function calls can be chained.

        Implementation note: the most recent structural token (bracket,
        colon or comma) is held back in 'pending' and only written out
        when the next element follows. That way a trailing comma left
        behind by next() or keyval() is dropped when the enclosing
        object or array is closed.
    """

    def __init__(self) -> None:
        # Buffer collecting the rendered JSON text.
        self.data = io.StringIO()
        # Structural token that still has to be written (or dropped).
        self.pending = ''


    def __call__(self) -> str:
        """ Return the rendered JSON content as a string.

            A pending closing bracket is flushed into the buffer first.
            The writer remains usable after calling this function.
        """
        if self.pending:
            assert self.pending in (']', '}')
            self.data.write(self.pending)
            self.pending = ''
        return self.data.getvalue()


    def _close(self, opening: str, closing: str) -> 'JsonWriter':
        """ Shared implementation for end_object() and end_array().

            A pending opening bracket (empty container) or a pending
            closing bracket (a nested container was closed just before)
            must be written out. A pending comma is dropped so that no
            trailing comma is produced.
        """
        assert self.pending in (',', opening, '}', ']', '')
        if self.pending in (opening, '}', ']'):
            self.data.write(self.pending)
        self.pending = closing
        return self


    def start_object(self) -> 'JsonWriter':
        """ Write the open bracket of a JSON object.
        """
        if self.pending:
            self.data.write(self.pending)
        self.pending = '{'
        return self


    def end_object(self) -> 'JsonWriter':
        """ Write the closing bracket of a JSON object.

            May be called directly after a value, after next() or
            directly after closing a nested object or array.
        """
        return self._close('{', '}')


    def start_array(self) -> 'JsonWriter':
        """ Write the opening bracket of a JSON array.
        """
        if self.pending:
            self.data.write(self.pending)
        self.pending = '['
        return self


    def end_array(self) -> 'JsonWriter':
        """ Write the closing bracket of a JSON array.

            May be called directly after a value, after next() or
            directly after closing a nested object or array.
        """
        return self._close('[', ']')


    def key(self, name: str) -> 'JsonWriter':
        """ Write the key string of a JSON object.

            The key is encoded with json.dumps(). The separating colon
            is kept pending until the value is written.
        """
        # A key always follows either the opening bracket or a comma.
        assert self.pending
        self.data.write(self.pending)
        self.data.write(json.dumps(name, ensure_ascii=False))
        self.pending = ':'
        return self


    def value(self, value: Any) -> 'JsonWriter':
        """ Write out a value as JSON. The function uses the json.dumps()
            function for encoding the JSON. Thus any value that can be
            encoded by that function is permissible here.
        """
        return self.raw(json.dumps(value, ensure_ascii=False))


    def next(self) -> 'JsonWriter':
        """ Write out a delimiter comma between JSON object or array elements.

            The comma itself stays pending, so it is discarded when the
            enclosing object or array is closed right afterwards.
        """
        if self.pending:
            self.data.write(self.pending)
        self.pending = ','
        return self


    def raw(self, raw_json: str) -> 'JsonWriter':
        """ Write out the given value as is. This function is useful if
            a value is already available in JSON format.
        """
        if self.pending:
            self.data.write(self.pending)
            self.pending = ''
        self.data.write(raw_json)
        return self


    def keyval(self, key: str, value: Any) -> 'JsonWriter':
        """ Write out an object element with the given key and value.
            This is a shortcut for calling 'key()', 'value()' and 'next()'.
        """
        self.key(key)
        self.value(value)
        return self.next()


    def keyval_not_none(self, key: str, value: Optional[T],
                        transform: Optional[Callable[[T], Any]] = None) -> 'JsonWriter':
        """ Write out an object element only if the value is not None.
            If 'transform' is given, it must be a function that takes the
            value type and returns a JSON encodable type. The transform
            function will be called before the value is written out.
        """
        if value is not None:
            self.key(key)
            self.value(transform(value) if transform else value)
            self.next()
        return self

View File

@@ -44,7 +44,7 @@ def test_status_format_json_minimal():
result = api_impl.format_result(status, 'json')
assert result == '{"status": 700, "message": "Bad format.", "software_version": "%s"}' % (NOMINATIM_VERSION, )
assert result == '{"status":700,"message":"Bad format.","software_version":"%s"}' % (NOMINATIM_VERSION, )
def test_status_format_json_full():
@@ -54,4 +54,4 @@ def test_status_format_json_full():
result = api_impl.format_result(status, 'json')
assert result == '{"status": 0, "message": "OK", "data_updated": "2010-02-07T20:20:03+00:00", "software_version": "%s", "database_version": "5.6"}' % (NOMINATIM_VERSION, )
assert result == '{"status":0,"message":"OK","data_updated":"2010-02-07T20:20:03+00:00","software_version":"%s","database_version":"5.6"}' % (NOMINATIM_VERSION, )

View File

@@ -0,0 +1,106 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the streaming JSON writer.
"""
import json
import pytest
from nominatim.utils.json_writer import JsonWriter
@pytest.mark.parametrize("inval,outstr", [(None, 'null'),
                         (True, 'true'), (False, 'false'),
                         (23, '23'), (0, '0'), (-1.3, '-1.3'),
                         ('g\nä', '"g\\nä"'), ('"', '"\\\""'),
                         ({}, '{}'), ([], '[]')])
def test_simple_value(inval, outstr):
    """ A single scalar or empty container is rendered as the expected
        JSON text: control characters are escaped, non-ASCII characters
        are kept verbatim, and the result is parseable.
    """
    writer = JsonWriter()
    writer.value(inval)

    assert writer() == outstr
    # The rendered text must also be valid JSON.
    json.loads(writer())
def test_empty_array():
    """ Opening and immediately closing an array yields '[]'.
    """
    writer = JsonWriter()
    writer.start_array()
    writer.end_array()

    assert writer() == '[]'
    # Output must be parseable JSON.
    json.loads(writer())
def test_array_with_single_value():
    """ An array holding one value needs no delimiter before closing.
    """
    writer = JsonWriter()
    writer.start_array()
    writer.value(None)
    writer.end_array()

    assert writer() == '[null]'
    # Output must be parseable JSON.
    json.loads(writer())
@pytest.mark.parametrize("invals,outstr", [
    ((1, ), '[1]'),
    (('a', 'b'), '["a","b"]'),
])
def test_array_with_data(invals, outstr):
    """ Values followed by next() are comma-separated and the trailing
        delimiter is dropped when the array is closed.
    """
    writer = JsonWriter().start_array()

    for element in invals:
        writer.value(element)
        writer.next()

    writer.end_array()

    assert writer() == outstr
    # Output must be parseable JSON.
    json.loads(writer())
def test_empty_object():
    """ Opening and immediately closing an object yields '{}'.
    """
    writer = JsonWriter()
    writer.start_object()
    writer.end_object()

    assert writer() == '{}'
    # Output must be parseable JSON.
    json.loads(writer())
def test_object_single_entry():
    """ A key followed by a value can be closed without calling next().
    """
    writer = JsonWriter()
    writer.start_object()
    writer.key('something')
    writer.value(5)
    writer.end_object()

    assert writer() == '{"something":5}'
    # Output must be parseable JSON.
    json.loads(writer())
def test_object_many_values():
    """ keyval() writes every entry, including None values, and the
        trailing delimiter is dropped on closing the object.
    """
    writer = JsonWriter()
    writer.start_object()
    writer.keyval('foo', None)
    writer.keyval('bar', {})
    writer.keyval('baz', 'b\taz')
    writer.end_object()

    assert writer() == '{"foo":null,"bar":{},"baz":"b\\taz"}'
    # Output must be parseable JSON.
    json.loads(writer())
def test_object_many_values_without_none():
    """ keyval_not_none() skips only None; other falsy values (0, '',
        False) are written, with the transform applied when given.
    """
    def yes_no(flag):
        return 'yes' if flag else 'no'

    writer = JsonWriter()
    writer.start_object()
    writer.keyval_not_none('foo', 0)
    writer.keyval_not_none('bar', None)
    writer.keyval_not_none('baz', '')
    writer.keyval_not_none('eve', False, transform=yes_no)
    writer.end_object()

    assert writer() == '{"foo":0,"baz":"","eve":"no"}'
    # Output must be parseable JSON.
    json.loads(writer())
def test_raw_output():
    """ raw() copies pre-formatted JSON into the output unchanged,
        including its internal whitespace.
    """
    writer = JsonWriter()
    writer.start_array()
    writer.raw('{ "nicely": "formatted here" }')
    writer.next()
    writer.value(1)
    writer.end_array()

    assert writer() == '[{ "nicely": "formatted here" },1]'