add class for online centroid computation

This commit is contained in:
Sarah Hoffmann
2022-06-06 09:49:00 +02:00
parent b7704833e4
commit 4885fdf0f9
3 changed files with 104 additions and 0 deletions

View File

View File

@@ -0,0 +1,48 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for computation of centroids.
"""
from collections.abc import Collection
class PointsCentroid:
""" Centroid computation from single points using an online algorithm.
More points may be added at any time.
Coordinates are internally treated as a 7-digit fixed-point float
(i.e. in OSM style).
"""
def __init__(self):
self.sum_x = 0
self.sum_y = 0
self.count = 0
def centroid(self):
""" Return the centroid of all points collected so far.
"""
if self.count == 0:
raise ValueError("No points available for centroid.")
return (float(self.sum_x/self.count)/10000000,
float(self.sum_y/self.count)/10000000)
def __len__(self):
return self.count
def __iadd__(self, other):
if isinstance(other, Collection) and len(other) == 2:
if all(isinstance(p, (float, int)) for p in other):
x, y = other
self.sum_x += int(x * 10000000)
self.sum_y += int(y * 10000000)
self.count += 1
return self
raise ValueError("Can only add 2-element tuples to centroid.")

View File

@@ -0,0 +1,56 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for centroid computation.
"""
import pytest
from nominatim.utils.centroid import PointsCentroid
def test_empty_set():
c = PointsCentroid()
with pytest.raises(ValueError, match='No points'):
c.centroid()
@pytest.mark.parametrize("centroid", [(0,0), (-1, 3), [0.0000032, 88.4938]])
def test_one_point_centroid(centroid):
c = PointsCentroid()
c += centroid
assert len(c.centroid()) == 2
assert c.centroid() == (pytest.approx(centroid[0]), pytest.approx(centroid[1]))
def test_multipoint_centroid():
c = PointsCentroid()
c += (20.0, -10.0)
assert c.centroid() == (pytest.approx(20.0), pytest.approx(-10.0))
c += (20.2, -9.0)
assert c.centroid() == (pytest.approx(20.1), pytest.approx(-9.5))
c += (20.2, -9.0)
assert c.centroid() == (pytest.approx(20.13333), pytest.approx(-9.333333))
def test_manypoint_centroid():
c = PointsCentroid()
for _ in range(10000):
c += (4.564732, -0.000034)
assert c.centroid() == (pytest.approx(4.564732), pytest.approx(-0.000034))
@pytest.mark.parametrize("param", ["aa", None, 5, [1, 2, 3], (3, None), ("a", 3.9)])
def test_add_non_tuple(param):
c = PointsCentroid()
with pytest.raises(ValueError, match='2-element tuples'):
c += param