From 5ed4ea557430214f8d4989e293683621f5082d19 Mon Sep 17 00:00:00 2001 From: Goran Jelic-Cizmek Date: Mon, 15 Jul 2024 20:51:44 +0200 Subject: [PATCH 1/7] Use vectorized ops when calling `LineCollection.distance` --- .../shapes/bases/line_collection.py | 80 ++++++------------- 1 file changed, 26 insertions(+), 54 deletions(-) diff --git a/src/data_morph/shapes/bases/line_collection.py b/src/data_morph/shapes/bases/line_collection.py index ee155387..f1001572 100644 --- a/src/data_morph/shapes/bases/line_collection.py +++ b/src/data_morph/shapes/bases/line_collection.py @@ -4,6 +4,7 @@ from typing import Iterable import matplotlib.pyplot as plt +import numpy as np from matplotlib.axes import Axes from ...plotting.style import plot_with_custom_style @@ -44,65 +45,36 @@ def distance(self, x: Number, y: Number) -> float: float The minimum distance from the lines of this shape to the point (x, y). - """ - return min( - self._distance_point_to_line(point=(x, y), line=line) for line in self.lines - ) - - def _distance_point_to_line( - self, - point: Iterable[Number], - line: Iterable[Iterable[Number]], - ) -> float: - """ - Calculate the minimum distance between a point and a line. - - Parameters - ---------- - point : Iterable[numbers.Number] - Coordinates of a point in 2D space. - line : Iterable[Iterable[numbers.Number]] - Coordinates of the endpoints of a line in 2D space. - - Returns - ------- - float - The minimum distance between the point and the line. Notes ----- - Implementation based on `this VBA code`_. + Implementation based on `this SO answer`_. - .. _this VBA code: http://local.wasp.uwa.edu.au/~pbourke/geometry/pointline/source.vba + .. _this SO answer: https://stackoverflow.com/a/58781995 """ - start, end = line - line_mag = self._euclidean_distance(start, end) - - if line_mag < 0.00000001: - # Arbitrarily large value - return 9999 - - px, py = point - x1, y1 = start - x2, y2 = end - - u1 = ((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)) - u = u1 / (line_mag * line_mag) - - if (u < 0.00001) or (u > 1): - # closest point does not fall within the line segment, take the shorter - # distance to an endpoint - distance = min( - self._euclidean_distance(point, start), - self._euclidean_distance(point, end), - ) - else: - # Intersecting point is on the line, use the formula - ix = x1 + u * (x2 - x1) - iy = y1 + u * (y2 - y1) - distance = self._euclidean_distance(point, (ix, iy)) - - return distance + p = np.array([x, y]) + lines = np.array(self.lines) + + a = lines[:, 0, :] + b = lines[:, 1, :] + + d_ba = b - a + d = np.divide(d_ba, (np.hypot(d_ba[:, 0], d_ba[:, 1]).reshape(-1, 1))) + + # signed parallel distance components + # rowwise dot products of 2D vectors + s = np.multiply(a - p, d).sum(axis=1) + t = np.multiply(p - b, d).sum(axis=1) + + # clamped parallel distance + h = np.maximum.reduce([s, t, np.zeros(len(s))]) + + # perpendicular distance component + # rowwise cross products of 2D vectors + d_pa = p - a + c = d_pa[:, 0] * d[:, 1] - d_pa[:, 1] * d[:, 0] + + return np.min(np.hypot(h, c)) @plot_with_custom_style def plot(self, ax: Axes = None) -> Axes: From 8e0a11eb2adf73571322b3f675079691ff7b5e31 Mon Sep 17 00:00:00 2001 From: Goran Jelic-Cizmek Date: Mon, 22 Jul 2024 08:39:47 +0200 Subject: [PATCH 2/7] Add test for invalid `LineCollection` args --- src/data_morph/shapes/bases/line_collection.py | 5 +++++ tests/shapes/bases/test_line_collection.py | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/data_morph/shapes/bases/line_collection.py b/src/data_morph/shapes/bases/line_collection.py index f1001572..ac88fff5 100644 --- a/src/data_morph/shapes/bases/line_collection.py +++ b/src/data_morph/shapes/bases/line_collection.py @@ -23,6 +23,11 @@ class LineCollection(Shape): """ def __init__(self, *lines: Iterable[Iterable[Number]]) -> None: + # check that lines that have the same starting and ending points raise an error + for line in lines: + start, end = line + if np.allclose(start, end): + raise ValueError(f'Line {line} has the same start and end point') self.lines = lines """Iterable[Iterable[numbers.Number]]: An iterable of two (x, y) pairs representing the endpoints of a line.""" diff --git a/tests/shapes/bases/test_line_collection.py b/tests/shapes/bases/test_line_collection.py index 6b356552..0b1d39ae 100644 --- a/tests/shapes/bases/test_line_collection.py +++ b/tests/shapes/bases/test_line_collection.py @@ -35,10 +35,10 @@ def test_distance_nonzero(self, line_collection, point, expected_distance): assert pytest.approx(line_collection.distance(*point)) == expected_distance @pytest.mark.parametrize('line', [[(0, 0), (0, 0)], [(-1, -1), (-1, -1)]], ids=str) - def test_distance_to_small_line_magnitude(self, line_collection, line): - """Test _distance_point_to_line() for small line magnitudes.""" - distance = line_collection._distance_point_to_line((30, 50), line) - assert distance == 9999 + def test_line_as_point(self, line): + """Test LineCollection raises a ValueError for small line magnitudes.""" + with pytest.raises(ValueError): + LineCollection(line) def test_repr(self, line_collection): """Test that the __repr__() method is working.""" From 3dc1e2c096a992597a5a5bbb280d97cf4a26c242 Mon Sep 17 00:00:00 2001 From: JCGoran Date: Tue, 23 Jul 2024 00:24:42 +0200 Subject: [PATCH 3/7] Apply suggestions from code review Co-authored-by: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> --- src/data_morph/shapes/bases/line_collection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_morph/shapes/bases/line_collection.py b/src/data_morph/shapes/bases/line_collection.py index ac88fff5..6e6ae0cc 100644 --- a/src/data_morph/shapes/bases/line_collection.py +++ b/src/data_morph/shapes/bases/line_collection.py @@ -53,9 +53,9 @@ def distance(self, x: Number, y: Number) -> float: Notes ----- - Implementation based on `this SO answer`_. + Implementation based on `this Stack Overflow answer`_. - .. _this SO answer: https://stackoverflow.com/a/58781995 + .. _this Stack Overflow answer: https://stackoverflow.com/a/58781995 """ p = np.array([x, y]) lines = np.array(self.lines) From f78959391f8473537b3183983871b7f5c0559815 Mon Sep 17 00:00:00 2001 From: Goran Jelic-Cizmek Date: Tue, 30 Jul 2024 19:53:55 +0200 Subject: [PATCH 4/7] Fix test for `__repr__` --- .../shapes/bases/line_collection.py | 7 +++---- tests/shapes/bases/test_line_collection.py | 21 ++++++++----------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/data_morph/shapes/bases/line_collection.py b/src/data_morph/shapes/bases/line_collection.py index 6e6ae0cc..34a96451 100644 --- a/src/data_morph/shapes/bases/line_collection.py +++ b/src/data_morph/shapes/bases/line_collection.py @@ -28,7 +28,7 @@ def __init__(self, *lines: Iterable[Iterable[Number]]) -> None: start, end = line if np.allclose(start, end): raise ValueError(f'Line {line} has the same start and end point') - self.lines = lines + self.lines = np.array(lines) """Iterable[Iterable[numbers.Number]]: An iterable of two (x, y) pairs representing the endpoints of a line.""" @@ -58,10 +58,9 @@ def distance(self, x: Number, y: Number) -> float: .. _this Stack Overflow answer: https://stackoverflow.com/a/58781995 """ p = np.array([x, y]) - lines = np.array(self.lines) - a = lines[:, 0, :] - b = lines[:, 1, :] + a = self.lines[:, 0, :] + b = self.lines[:, 1, :] d_ba = b - a d = np.divide(d_ba, (np.hypot(d_ba[:, 0], d_ba[:, 1]).reshape(-1, 1))) diff --git a/tests/shapes/bases/test_line_collection.py b/tests/shapes/bases/test_line_collection.py index 0b1d39ae..f4b46070 100644 --- a/tests/shapes/bases/test_line_collection.py +++ b/tests/shapes/bases/test_line_collection.py @@ -1,7 +1,6 @@ """Test line_collection module.""" import itertools -import re import pytest @@ -42,14 +41,12 @@ def test_line_as_point(self, line): def test_repr(self, line_collection): """Test that the __repr__() method is working.""" - lines = r'\n '.join( - [r'\[\[\d+\.*\d*, \d+\.*\d*\], \[\d+\.*\d*, \d+\.*\d*\]\]'] - * len(line_collection.lines) - ) - assert ( - re.match( - (r'^\n lines=\n ' + lines), - repr(line_collection), - ) - is not None - ) + lines = r""" + lines= + array([[0., 0.], + [0., 1.]]) + array([[1., 0.], + [1., 1.]]) + array([[10.5, 11.5], + [11. , 10. ]])""" + assert repr(line_collection) == lines From 84239ae21faeef4ff69cc3f1c39606b3e0860313 Mon Sep 17 00:00:00 2001 From: JCGoran Date: Sun, 18 Aug 2024 11:58:56 +0200 Subject: [PATCH 5/7] Use regex in test Co-authored-by: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> --- tests/shapes/bases/test_line_collection.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/shapes/bases/test_line_collection.py b/tests/shapes/bases/test_line_collection.py index f4b46070..688294d9 100644 --- a/tests/shapes/bases/test_line_collection.py +++ b/tests/shapes/bases/test_line_collection.py @@ -41,12 +41,10 @@ def test_line_as_point(self, line): def test_repr(self, line_collection): """Test that the __repr__() method is working.""" - lines = r""" - lines= - array([[0., 0.], - [0., 1.]]) - array([[1., 0.], - [1., 1.]]) - array([[10.5, 11.5], - [11. , 10. ]])""" - assert repr(line_collection) == lines + assert ( + re.match( + r"""\n lines=\n {8}array\(\[\[\d+""", + repr(line_collection), + ) + is not None + ) From e93e8b0c319b930e9b01117769e44cb040069df7 Mon Sep 17 00:00:00 2001 From: JCGoran Date: Sun, 18 Aug 2024 12:05:51 +0200 Subject: [PATCH 6/7] Add missing import --- tests/shapes/bases/test_line_collection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/shapes/bases/test_line_collection.py b/tests/shapes/bases/test_line_collection.py index 688294d9..dbda853d 100644 --- a/tests/shapes/bases/test_line_collection.py +++ b/tests/shapes/bases/test_line_collection.py @@ -1,6 +1,7 @@ """Test line_collection module.""" import itertools +import re import pytest From 36d6c942582595b8ecbc6412735f02061ef23490 Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:17:32 +0200 Subject: [PATCH 7/7] Rework distance calculation --- .../shapes/bases/line_collection.py | 60 ++++++++++++------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/src/data_morph/shapes/bases/line_collection.py b/src/data_morph/shapes/bases/line_collection.py index 34a96451..2f91fc2f 100644 --- a/src/data_morph/shapes/bases/line_collection.py +++ b/src/data_morph/shapes/bases/line_collection.py @@ -23,11 +23,12 @@ class LineCollection(Shape): """ def __init__(self, *lines: Iterable[Iterable[Number]]) -> None: - # check that lines that have the same starting and ending points raise an error + # check that lines with the same starting and ending points raise an error for line in lines: start, end = line if np.allclose(start, end): raise ValueError(f'Line {line} has the same start and end point') + self.lines = np.array(lines) """Iterable[Iterable[numbers.Number]]: An iterable of two (x, y) pairs representing the endpoints of a line.""" @@ -57,28 +58,41 @@ def distance(self, x: Number, y: Number) -> float: .. _this Stack Overflow answer: https://stackoverflow.com/a/58781995 """ - p = np.array([x, y]) - - a = self.lines[:, 0, :] - b = self.lines[:, 1, :] - - d_ba = b - a - d = np.divide(d_ba, (np.hypot(d_ba[:, 0], d_ba[:, 1]).reshape(-1, 1))) - - # signed parallel distance components - # rowwise dot products of 2D vectors - s = np.multiply(a - p, d).sum(axis=1) - t = np.multiply(p - b, d).sum(axis=1) - - # clamped parallel distance - h = np.maximum.reduce([s, t, np.zeros(len(s))]) - - # perpendicular distance component - # rowwise cross products of 2D vectors - d_pa = p - a - c = d_pa[:, 0] * d[:, 1] - d_pa[:, 1] * d[:, 0] - - return np.min(np.hypot(h, c)) + point = np.array([x, y]) + + start_points = self.lines[:, 0, :] + end_points = self.lines[:, 1, :] + + tangent_vector = end_points - start_points + normalized_tangent_vectors = np.divide( + tangent_vector, + np.hypot(tangent_vector[:, 0], tangent_vector[:, 1]).reshape(-1, 1), + ) + + # row-wise dot products of 2D vectors + signed_parallel_distance_start = np.multiply( + start_points - point, normalized_tangent_vectors + ).sum(axis=1) + signed_parallel_distance_end = np.multiply( + point - end_points, normalized_tangent_vectors + ).sum(axis=1) + + clamped_parallel_distance = np.maximum.reduce( + [ + signed_parallel_distance_start, + signed_parallel_distance_end, + np.zeros(signed_parallel_distance_start.shape[0]), + ] + ) + + # row-wise cross products of 2D vectors + perpendicular_distance_component = np.cross( + point - start_points, normalized_tangent_vectors + ) + + return np.min( + np.hypot(clamped_parallel_distance, perpendicular_distance_component) + ) @plot_with_custom_style def plot(self, ax: Axes = None) -> Axes: