Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: geometry #93

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 219 additions & 0 deletions src/akimbo/geometry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
import numba
import numpy as np

# geoarrow definitions https://geoarrow.org/format.html
# note that if a geometry exists, no elements may be NULL; but
# in the awkward representation there may be (zero-null) optional layers


def cont(layout):
while layout.is_option:
# require unmasked?
layout = layout.content
return layout


def match_point_separated(layout, x="x", y="y", z="z", m="m"):
fieldset = [x, y, z, m]
return layout.is_record and layout.fields == fieldset[: len(layout.fields)]


def match_point_interleaved(layout):
return layout.is_regular and 2 <= layout.size <= 4


def match_point(layout, **kw):
return match_point_separated(layout, **kw) or match_point_interleaved(layout)


def match_line_string(layout, **kw):
# layout.fields == ["vertices"]
return layout.is_list and match_point(cont(layout), **kw)


def match_polygon(layout, **kw):
# layout.fields == ["rings"] and layout.content.fields == ["vertices"]
return (
layout.is_list
and cont(layout).is_list
and match_point(cont(cont(layout)), **kw)
)


def match_multipoint(layout, **kw):
# layout.fields == ["points"]
return layout.is_list and match_point(cont(layout), **kw)


def match_multipolygon(layout, **kw):
# layout.fields == ["polygons"]
return layout.is_list and match_polygon(cont(layout))


# TODO: multilinestring (aka. line collection)


@numba.njit(nogil=True, cache=True)
def bounds(coord):
"""
Aggregation: compute max/min bounds for 1d array

If all points are nan, the return is (nan, nan)
"""
# NB: (np.nanmin(coord), np.nanmax(coord)) is significantly faster,
# but this should easily be made to work on the GPU
# how would you apply this to find the bounds of each of lists of strings?
xmin = np.inf
xmax = -np.inf

for x in coord:
if np.isfinite(x):
xmin = min(xmin, x)
xmax = max(xmax, x)

if not np.isfinite(xmin):
xmin = xmax = np.nan

return (xmin, xmax)


def bounding_box(arr):
if arr.fields:
return tuple(bounds(arr[c]) for c in arr.fields) # should be dict?
return tuple(bounds(arr[c]) for c in arr.fields)


@numba.njit(nogil=True, cache=True)
def _line_length(listx, listy):
"""of one 2d line"""
total_len = 0.0
x0 = listx[0]
y0 = listy[0]

for x1, y1 in zip(listx[1:], listy[1:]):
if np.isfinite(x0) and np.isfinite(y0) and np.isfinite(x1) and np.isfinite(y1):
# we just skip NULL points??
total_len += np.sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2)
x0 = x1
y0 = y1
return total_len


@numba.njit(nogil=True, cache=True)
def line_lengths(listlistx, listlisty):
"""of each 2d line in an array"""
out = np.zeros(len(listlistx), dtype="float64")
for i, (listx, listy) in enumerate(zip(listlistx, listlisty)):
# this loop could be extracted or parallelised
out[i] = _line_length(listx, listy)
return out


@numba.njit(nogil=True, cache=True)
def _area(listx, listy):
"""of one 2d polygon

CCW is +ve, CW is -ve
"""
area = 0.0

for i in range(len(listx) - 2):
ix = listx[i + 1]
jy = listy[i + 2]
ky = listy[i]

area += ix * (jy - ky)

# wrap-around term for polygon
firstx = listx[0]
secondy = listy[1]
lasty = listy[-1]
area += firstx * (secondy - lasty)

return area / 2.0


@numba.njit(nogil=True, cache=True)
def area(listlistx, listlisty):
"""of each 2d polygon in an array"""
out = np.empty(len(listlistx), dtype="float64")
for i in range(len(listlistx)):
# this can be parallel
out[i] = _area(listlistx[i], listlisty[i])
return out


@numba.njit(nogil=True, cache=True)
def triangle_orientation(ax, ay, bx, by, cx, cy):
"""
Orientation of single triangle

Args:
ax, ay: coords of first point
bx, by: coords of second point
cx, cy: coords of third point

Returns:
+1 if counter clockwise
0 if colinear
-1 if clockwise
"""
ab_x, ab_y = bx - ax, by - ay
ac_x, ac_y = cx - ax, cy - ay

# compute cross product: ab x bc
ab_x_ac = (ab_x * ac_y) - (ab_y * ac_x)

if ab_x_ac > 0:
# Counter clockwise
return 1
elif ab_x_ac < 0:
# Clockwise
return -1
else:
# Collinear
return 0


@numba.njit(nogil=True, cache=True)
def orient_polygons(flatx, flaty, polygon_offsets, ring_offsets):
"""
Reorient polygons so that exterior is in CCW order and interior rings (holes) CW

This function mutates the values array.

Because we do mutation here, input is numpy arrays, no ak, and must be
extracted from the layout.
"""
num_rings = len(ring_offsets) - 1

# Compute expected orientation of rings
expected_ccw = np.zeros(len(ring_offsets) - 1, dtype=np.bool_)
expected_ccw[polygon_offsets[:-1]] = True # outer ring of each set

# Compute actual orientation of rings
is_ccw = np.zeros(num_rings)
for i in range(num_rings):
is_ccw[i] = (
_area(
flatx[ring_offsets[i] : ring_offsets[i + 1]],
flaty[ring_offsets[i] : ring_offsets[i + 1]],
)
>= 0
)

# Compute indices of rings to flip
flip_inds = is_ccw != expected_ccw
ring_starts = ring_offsets[:-1]
ring_stops = ring_offsets[1:]
flip_starts = ring_starts[flip_inds]
flip_stops = ring_stops[flip_inds]

for i in range(len(flip_starts)):
flip_start = flip_starts[i]
flip_stop = flip_stops[i]

xs = flatx[flip_start:flip_stop]
ys = flaty[flip_start:flip_stop]
flatx[flip_start:flip_stop] = xs[::-1]
flaty[flip_start:flip_stop] = ys[::-1]
Loading