Skip to content

Commit

Permalink
update to shapely 1.8
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Mar 18, 2022
1 parent b856f5b commit 8841abc
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 47 deletions.
64 changes: 62 additions & 2 deletions ocrd_cis/ocropy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from scipy.ndimage import measurements, filters, interpolation, morphology
from scipy import stats, signal
#from skimage.morphology import convex_hull_image
from skimage.measure import find_contours, approximate_polygon
from PIL import Image

from . import ocrolib
Expand Down Expand Up @@ -996,7 +997,9 @@ def h_compatible(obj1, obj2, center1, center2):
# (which must be split anyway)
# - with tighter polygonal spread around foreground
# - with spread of line labels against separator labels
# - with centerline extraction
# - return bg line and sep labels instead of just fg line labels
# - return centerline coords, too
@checks(ABINARY2)
def compute_segmentation(binary,
zoom=1.0,
Expand Down Expand Up @@ -1046,6 +1049,7 @@ def compute_segmentation(binary,
foreground may remain unlabelled for
separators and other non-text like small
noise, or large drop-capitals / images),
- list of Numpy arrays of centerline coordinates [x, y points in lr order]
- Numpy array of horizontal foreground lines mask,
- Numpy array of vertical foreground lines mask,
- Numpy array of large/non-text foreground component mask,
Expand Down Expand Up @@ -1141,10 +1145,66 @@ def compute_segmentation(binary,
LOG.debug('sorting labels by reading order')
llabels = morph.reading_order(llabels,rl,bt)[llabels]
DSAVE('llabels_ordered', llabels)

#segmentation = llabels*binary
#return segmentation
return llabels, hlines, vlines, images, colseps, scale
clines = compute_centerlines(bottom, top, llabels, scale)
return llabels, clines, hlines, vlines, images, colseps, scale

@checks(AFLOAT2,AFLOAT2,SEGMENTATION,NUMBER)
def compute_centerlines(bottom, top, lines, scale):
    """Get the coordinates of center lines running between each bottom and top gradient peak.

    The ``bottom`` and ``top`` maps are smoothed (mostly horizontally), and the
    zero-level contours of their difference are traced, restricted to pixels
    where ``lines`` is labelled. Each contour is then collapsed into a single
    left-to-right polyline by averaging its forward and backward path.

    Arguments:
    - bottom: 2-d float array (bottom gradient map)
    - top: 2-d float array (top gradient map), same shape as ``bottom``
    - lines: label array; only pixels with a label > 0 are searched
    - scale: number controlling the Gaussian smoothing widths

    Returns a list with one Numpy array per contour found, each of shape
    (N, 2) holding x,y points sorted by x.
    """
    # smooth bottom+top maps horizontally for centerline estimation
    bottom = filters.gaussian_filter(bottom, (scale*0.25,scale), mode='constant')
    top = filters.gaussian_filter(top, (scale*0.25,scale), mode='constant')
    # idea: center is where bottom and top gradient meet in the middle
    # (but between top and bottom, not between bottom and top)
    # - calculation via numpy == or isclose is too fragile numerically:
    #clines = np.isclose(top, bottom, rtol=0.1) & (np.diff(top - bottom, axis=0, append=0) < 0)
    # - calculation via skimage.measure contours is reliable, but produces polygon segments
    gradients = bottom - top
    seeds = lines > 0
    contours = find_contours(gradients, 0, mask=seeds)
    # debug image of the traced contours; use the builtin int here, because
    # the np.int alias was deprecated in NumPy 1.20 and removed in 1.24
    img = np.zeros_like(bottom, int)
    from skimage import draw
    clines = []
    for j, contour in enumerate(contours):
        # map y,x to x,y points
        contour = contour[:,::-1]
        if len(contour) <= 3:
            # too short already
            clines.append(contour[np.argsort(contour[:,0])])
            continue
        img[draw.polygon_perimeter(contour[:,1], contour[:,0], img.shape)] = j
        # ensure the segment runs from left-most to right-most point once,
        # find the middle between both paths (back and forth)
        left = contour[:,0].argmin()
        contour = np.concatenate((contour[left:], contour[:left]))
        right = contour[:,0].argmax()
        if right >= len(contour)-2 or right <= 1:
            # no plateau - no back path
            clines.append(contour[np.argsort(contour[:,0])])
            continue
        contour1 = contour[0:right]
        contour2 = contour[right:]
        # NOTE(review): np.interp expects increasing sample x-coordinates;
        # contour2 presumably runs right-to-left, and neither path is
        # guaranteed to be monotonic in x - TODO confirm results are sane
        interp1 = np.interp(contour2[:,0], contour1[:,0], contour1[:,1])
        interp2 = np.interp(contour1[:,0], contour2[:,0], contour2[:,1])
        order = np.argsort(contour[:,0])
        # for every point, average its own y with the y interpolated from the
        # opposite path at the same x (points before `right` belong to
        # contour1, the rest to contour2), then sort by x
        opposite = np.concatenate((interp2, interp1))
        midpoints = 0.5 * (contour[:,1] + opposite)
        interpolated = np.stack((contour[:,0], midpoints), axis=1)[order]
        # NOTE(review): img is integer-typed, so the fractional part of the
        # label is truncated on assignment (effectively j again)
        img[draw.polygon_perimeter(interpolated[:,1], interpolated[:,0], img.shape)] = j+0.5
        clines.append(interpolated)
    DSAVE("centerline contours", img, enabled=True)
    return clines

# from ocropus-gpageseg, but
# - on both foreground and background,
Expand Down
18 changes: 9 additions & 9 deletions ocrd_cis/ocropy/resegment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from itertools import chain
import numpy as np
from skimage import draw
from shapely.geometry import Polygon, asPolygon, LineString
from shapely.geometry import Polygon, LineString
from shapely.prepared import prep
from shapely.ops import unary_union
import alphashape
Expand Down Expand Up @@ -209,7 +209,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
segment_polygon = coordinates_of_segment(segment, parent_image, parent_coords)
segment_polygon = make_valid(Polygon(segment_polygon)).buffer(margin)
line_polygons.append(prep(segment_polygon))
segment_polygon = np.array(segment_polygon.exterior, np.int)[:-1]
segment_polygon = np.array(segment_polygon.exterior.coords, np.int)[:-1]
# draw.polygon: If any segment_polygon lies outside of parent
# (causing negative/above-max indices), either fully or partially,
# then this will silently ignore them. The caller does not need
Expand All @@ -224,7 +224,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
segment.id, page_id if fullpage else parent.id)
segment_polygon = coordinates_of_segment(segment, parent_image, parent_coords)
segment_polygon = make_valid(Polygon(segment_polygon)).buffer(margin)
segment_polygon = np.array(segment_polygon.exterior, np.int)[:-1]
segment_polygon = np.array(segment_polygon.exterior.coords, np.int)[:-1]
ignore_bin[draw.polygon(segment_polygon[:, 1],
segment_polygon[:, 0],
parent_bin.shape)] = False
Expand Down Expand Up @@ -271,7 +271,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
# left-hand side if left-to-right, and vice versa
scale * (-1) ** line_ltr, single_sided=True)],
loc=line.id, scale=scale))
line_polygon = np.array(line_polygon.exterior, np.int)[:-1]
line_polygon = np.array(line_polygon.exterior.coords, np.int)[:-1]
line_y, line_x = draw.polygon(line_polygon[:, 1],
line_polygon[:, 0],
parent_bin.shape)
Expand All @@ -280,12 +280,12 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
scale=scale, loc=parent.id, threshold=threshold)
return
try:
new_line_labels, _, _, _, _, scale = compute_segmentation(
new_line_labels, _, _, _, _, _, scale = compute_segmentation(
parent_bin, seps=ignore_bin, zoom=zoom, fullpage=fullpage,
maxseps=0, maxcolseps=len(ignore), maximages=0)
except Exception as err:
LOG.warning('Cannot line-segment %s "%s": %s',
tag, page_id if fullpage else parent.id, err)
LOG.error('Cannot line-segment %s "%s": %s',
tag, page_id if fullpage else parent.id, err)
return
LOG.info("Found %d new line labels for %d existing lines on %s '%s'",
new_line_labels.max(), len(lines), tag, parent.id)
Expand Down Expand Up @@ -476,7 +476,7 @@ def diff_polygons(poly1, poly2):
if poly.type == 'MultiPolygon':
poly = poly.convex_hull
if poly.minimum_clearance < 1.0:
poly = asPolygon(np.round(poly.exterior.coords))
poly = Polygon(np.round(poly.exterior.coords))
poly = make_valid(poly)
return poly

Expand Down Expand Up @@ -517,7 +517,7 @@ def join_polygons(polygons, loc='', scale=20):
if jointp.minimum_clearance < 1.0:
# follow-up calculations will necessarily be integer;
# so anticipate rounding here and then ensure validity
jointp = asPolygon(np.round(jointp.exterior.coords))
jointp = Polygon(np.round(jointp.exterior.coords))
jointp = make_valid(jointp)
return jointp

Expand Down
Loading

0 comments on commit 8841abc

Please sign in to comment.