craft_utils.py
Go to the documentation of this file.
1 """
2 Copyright (c) 2019-present NAVER Corp.
3 MIT License
4 """
5 
6 # -*- coding: utf-8 -*-
7 import math
8 
9 import cv2
10 import numpy as np
11 
12 
13 """ auxiliary functions """
14 # unwarp coordinates
15 
16 
def warpCoord(Minv, pt):
    """Map a 2-D point through the homography ``Minv``.

    The point is lifted to homogeneous coordinates ``(x, y, 1)``,
    multiplied by ``Minv`` and divided by the third component of the
    product.

    Args:
        Minv: Matrix applied to the homogeneous point (callers in this
            file pass the inverse of a 3x3 perspective transform).
        pt: Indexable holding the x coordinate at ``pt[0]`` and the y
            coordinate at ``pt[1]``.

    Returns:
        ``np.ndarray`` holding the two de-homogenised coordinates.
    """
    projected = np.matmul(Minv, np.array([pt[0], pt[1], 1]))
    return projected[:2] / projected[2]
20 
21 
22 """ end of auxiliary functions """
23 
24 
def getDetBoxes_core(
        textmap,
        linkmap,
        text_threshold,
        link_threshold,
        low_text):
    """Extract one quadrilateral per connected text component.

    ``textmap`` and ``linkmap`` are binarised, merged and labelled with
    4-connected components.  Each sufficiently large and confident
    component is dilated and enclosed in a minimum-area rectangle.

    Args:
        textmap: 2-D score map; binarised with ``low_text``.
        linkmap: Score map binarised with ``link_threshold``; it is added
            to the binarised ``textmap``, so it has to be compatible in
            shape.
        text_threshold: A component is dropped when the maximum
            ``textmap`` value inside it is below this value.
        link_threshold: Binarisation threshold for ``linkmap``.
        low_text: Binarisation threshold for ``textmap``.

    Returns:
        Tuple ``(det, labels, mapper)``: ``det`` is a list of 4x2 corner
        arrays, ``labels`` is the component label image and
        ``mapper[i]`` is the label id that produced ``det[i]``.
    """
    # prepare data
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape

    # labeling method: binarise both maps, merge them, label components
    _, text_score = cv2.threshold(textmap, low_text, 1, cv2.THRESH_BINARY)
    _, link_score = cv2.threshold(
        linkmap, link_threshold, 1, cv2.THRESH_BINARY)

    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, _ = cv2.connectedComponentsWithStats(
        text_score_comb.astype(np.uint8), connectivity=4)

    # Pixels set only by the link map.  This mask does not depend on the
    # component, so it is computed once instead of once per label.
    link_only = np.logical_and(link_score == 1, text_score == 0)

    det = []
    mapper = []
    for k in range(1, nLabels):  # label 0 is skipped
        # size filtering
        size = stats[k, cv2.CC_STAT_AREA]
        if size < 10:
            continue

        # thresholding
        component = labels == k
        if np.max(textmap[component]) < text_threshold:
            continue

        # make segmentation map
        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[component] = 255
        segmap[link_only] = 0  # remove link area

        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)

        # dilation window around the component, clamped to the image
        sx = max(x - niter, 0)
        sy = max(y - niter, 0)
        ex = min(x + w + niter + 1, img_w)
        ey = min(y + h + niter + 1, img_h)
        kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT, (1 + niter, 1 + niter))
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

        # make box: np.where yields (row, col); rolling the axis turns the
        # points into (x, y) order
        np_contours = np.roll(
            np.array(np.where(segmap != 0)), 1, axis=0
        ).transpose().reshape(-1, 2)
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        # boundary check due to minAreaRect may have out of range values
        # (see https://docs.opencv.org/3.4/d3/dc0/group__imgproc__shape.html#ga3d476a3417130ae5154aea421ca7ead9) # NOQA
        box[:, 0] = np.clip(box[:, 0], 0, img_w)
        box[:, 1] = np.clip(box[:, 1], 0, img_h)

        # align diamond-shape: a near-square box is replaced by the
        # axis-aligned bounding box of the component pixels
        w = np.linalg.norm(box[0] - box[1])
        h = np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            left, right = np_contours[:, 0].min(), np_contours[:, 0].max()
            top, bottom = np_contours[:, 1].min(), np_contours[:, 1].max()
            box = np.array(
                [[left, top], [right, top], [right, bottom], [left, bottom]],
                dtype=np.float32)

        # make clock-wise order: rotate so that the corner with the
        # smallest x + y comes first
        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4 - startidx, 0)
        box = np.array(box)

        det.append(box)
        mapper.append(k)

    return det, labels, mapper
114 
115 
def getPoly_core(boxes, labels, mapper, linkmap):
    """Refine each detection box into a polygon that follows the text.

    For every box the label image is warped into the box frame, the
    top/bottom contour of the selected component is sampled column by
    column, ``num_cp`` pivot points are chosen, and the resulting outline
    is mapped back to image coordinates with ``warpCoord``.

    Args:
        boxes: Sequence of 4x2 float32 corner arrays.
        labels: Component label image the boxes were derived from.
        mapper: ``mapper[k]`` is the label id in ``labels`` belonging to
            ``boxes[k]``.
        linkmap: Not used by this function; kept for interface
            compatibility.

    Returns:
        List with one entry per box: ``None`` when no polygon could be
        built (box too small, singular transform, component almost as
        tall as the box, too few pivots, ...), otherwise an ``np.ndarray``
        of polygon vertices with shape ``(2 * num_cp + 4, 2)``.
    """
    # configs
    num_cp = 5            # number of pivot points
    max_len_ratio = 0.7   # skip when component height exceeds this * h
    expand_ratio = 1.45   # scale applied to the median segment height
    max_r = 2.0           # upper bound of the edge search range
    step_r = 0.2          # step of the edge search range

    polys = []
    for k, box in enumerate(boxes):
        # size filter for small instance
        w = int(np.linalg.norm(box[0] - box[1]) + 1)
        h = int(np.linalg.norm(box[1] - box[2]) + 1)
        if w < 10 or h < 10:
            polys.append(None)
            continue

        # warp image
        tar = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
        M = cv2.getPerspectiveTransform(box, tar)
        word_label = cv2.warpPerspective(
            labels, M, (w, h), flags=cv2.INTER_NEAREST)
        try:
            Minv = np.linalg.inv(M)
        except np.linalg.LinAlgError:
            # singular transform: no way back to image coordinates
            polys.append(None)
            continue

        # binarization for selected label
        cur_label = mapper[k]
        word_label[word_label != cur_label] = 0
        word_label[word_label > 0] = 1

        # --- Polygon generation ---
        # find top/bottom contours: (column, first row, last row)
        cp = []
        max_len = -1
        for i in range(w):
            region = np.where(word_label[:, i] != 0)[0]
            if len(region) < 2:
                continue
            cp.append((i, region[0], region[-1]))
            length = region[-1] - region[0] + 1
            if length > max_len:
                max_len = length

        # pass if max_len is similar to h
        if h * max_len_ratio < max_len:
            polys.append(None)
            continue

        # get pivot points with fixed length
        tot_seg = num_cp * 2 + 1
        seg_w = w / tot_seg     # segment width
        pp = [None] * num_cp    # init pivot points
        cp_section = [[0, 0] for _ in range(tot_seg)]
        seg_height = [0] * num_cp
        seg_num = 0
        num_sec = 0
        prev_h = -1
        for (x, sy, ey) in cp:
            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
                # average previous segment
                if num_sec == 0:
                    break
                cp_section[seg_num] = [
                    cp_section[seg_num][0] / num_sec,
                    cp_section[seg_num][1] / num_sec]
                num_sec = 0

                # reset variables
                seg_num += 1
                prev_h = -1

            # accumulate center points
            cy = (sy + ey) * 0.5
            cur_h = ey - sy + 1
            cp_section[seg_num] = [
                cp_section[seg_num][0] + x,
                cp_section[seg_num][1] + cy]
            num_sec += 1

            if seg_num % 2 == 0:
                continue  # No polygon area

            # keep the tallest column of each odd segment as its pivot
            if prev_h < cur_h:
                pp[int((seg_num - 1) / 2)] = (x, cy)
                seg_height[int((seg_num - 1) / 2)] = cur_h
                prev_h = cur_h

        # processing last segment
        if num_sec != 0:
            cp_section[-1] = [
                cp_section[-1][0] / num_sec,
                cp_section[-1][1] / num_sec]

        # pass if num of pivots is not sufficient or segment width is
        # smaller than character height
        if None in pp or seg_w < np.max(seg_height) * 0.25:
            polys.append(None)
            continue

        # calc median maximum of pivot points
        half_char_h = np.median(seg_height) * expand_ratio / 2

        # calc gradient and apply to make horizontal pivots
        new_pp = []
        for i, (x, cy) in enumerate(pp):
            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
            if dx == 0:     # gradient is zero
                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
                continue
            rad = - math.atan2(dy, dx)
            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
            new_pp.append([x - s, cy - c, x + s, cy + c])

        # get edge points to cover character heatmaps
        isSppFound, isEppFound = False, False
        spp, epp = None, None
        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + \
            (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + \
            (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
        for r in np.arange(0.5, max_r, step_r):
            dx = 2 * half_char_h * r
            if not isSppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_s * dx
                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
                cv2.line(
                    line_img,
                    (int(p[0]), int(p[1])),
                    (int(p[2]), int(p[3])),
                    1, thickness=1)
                # accept once the line no longer touches the component,
                # or when the search range is about to run out
                if np.sum(np.logical_and(word_label, line_img)
                          ) == 0 or r + 2 * step_r >= max_r:
                    spp = p
                    isSppFound = True
            if not isEppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_e * dx
                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
                cv2.line(
                    line_img,
                    (int(p[0]), int(p[1])),
                    (int(p[2]), int(p[3])),
                    1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)
                          ) == 0 or r + 2 * step_r >= max_r:
                    epp = p
                    isEppFound = True
            if isSppFound and isEppFound:
                break

        # pass if boundary of polygon is not found
        if not (isSppFound and isEppFound):
            polys.append(None)
            continue

        # make final polygon: one side start -> end, then the other side
        # back from end -> start
        poly = []
        poly.append(warpCoord(Minv, (spp[0], spp[1])))
        for p in new_pp:
            poly.append(warpCoord(Minv, (p[0], p[1])))
        poly.append(warpCoord(Minv, (epp[0], epp[1])))
        poly.append(warpCoord(Minv, (epp[2], epp[3])))
        for p in reversed(new_pp):
            poly.append(warpCoord(Minv, (p[2], p[3])))
        poly.append(warpCoord(Minv, (spp[2], spp[3])))

        # add to final result
        # make clock-wise order: rotate so that the vertex with the
        # smallest x + y comes first.  The list is converted to an array
        # before any array method is used on it.
        poly = np.array(poly)
        startidx = poly.sum(axis=1).argmin()
        poly = np.roll(poly, len(poly) - startidx, 0)
        polys.append(poly)

    return polys
298 
299 
def getDetBoxes(
        textmap,
        linkmap,
        text_threshold,
        link_threshold,
        low_text,
        poly=False):
    """Detect text boxes and, optionally, refined polygons.

    Args:
        textmap, linkmap, text_threshold, link_threshold, low_text:
            Forwarded unchanged to ``getDetBoxes_core``.
        poly: When true, ``getPoly_core`` is run on the detected boxes;
            otherwise every polygon entry is ``None``.

    Returns:
        Tuple ``(boxes, polys)`` where ``polys`` has one entry per box.
    """
    boxes, labels, mapper = getDetBoxes_core(
        textmap, linkmap, text_threshold, link_threshold, low_text)

    if not poly:
        return boxes, [None] * len(boxes)

    return boxes, getPoly_core(boxes, labels, mapper, linkmap)
316 
317 
def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
    """Scale box/polygon coordinates by the given ratios.

    Every non-``None`` entry is multiplied by
    ``(ratio_w * ratio_net, ratio_h * ratio_net)``, which broadcasts over
    the last axis (x, y).

    Args:
        polys: Sequence of coordinate arrays; entries may be ``None`` and
            may differ in shape from each other.
        ratio_w: Scale factor for the x coordinate.
        ratio_h: Scale factor for the y coordinate.
        ratio_net: Extra factor applied to both axes.

    Returns:
        ``polys`` itself when it is empty.  Otherwise an ``np.ndarray``:
        a regular stacked array when all entries share one shape, or a
        1-D object array when entries are ``None`` or differ in shape.
        In the object-array case the original entry arrays are scaled in
        place.
    """
    if len(polys) == 0:
        return polys

    # np.array() on a ragged sequence (mixed shapes / None) raises
    # ValueError on recent NumPy, so that case gets an explicit object
    # array instead of relying on implicit ragged construction.
    shapes = {None if p is None else np.shape(p) for p in polys}
    if len(shapes) == 1 and None not in shapes:
        polys = np.array(polys)
    else:
        ragged = np.empty(len(polys), dtype=object)
        for idx, p in enumerate(polys):
            ragged[idx] = p
        polys = ragged

    scale = (ratio_w * ratio_net, ratio_h * ratio_net)
    for k in range(len(polys)):
        if polys[k] is not None:
            polys[k] *= scale
    return polys
node_scripts.craft.craft_utils.getDetBoxes
def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False)
Definition: craft_utils.py:300
ssd_train_dataset.int
int
Definition: ssd_train_dataset.py:175
node_scripts.craft.craft_utils.adjustResultCoordinates
def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2)
Definition: craft_utils.py:318
node_scripts.craft.craft_utils.getDetBoxes_core
def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)
Definition: craft_utils.py:25
node_scripts.craft.craft_utils.getPoly_core
def getPoly_core(boxes, labels, mapper, linkmap)
Definition: craft_utils.py:116
node_scripts.craft.craft_utils.warpCoord
def warpCoord(Minv, pt)
Definition: craft_utils.py:17


jsk_perception
Author(s): Manabu Saito, Ryohei Ueda
autogenerated on Fri May 16 2025 03:11:16