4 from __future__
import division
5 from __future__
import print_function
8 from multiprocessing.pool
import ThreadPool
as Pool
15 from dynamic_reconfigure.server
import Server
16 from jsk_recognition_msgs.msg
import Label
17 from jsk_recognition_msgs.msg
import LabelArray
18 from jsk_recognition_msgs.msg
import PolygonArray
19 from jsk_recognition_msgs.msg
import RectArray
21 from jsk_recognition_utils
import get_tile_image
22 from jsk_topic_tools
import ConnectionBasedTransport
23 import message_filters
27 from sensor_msgs.msg
import Image
29 from jsk_perception.cfg
import OCRConfig
as Config
33 if poly.shape != (4, 2):
34 raise ValueError(
'Not supported shape size {}'.format(poly.shape))
36 poly = np.array(poly, dtype=np.int32)
37 startidx = poly.sum(axis=1).argmin()
38 poly = np.roll(poly, len(poly) - startidx, 0)
40 poly = np.array(poly, dtype=np.int32)
41 x_min = max(np.min(poly[:, 0]), 0)
42 x_max = min(np.max(poly[:, 0]), img.shape[1])
43 y_min = max(np.min(poly[:, 1]), 0)
44 y_max = min(np.max(poly[:, 1]), img.shape[0])
47 pts1 = np.float32(poly)
48 pts2 = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
49 rot_mat = cv2.getPerspectiveTransform(pts1, pts2)
50 croppped_img = cv2.warpPerspective(img, rot_mat, (w, h))
54 def ocr_image(process_index, poly, img, lang='eng'):
56 if croppped_img.shape[0] == 0
or croppped_img.shape[1] == 0:
58 binary_img = np.zeros((0, 0), dtype=np.uint8)
60 gray = cv2.cvtColor(croppped_img, cv2.COLOR_RGB2GRAY)
61 _, binary_img = cv2.threshold(
63 cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
64 txt = pytesseract.image_to_string(
65 binary_img, lang=lang, config=
"--psm 6")
66 txt = txt.encode(
'utf-8')
67 txt = txt.lstrip().rstrip()
68 return process_index, txt, croppped_img, binary_img
76 if not os.path.exists(font_path):
77 raise OSError(
"Font not exists!")
80 texts = [
''] * len(polys)
84 poly = np.array(poly).astype(np.int32).reshape((-1))
85 poly = poly.reshape(-1, 2)
87 img, [poly.reshape((-1, 1, 2))],
88 True, color=(255, 0, 0), thickness=box_thickness)
91 for poly, text
in zip(polys, texts):
92 poly = np.array(poly).astype(np.int32).reshape((-1))
93 poly = poly.reshape(-1, 2)
94 if isinstance(text, str):
95 text =
"{}".format(text)
97 text =
"{}".format(text.decode(
'utf-8'))
98 pos = (poly[0][0] + 1, poly[0][1] + 1)
101 font_path, font_size,
103 background_color=(255, 255, 255),
124 '~output', std_msgs.msg.String, queue_size=1)
126 '~output/viz', Image, queue_size=1)
128 '~output/labels', LabelArray, queue_size=1)
130 '~output/debug/viz', Image, queue_size=1)
132 '~output/debug/binary_viz', Image, queue_size=1)
146 rospy.logwarn(
'Not valid font_path: {}'.format(self.
font_path))
152 self.
n_jobs = multiprocessing.cpu_count()
158 if resubscribe
and self.is_subscribed():
168 '~input/polygons', PolygonArray)
169 subs.append(sub_polygons)
173 subs.append(sub_rects)
177 slop = rospy.get_param(
'~slop', 0.1)
178 sync = message_filters.ApproximateTimeSynchronizer(
181 sync.registerCallback(callback)
185 sync.registerCallback(callback)
189 for sub
in self.
subs:
194 label_array_msg = LabelArray(header=header)
195 for i, text
in enumerate(texts):
196 label_array_msg.labels.append(
197 Label(id=i, name=text.decode(
'utf-8')))
200 if self.
pub_viz.get_num_connections() > 0:
213 msg_viz = self.
bridge.cv2_to_imgmsg(viz, encoding=
'rgb8')
214 msg_viz.header = header
220 viz = get_tile_image(imgs)
221 msg_viz = self.
bridge.cv2_to_imgmsg(viz, encoding=
'rgb8')
222 msg_viz.header = header
228 viz = get_tile_image(binary_imgs)
229 msg_viz = self.
bridge.cv2_to_imgmsg(viz, encoding=
'mono8')
230 msg_viz.header = header
235 polys = np.array(polys, dtype=np.int32)
236 indices = np.argsort(polys.sum(axis=2).min(axis=1))
237 text =
' '.join([texts[i].decode(
'utf-8')
for i
in indices])
241 std_msgs.msg.String(data=text))
244 img = self.
bridge.imgmsg_to_cv2(img_msg, desired_encoding=
'rgb8')
246 for rect
in rects_msg.rects:
248 np.array([[rect.x, rect.y],
249 [rect.x, rect.y + rect.height],
250 [rect.x + rect.width, rect.y + rect.height],
251 [rect.x + rect.width, rect.y]], dtype=np.int32))
252 polys = np.array(polys)
253 texts, imgs, binary_imgs = self.
process_ocr(img, polys)
257 img = self.
bridge.imgmsg_to_cv2(img_msg, desired_encoding=
'rgb8')
259 for polygon_stamp_msg
in polygons_msg.polygons:
261 np.array([[point.x, point.y]
262 for point
in polygon_stamp_msg.polygon.points]))
263 polys = np.array(polys, dtype=np.int32)
264 texts, imgs, binary_imgs = self.
process_ocr(img, polys)
266 imgs=imgs, binary_imgs=binary_imgs)
273 n_jobs = min(self.
n_jobs, len(polys))
274 process = Pool(n_jobs)
276 process.apply_async(ocr_image, (i, poly, img, self.
language))
277 for i, poly
in enumerate(polys)]
280 results = sorted([res.get()
for res
in multiple_results], key=
lambda a: a[0])
281 texts = [b
for a, b, c, d
in results]
283 for _, _, img, _
in results:
284 if img.shape[0] > 0
and img.shape[1] > 0:
285 imgs.append(img.copy())
287 for _, _, _, img
in results:
288 if img.shape[0] > 0
and img.shape[1] > 0:
289 binary_imgs.append(img.copy())
290 return texts, imgs, binary_imgs
293 if __name__ ==
'__main__':
294 rospy.init_node(
'ocr_node')