text_locator: TextLocator.cpp Source File

Go to the documentation of this file.
00001 /*********************************************************************
00002  The MIT License (MIT)
00003 
00004  Copyright (c) <2013> <Vojtech Novak>
00005 
00006  Permission is hereby granted, free of charge, to any person obtaining a copy
00007  of this software and associated documentation files (the "Software"), to deal
00008  in the Software without restriction, including without limitation the rights
00009  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00010  copies of the Software, and to permit persons to whom the Software is
00011  furnished to do so, subject to the following conditions:
00012 
00013  The above copyright notice and this permission notice shall be included in
00014  all copies or substantial portions of the Software.
00015 
00016  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00019  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00020  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00021  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00022  THE SOFTWARE.
00023  *********************************************************************/
00024 
00025 #include <boost/smart_ptr/shared_ptr.hpp>
00026 #include <ros/console.h>
00027 #include <ros/node_handle.h>
00028 #include <ros/subscriber.h>
00029 #include <sensor_msgs/image_encodings.h>
00030 #include <sensor_msgs/Image.h>
00031 #include <tesseract/publictypes.h>
00032 #include <iostream>
00033 #include <stdexcept>
00034 #include <string>
00035 #include <utility>
00036 #include <vector>
00037 
00038 #include "text_locator/TextLocator.hpp"
00039 #include "text_locator/utils.hpp"
00040 
00041 //for publisher
00042 #include "text_locator/TextLocation.h"
00043 #include <tf/transform_datatypes.h>
00044 #include <stdlib.h>
00045 
00046 using namespace sensor_msgs;
00047 
00048 namespace ros_text_locator {
00049 
00050 TextLocator::TextLocator(ros::NodeHandle &handle,
00051                 tesseract::PageSegMode pgSegMode, std::string lang,
00052                 std::vector<ccv_swt_param_t> params, bool enableRecognition,
00053                 bool paramDebug, bool enablePcl) :
00054                 rec(TRecognizer(pgSegMode, lang)), detector(TDetector(params)), paramDebugMode(
00055                                 paramDebug), recognitionEnabled(enableRecognition), newdata(
00056                                 false), nodehandler(handle), pclEnabled(enablePcl) {
00057 
00058         init();
00059 //      nodehandler = handle;
00060 //      detector = TDetector(params);
00061 //      rec = TRecognizer(pgSegMode, lang);
00062 //      paramDebugMode = paramDebug;
00063 //      recognitionEnabled = enableRecognition;
00064 //      pclEnabled=enablePcl;
00065 //      locked = false;
00066 }
00067 
00068 void TextLocator::init() {
00069         using namespace ros::param;
00070 
00071         int dm, im;
00072         param<int>("/camera/driver/image_mode", im, -1);
00073         param<int>("/camera/driver/depth_mode", dm, -1);
00074         if (dm == -1 || im == -1) {
00075                 ROS_ERROR(
00076                                 "unable to load sensor resolutions from /camera/driver/depth_mode and/or /camera/driver/image_mode");
00077                 ros::shutdown();
00078         }
00079         std::pair<int, int> rgbRes = openniModeToRes(im);
00080         std::pair<int, int> irRes = openniModeToRes(dm);
00081         if (rgbRes.first == -1 || irRes.first == -1) {
00082                 ROS_ERROR(
00083                                 "unknown openni mode encountered. unable to convert the mode to resolution.");
00084                 ros::shutdown();
00085         }
00086 
00087         rgb2IrX = ((float) irRes.first) / ((float) rgbRes.first);
00088         rgb2IrY = ((float) irRes.second) / ((float) rgbRes.second);
00089 }
00090 
00091 void TextLocator::run() {
00092         sub = nodehandler.subscribe("/camera/rgb/image_rect", 8,
00093                         &TextLocator::monoCallback, this);
00094         if (pclEnabled) {
00095                 sub2 = nodehandler.subscribe<pcl::PointCloud<pcl::PointXYZRGB> >(
00096                                 "/camera/depth_registered/points", 8,
00097                                 &TextLocator::depthCallback, this);
00098         }
00099 
00100         pub = nodehandler.advertise<text_locator::TextLocation>(
00101                         "text_locator_topic", 8);
00102         if (!pub) {
00103                 ROS_ERROR("publisher of text_locator_topic cannot be initialized");
00104                 ros::shutdown();
00105         }
00106         if (paramDebugMode) {
00107                 std::string h = getHome() + "/text_locator.jpg";
00108                 ROS_INFO(
00109                                 "debug image will be written to %s. use it to fine-tune the detections parameters and then turn this feature off.", h.c_str());
00110         }
00111 }
00112 
00113 void TextLocator::monoCallback(const sensor_msgs::ImageConstPtr& image) {
00114         try {
00115                 cv_ptr = cv_bridge::toCvShare(image, image_encodings::MONO8);
00116         } catch (cv_bridge::Exception& e) {
00117                 ROS_ERROR("cv_bridge exception: %s", e.what());
00118                 return;
00119         }
00120         newdata = true;
00121 
00122         detector.detect(cv_ptr->image, text2d);
00123         if (paramDebugMode) {
00124                 saveDbgImg(image);
00125         }
00126         if (recognitionEnabled) {
00127                 rec.recognize(cv_ptr->image, text2d);
00128         }
00129         if (!pclEnabled) {
00130                 pcl::PointXYZRGB p; //dummy point
00131                 for (int i = 0; i < text2d.size(); i++) {
00132                         publish(p, p, text2d.at(i).text);
00133                 }
00134         }
00135 }
00136 
00137 void TextLocator::depthCallback(
00138                 const pcl::PointCloud<pcl::PointXYZRGB>::ConstPtr& depth) {
00139         if (!newdata) {
00140                 return;
00141         }
00142         int y, x, y2, x2;
00143         pcl::PointXYZRGB p1, p2;
00144         if (text2d.size() == 0) {
00145                 //ROS_INFO("nothing detected");
00146         }
00147         // IF recognition disabled then broadcast the detected textlike region coordinates in rgb image?
00148         for (int i = 0; i < text2d.size(); i++) {
00149                 // coordinate translation for different ir and rgb resolutions
00150                 y = rgbToIrY(text2d.at(i).y);
00151                 x = rgbToIrX(text2d.at(i).x);
00152                 y2 = rgbToIrY(text2d.at(i).y2);
00153                 x2 = rgbToIrX(text2d.at(i).x2);
00154 
00155                 p1 = depth->points[(y * depth->width) + x];
00156                 p2 = depth->points[(y2 * depth->width) + x2];
00157                 //cout << "kinect: " << text2d.at(i).text << endl;
00158                 //cout << "kinect: " << p1.x << " " << p1.y << " " << p1.z << "\t";
00159                 //cout << "kinect: " << p2.x << " " << p2.y << " " << p2.z << endl;
00160                 publish(p1, p2, text2d.at(i).text);
00161         }
00162         newdata = false;
00163 }
00164 
00165 int TextLocator::rgbToIrX(int rgb) {
00166         return (int) (rgb2IrX * ((float) (rgb)));
00167 }
00168 
00169 int TextLocator::rgbToIrY(int rgb) {
00170         return (int) (rgb2IrY * ((float) (rgb)));
00171 }
00172 
00173 void TextLocator::publish(pcl::PointXYZRGB &p1, pcl::PointXYZRGB &p2,
00174                 std::string txt) {
00175         text_locator::TextLocation msg;
00176         msg.header.stamp = ros::Time::now();
00177         msg.header.frame_id = "/camera/depth_registered/points";
00178         geometry_msgs::Point upLeft;
00179         upLeft.x = p1.x;
00180         upLeft.y = p1.y;
00181         upLeft.z = p1.z;
00182         msg.p1 = upLeft;
00183 
00184         geometry_msgs::Point lowRight;
00185         lowRight.x = p2.x;
00186         lowRight.y = p2.y;
00187         lowRight.z = p2.z;
00188 
00189         msg.p2 = lowRight;
00190         msg.text = txt;
00191         pub.publish(msg);
00192 }
00193 
00194 void TextLocator::saveDbgImg(const sensor_msgs::ImageConstPtr& image) {
00195         cv::Mat m;
00196         try {
00197                 m = cv_bridge::toCvShare(image, image_encodings::BGR8)->image;
00198         } catch (cv_bridge::Exception& e) {
00199                 ROS_ERROR("cv_bridge exception: %s", e.what());
00200                 return;
00201         }
00202         cv::Point p1, p2;
00203         //drawing detected areas
00204         for (int i = 0; i < text2d.size(); i++) {
00205                 p1.x = text2d.at(i).x;
00206                 p1.y = text2d.at(i).y;
00207                 p2.x = text2d.at(i).x2;
00208                 p2.y = text2d.at(i).y2;
00209                 cv::rectangle(m, p1, p2, cv::Scalar(0, 0, 255));
00210         }
00211         //rectangle in the center
00212         p1.x = m.cols / 2;
00213         p1.y = m.rows / 2;
00214         p2.x = m.cols / 2 + 1;
00215         p2.y = m.rows / 2 + 1;
00216         cv::rectangle(m, p1, p2, cv::Scalar(0, 0, 255));
00217         std::string h = getHome() + "/text_locator.jpg";
00218         cv::imwrite(h, m);
00219 }
00220 
00221 } /* namespace manipulator */