theora_imagem_transport: theora_publisher.cpp Source File

00001 #include "theora_imagem_transport/theora_publisher.h"
00002 #include <sensor_msgs/image_encodings.h>
00003 #include <opencv/highgui.h>
00004 
00005 #include <vector>
00006 #include <stdio.h> //for memcpy
00007 #define null 0
00008 
00009 using namespace std;
00010 
00011 namespace theora_imagem_transport
00012 {
00013 
00014 TheoraPublisher::TheoraPublisher()
00015 {
00016   encoding_context_ = null;
00017 }
00018 
00019 TheoraPublisher::~TheoraPublisher()
00020 {
00021   if (encoding_context_ != null)
00022     th_encode_free(encoding_context_);
00023 }
00024 
00025 //Sends the header packets to new subscribers (unless there are no header packets to send yet)
00026 void TheoraPublisher::connectCallback(const ros::SingleSubscriberPublisher& pub)
00027 {
00028   theora_imagem_transport::packet msg;
00029   ROS_INFO("New connection detected");
00030     for (unsigned int i = 0; i < stream_header_.size(); i++)
00031     {
00032       oggPacketToMsg(stream_header_[i], msg);
00033       pub.publish(msg);
00034       ROS_DEBUG("Published header packet, sleeping for 0.1 second");
00035       ros::Duration d = ros::Duration(0, 100000000);
00036       d.sleep();
00037     }
00038 }
00039 
00040 void TheoraPublisher::publish(const sensor_msgs::Image& message, 
00041                 const message_transport::SimplePublisherPlugin<sensor_msgs::Image,theora_imagem_transport::packet>::PublishFn& publish_fn) const 
00042 {
00043   if (img_bridge_.fromImage(message, "bgr8"))
00044   {
00045     IplImage* img = img_bridge_.toIpl();
00046     //cvShowImage(window_name_.c_str(), img);
00047     IplImage* img2 = cvCreateImage(cvGetSize(img), img->depth, img->nChannels);
00048     cvCvtColor(img, img2, CV_BGR2YCrCb);
00049     ensure_encoding_context(cvGetSize(img2), publish_fn);
00050 
00051     //convert image
00052     th_ycbcr_buffer ycbcr_image;
00053     vector<unsigned char> planes[3]; //color planes
00054     for (int i = 0; i < 3; i++) //Size is number of pixels in padded image (chroma subsampled by factor of 2 in each dimension, which means area is 1/4)
00055       planes[i].resize(nearestWidth * nearestHeight / (i > 0 ? 4 : 1));
00056 
00057     //ROS_DEBUG("Image size: %d image width: %d image height: %d", img2->imageSize, img2->width, img2->height);
00058     for (int i = 0; i < img2->imageSize / 3; i++) //imageSize/3 is the number of pixels
00059     {
00060       planes[0][i] = *(unsigned char*)(img2->imageData + i * 3); //Y
00061       //planes[2][i] = *(unsigned char*)(img2->imageData + i*3 + 1);  //Cr
00062       //planes[1][i] = *(unsigned char*)(img2->imageData + i*3 + 2);  //Cb
00063     }
00064 
00065     //Note that while OpenCV uses YCbCr, theora uses YCrCb... this can make things a little confusing
00066     //CHROMA subsampling
00067     for (int planeIdx = 1; planeIdx < 3; planeIdx++)
00068     {
00069       int swappedIdx = planeIdx;
00070       if (planeIdx == 1)
00071         swappedIdx = 2;
00072       else if (planeIdx == 2)
00073         swappedIdx = 1;
00074       for (int i = 0; i < img2->width; i += 2)
00075         for (int j = 0; j < img2->height; j += 2)
00076         {
00077           int planeDataIdx = i / 2 + (j * img2->width) / 4;
00078           //planes[2][planeDataIdx]
00079           unsigned int total = (unsigned int)((uchar*)(img2->imageData + img2->widthStep * j))[i * 3 + planeIdx];
00080           unsigned int count = 1;
00081           if (i < img2->width - 1)
00082           {
00083             total += (unsigned int)((uchar*)(img2->imageData + img2->widthStep * j))[(i + 1) * 3 + planeIdx];
00084             count++;
00085           }
00086           if (j < img2->height - 1)
00087           {
00088             total += (unsigned int)((uchar*)(img2->imageData + img2->widthStep * (j + 1)))[i * 3 + planeIdx];
00089             count++;
00090           }
00091           if (i < img2->width - 1 && j < img2->height - 1)
00092           {
00093             total += (unsigned int)((uchar*)(img2->imageData + img2->widthStep * (j + 1)))[(i + 1) * 3 + planeIdx];
00094             count++;
00095           }
00096           planes[swappedIdx][planeDataIdx] = (uchar)(total / count);
00097         }
00098     }
00099 
00100     for (int i = 0; i < 3; i++)
00101     {
00102       ycbcr_image[i].width = nearestWidth / (i > 0 ? 2 : 1); //
00103       ycbcr_image[i].height = nearestHeight / (i > 0 ? 2 : 1); //chroma is subsampled by a factor of 2
00104       ycbcr_image[i].stride = img2->width / (i > 0 ? 2 : 1); //
00105       ycbcr_image[i].data = &planes[i][0];
00106     }
00107     ROS_DEBUG("Width: %d, Height: %d, xOff: %d, yOff: %d", nearestWidth, nearestHeight, nearestXoff, nearestYoff);
00108 
00109     cvReleaseImage(&img2);
00110 
00111     int rval;
00112     if (encoding_context_ == null)
00113       ROS_DEBUG("About to encode with null encoding context.");
00114     rval = th_encode_ycbcr_in(encoding_context_, ycbcr_image);
00115     if (rval == TH_EFAULT)
00116       ROS_DEBUG("EFault in encoding.");
00117     if (rval == TH_EINVAL)
00118       ROS_DEBUG("EInval in encoding.");
00119     ROS_DEBUG("Encoding resulted in: %d", rval);
00120 
00121     ogg_packet oggpacket;
00122     theora_imagem_transport::packet output;
00123     ROS_DEBUG("Ready to get encoded packets.");
00124     while ((rval = th_encode_packetout(encoding_context_, 0, &oggpacket)) > 0)
00125     {
00126       oggPacketToMsg(oggpacket, output);
00127       ROS_DEBUG("Publishing packet!");
00128       publish_fn(output);
00129     }
00130     ROS_DEBUG("Punted from while loop with rval %d", rval);
00131   }
00132   else
00133     ROS_ERROR("Unable to convert from %s to bgr", message.encoding.c_str());
00134 }
00135 
00136 void TheoraPublisher::ensure_encoding_context(const CvSize &size, const PublishFn& publish_fn) const
00137 {
00138   if (encoding_context_ == null)
00139   {
00140     th_info encoder_setup;
00141     th_info_init(&encoder_setup);
00142     nearestWidth = size.width + size.width % 16 == 0 ? 0 : (16 - size.width % 16);
00143     nearestHeight = size.height + size.height % 16 == 0 ? 0 : (16 - size.height % 16);
00144 
00145     /* Theora has a divisible-by-sixteen restriction for the encoded frame size */
00146     /* scale the picture size up to the nearest /16 and calculate offsets */
00147     nearestWidth = (size.width + 15) & ~0xF;
00148     nearestHeight = (size.height + 15) & ~0xF;
00149     /*Force the offsets to be even so that chroma samples line up like we
00150      expect.*/
00151     nearestXoff = ((nearestWidth - size.width) >> 1) & ~1;
00152     nearestYoff = ((nearestHeight - size.height) >> 1) & ~1;
00153 
00154     encoder_setup.frame_width = nearestWidth;
00155     encoder_setup.frame_height = nearestHeight;
00156     encoder_setup.pic_width = size.width;
00157     encoder_setup.pic_height = size.height;
00158     encoder_setup.pic_x = nearestXoff;
00159     encoder_setup.pic_y = nearestYoff;
00160     ROS_DEBUG("Creating context with Width: %d, Height: %d", nearestWidth, nearestHeight);
00161     encoder_setup.colorspace = TH_CS_UNSPECIFIED;
00162     //encoder_setup.colorspace = TH_CS_ITU_REC_470M;     //TH_CS_ITU_REC_470M     A color space designed for NTSC content.
00163     //TH_CS_ITU_REC_470BG     A color space designed for PAL/SECAM content.
00164     encoder_setup.pixel_fmt = TH_PF_420; //see bottom of http://www.theora.org/doc/libtheora-1.1beta1/codec_8h.html
00165     int bitrate;
00166     getParamNode().param("theora_bitrate", bitrate, 800000);
00167     encoder_setup.target_bitrate = bitrate;
00168     //encoder_setup.quality = 63;    //On a scale of 0 to 63, to use this set target bitrate to 0
00169     encoder_setup.aspect_numerator = 1;
00170     encoder_setup.aspect_denominator = 1;
00171     encoder_setup.fps_numerator = 0;
00172     encoder_setup.fps_denominator = 0;
00173     encoder_setup.keyframe_granule_shift = 6; //Apparently a good default
00174 
00175     encoding_context_ = th_encode_alloc(&encoder_setup);
00176 
00177     if (encoding_context_ == null)
00178       ROS_DEBUG("No encoding context immediately after alloc.");
00179 
00180     th_comment comment;
00181     th_comment_init(&comment);
00182     th_comment_add(&comment, (char*)"Compression node written by Ethan.");
00183     comment.vendor = (char*)"Encoded by Willow Garage image_compression_node.";
00184 
00185     if (encoding_context_ == null)
00186       ROS_DEBUG("Encoding context not successfully created.");
00187 
00188     ogg_packet oggpacket;
00189     while (th_encode_flushheader(encoding_context_, &comment, &oggpacket) > 0)
00190     {
00191       stream_header_.push_back(oggpacket);
00192       //Have to deep copy the packet since theora owns the packet memory, not doing this causes nasty bugs and is very hard to track down!!!
00193       stream_header_.back().packet = new unsigned char[oggpacket.bytes];
00194       memcpy(stream_header_.back().packet, oggpacket.packet, oggpacket.bytes);
00195     }
00196     //ROS_DEBUG("Published %d header packets.", stream_header_.size());
00197     //th_comment_clear(&comment);  //TODO: this should happen but is causing crazy seg faults (probably trying to free a string literal)
00198 
00199     //Stream the header in case anyone is already listening
00200     theora_imagem_transport::packet msg;
00201     for (unsigned int i = 0; i < stream_header_.size(); i++)
00202     {
00203       oggPacketToMsg(stream_header_[i], msg);
00204       publish_fn(msg);
00205       ROS_DEBUG("Published header packet, sleeping for 0.1 second");
00206       ros::Duration d = ros::Duration(0, 100000000);
00207       d.sleep();
00208     }
00209 
00210     if (encoding_context_ == null)
00211       ROS_DEBUG("Encoding context killed by header flushing.");
00212   }
00213 }
00214 
00215 void TheoraPublisher::oggPacketToMsg(const ogg_packet &oggpacket, theora_imagem_transport::packet &msgOutput) const
00216 {
00217   msgOutput.blob.resize(oggpacket.bytes);
00218   memcpy(&msgOutput.blob[0], oggpacket.packet, oggpacket.bytes);
00219   msgOutput.bytes = oggpacket.bytes;
00220   msgOutput.b_o_s = oggpacket.b_o_s;
00221   msgOutput.e_o_s = oggpacket.e_o_s;
00222   msgOutput.granulepos = oggpacket.granulepos;
00223   msgOutput.packetno = oggpacket.packetno;
00224   //ROS_DEBUG("Ready to send %d bytes in packet#%d and granule%d (and this is BOS: %d).", msgOutput.bytes, msgOutput.packetno, msgOutput.granulepos, msgOutput.b_o_s);
00225   /*unsigned int i = 0;
00226   for (int j = 0; j < msgOutput.bytes; j++)
00227     i = i * 2 % 91 + msgOutput.blob[j];
00228   ROS_DEBUG("Checksum is: %d", i);*/
00229 }
00230 
00231 } //namespace theora_imagem_transport