ObjectProjection.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 #
00003 # Copyright (C) 2009 Rosen Diankov
00004 # 
00005 # Licensed under the Apache License, Version 2.0 (the "License");
00006 # you may not use this file except in compliance with the License.
00007 # You may obtain a copy of the License at
00008 #     http://www.apache.org/licenses/LICENSE-2.0
00009 # 
00010 # Unless required by applicable law or agreed to in writing, software
00011 # distributed under the License is distributed on an "AS IS" BASIS,
00012 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013 # See the License for the specific language governing permissions and
00014 # limitations under the License. 
00015 from __future__ import with_statement # for python 2.5
00016 PKG = 'posedetectiondb' # this package name
00017 import roslib; roslib.load_manifest(PKG) 
00018 import os, sys, time, threading, string, struct
00019 from optparse import OptionParser
00020 from numpy import *
00021 import scipy
00022 import scipy.signal
00023 import scipy.ndimage
00024 import numpy
00025 
00026 #import cv
00027 #from cv_bridge import CvBridge, CvBridgeError
00028 
00029 from openravepy import *
00030 from openravepy import pyANN
00031 
00032 class ObjectProjection(metaclass.AutoReloader):
00033     """Offers several functions for querying projection results of objects whose geometry is loaded into openrave"""
00034     def __init__(self, kinbodydata=None,kinbodyfilename=None):
00035         self.orenv = Environment()
00036         orcol = self.orenv.CreateCollisionChecker('ode')
00037         if orcol is not None:
00038             self.orenv.SetCollisionChecker(orcol)
00039         #self.orenv.SetViewer('qtcoin',False)
00040         if kinbodydata is not None:
00041             self.orobj = self.orenv.ReadKinBodyXMLData(kinbodydata)
00042         elif kinbodyfilename is not None:
00043             self.orobj = self.orenv.ReadKinBodyXMLFile(kinbodyfilename)
00044         self.orenv.AddKinBody(self.orobj)
00045         self.orobj.SetTransform(eye(4))
00046         self.ab = self.orobj.ComputeAABB()
00047         self.maxradius = sqrt(sum(self.ab.extents()**2))
00048         self.vertices = transpose(self.orenv.Triangulate(self.orobj).vertices)
00049 
00050     def __del__(self):
00051         self.orenv.Destroy()
00052 
00053     def Compute3DPositionImage(self,Tcamera, KK, imagewidth,imageheight,ROI=None,buffer=10,computedepth=True):
00054         """Given the camera position, its intrisnic matrix, and its region of interest [upperleft_xy lowerright_xy],
00055         compute the position and depth maps. The position map gives the local 3d surface position of every image point.
00056         if computedepth is True, second returned parameter will be depthmap, otherwise it is the mask.
00057         """
00058         projpts = self.ProjectPoints(self.vertices,linalg.inv(Tcamera),KK)
00059         if ROI is None:
00060             bbmin = numpy.min(projpts,1)
00061             bbmax = numpy.max(projpts,1)
00062             ROI = [max(0,floor(bbmin[0]-buffer)),max(0,floor(bbmin[1]-buffer)),min(imagewidth,floor(bbmax[0]+2*buffer)),min(imageheight,floor(bbmax[1]+2*buffer))]
00063         ROI = [max(0,ROI[0]), max(0,ROI[1]), min(imagewidth,ROI[2]), min(imageheight,ROI[3])]
00064         width = int(ROI[2]-ROI[0])
00065         height = int(ROI[3]-ROI[1])
00066         offset = [int(ROI[0]),int(ROI[1])]
00067         if width <= 0 or height <= 0:
00068             return array(()),array(()),KK,offset
00069         newKK = array(KK)
00070         newKK[0,2] -= offset[0]
00071         newKK[1,2] -= offset[1]
00072         inds = array(range(width*height))
00073         imagepoints = array((mod(inds,width),floor(inds/width)))
00074         camerapoints = transpose(dot(linalg.inv(newKK), r_[imagepoints,ones((1,imagepoints.shape[1]))]))
00075         hitindices,hitpositions = self.Get3DPointsFromImageRays(camerapoints, Tcamera)
00076         infinds = flatnonzero(1-hitindices)
00077         hitpositions[infinds,:] = inf
00078         # create an image of positions
00079         if computedepth:
00080             Idepth = dot(hitpositions,Tcamera[0:3,2])-dot(Tcamera[0:3,2],Tcamera[0:3,3])
00081             Idepth = reshape(Idepth,(height,width))
00082         else:
00083             Idepth = reshape(isinf(hitpositions[:,0])==False,(height,width))
00084         Iposition = reshape(hitpositions,(height,width,3))
00085         return Iposition,Idepth,newKK,offset
00086 
00087     def Get3DPointsFromImageRays(self,*args,**kwargs):
00088         hitindices,hitpositions = self.GetContactPointsFromImageRays(*args,**kwargs)
00089         return hitindices,hitpositions[:,0:3]
00090 
00091     def GetContactPointsFromImageRays(self,camerapoints, Tcamera,rayneardist=0):
00092         """Retrives the positions on the object of points in camera space.
00093         camerapoints is Nx3 array
00094         Returns hitindices (N points) ,hitpositions (Nx3 vector)"""
00095         if camerapoints.size == 0:
00096             return array(()),array(())
00097         maxdist = fabs(dot(Tcamera[0:3,2],Tcamera[0:3,3]))+self.maxradius+0.2
00098         raydirs = dot(camerapoints / tile(vstack(sqrt(sum(camerapoints**2,1))),(1,3)),transpose(Tcamera[0:3,0:3]))
00099         rays = c_[tile(Tcamera[0:3,3],(raydirs.shape[0],1))+rayneardist*raydirs,raydirs*maxdist]
00100         with self.orenv:
00101             indices,info = self.orenv.CheckCollisionRays(rays,self.orobj)
00102             return indices,info
00103 
00104     @staticmethod
00105     def ProjectPoints(pts,T,KK):
00106         pts3d = dot(T[0:3,0:3],pts)+tile(T[0:3,3:4],(1,pts.shape[1]))
00107         return dot(KK[0:2,0:3],pts3d)/tile(pts3d[2:3,:],(2,1))
00108     @staticmethod
00109     def ComputeRigidTransform(points1, points2,doscale=False):
00110         """Finds an affine k+1xk+1 transformation T such that T*transpose(points1)=transpose(points2). points1, points2 is a Nxk array"""
00111         assert(points1.shape == points2.shape)
00112         N = points1.shape[0]
00113         points1mean = mean(points1,0)
00114         points2mean = mean(points2,0)
00115         [U,s,Vh] = svd( dot(transpose(points1-tile(points1mean,(N,1))),points2-tile(points2mean,(N,1))) )
00116         I = eye(points1.shape[1])
00117         I[-1,-1] = det(dot(U,Vh))
00118         R = transpose(dot(dot(U,I),Vh))
00119         if doscale:
00120             R = dot(R, diag(s))
00121         T = eye(points1.shape[1]+1)
00122         T[0:-1,0:-1] = R
00123         T[0:-1,-1] = points2mean-dot(R,points1mean)
00124         return T
00125     @staticmethod
00126     def CameraQuaternionDistSqr(q,qarray,angleweight=0.0):
00127         """ distance between two quaternions ignoring left rotation around z axis of qarray"""
00128         sinang = -q[0]*qarray[:,3]-q[1]*qarray[:,2]+q[2]*qarray[:,1]+q[3]*qarray[:,0]
00129         cosang = q[0]*qarray[:,0]+q[1]*qarray[:,1]+q[2]*qarray[:,2]+q[3]*qarray[:,3]
00130         length = 1.0/sqrt(sinang**2+cosang**2)
00131         angle = atan2(sinang,cosange)
00132         sinang *= length
00133         cosang *= length
00134         return quatArrayTDist(q,c_[cosang*qarray[:,0]-sinang*qarray[:,3], cosang*qarray[:,1]-sinang*qarray[:,2], cosang*qarray[:,2]-sinang*qarray[:,1], cosang*qarray[:,3]-sinang*qarray[:,0]])**2+angleweight*angle**2
00135     @staticmethod
00136     def CameraPoseDistSqr(pose,posearray,rotweightsqr=0.3,angleweight=0.0):
00137         """distance between two poses ignoring left rotation around zaxis of posearray. Squared quaternion distance is scaled by rotweightsqr"""
00138         return sum((tile(pose[4:7],(len(posearray),1))-posearray[:,4:7])**2,1)+rotweightsqr*ObjectProjection.CameraQuaternionDistSqr(pose[0:4],posearray[:,0:4],angleweight=angleweight)
00139     @staticmethod
00140     def ComputeNormalizationTransformation(points):
00141         """Computes a k+1xk+1 affine transformation to normalize a Nxk points array"""
00142         pointsmean = mean(points, 0)
00143         pointsnorm = points - tile(pointsmean, (points.shape[0],1))
00144         pointssigma = dot(transpose(pointsnorm), pointsnorm) / points.shape[0]
00145         [U, s, Vh] = svd(pointssigma)
00146         T = eye(points.shape[1]+1)
00147         T[0:-1,0:-1] = transpose(U)
00148         T[0:-1,-1] = -dot(transpose(U),pointsmean)
00149         return T
00150     @staticmethod
00151     def Compute3DPositionFromDepth(Idepth,Tcamera,KK):
00152         depthr,depthc = nonzero(isfinite(Idepth))
00153         depth = Idepth[depthr,depthc]
00154         camerapos = dot(c_[depthc*depth,depthr*depth,depth],transpose(linalg.inv(KK)))
00155         Iposition = tile(inf,(Idepth.shape[0],Idepth.shape[1],3))
00156         # transform by Tcamera
00157         pos = dot(camerapos,transpose(Tcamera[0:3,0:3]))+tile(Tcamera[0:3,3],(camerapos.shape[0],1))
00158         Iposition[depthr,depthc,:] = dot(camerapos,transpose(Tcamera[0:3,0:3]))+tile(Tcamera[0:3,3],(camerapos.shape[0],1))
00159         return Iposition
00160 
00161     @staticmethod
00162     def FillDepthMap(Idepth,dilateradius=8):
00163         Imask = isfinite(Idepth)
00164         Idepthfilled = array(Idepth)
00165         if dilateradius > 0:
00166             Ieroded = scipy.signal.convolve2d(Imask,ObjectProjection.streldisk(dilateradius),'same')==0
00167         else:
00168             Ieroded = Imask==False;
00169         
00170         L,numlabels = scipy.ndimage.label(Ieroded,[[1,1,1], [1,1,1], [1,1,1]])
00171         props = ObjectProjection.ImageRegionProps(L)
00172         if len(props) == 0:
00173             return Idepthfilled
00174 
00175         for prop in props:
00176             if prop['area'] < 5000:
00177                 Ieroded[L==prop['id']] = False
00178         Imaskfilled = Ieroded==False
00179         
00180         # fill the depth map
00181         fillr,fillc = nonzero(logical_and(Imask==False,Imaskfilled))
00182         if len(fillr) > 0:
00183             # find the 8 nearest neighbors of each point, fit a plane through them,
00184             # and see where the query point lies on that z of that plane
00185             maskr,maskc = nonzero(Imask)
00186             maskpoints = c_[maskr,maskc]
00187             if len(maskpoints) > 0:
00188                 maskdepth = Idepth[maskr,maskc]
00189                 kdtree = pyANN.KDTree(maskpoints)
00190                 neighs,dists = kdtree.kSearchArray(c_[fillr,fillc],8,0.1)
00191                 for i,n in enumerate(neighs):
00192                     A = c_[maskpoints[n,:],maskdepth[n]]
00193                     mA = A-tile(mean(A,0),(A.shape[0],1))
00194                     u,s,v = linalg.svd(dot(transpose(mA),mA))
00195                     # compute the plane and evaluate at the middle point
00196                     planedist = -mean(dot(A,u[:,2:3]))
00197                     Idepthfilled[fillr[i],fillc[i]] = -(u[0,2]*fillr[i]+u[1,2]*fillc[i]+planedist)/u[2,2]
00198         return Idepthfilled
00199 
00200     @staticmethod
00201     def ImageRegionProps(L):
00202         """calculates region properties of a labeled image similar to MATLAB's regionprops
00203         Output:
00204           props - structure where each element contains
00205           Area - number of pixels in region
00206           BoundingBox - [startrow startcol numrows numcols]
00207           """
00208         ids = unique(L).tolist()
00209         if 0 in ids:
00210             ids.remove(0)
00211         props = []
00212         for id in ids:
00213             prop = dict()
00214             r,c = nonzero(L==id)
00215             prop['id'] = id
00216             prop['area'] = len(r)
00217             ul = [numpy.min(c),numpy.min(r)]
00218             prop['boundingbox'] = ul+[numpy.max(c)-ul[0],numpy.max(r)-ul[1]]
00219             props.append(prop)
00220         return props
00221 
00222     @staticmethod
00223     def streldisk(r):
00224         if r == 1:
00225             return array([[0,1,0],[1,1,1],[0,1,0]])
00226         elif r == 2:
00227             return array([[0,0,1,0,0],[0,1,1,1,0],[1,1,1,1,1],[0,1,1,1,0],[0,0,1,0,0]])
00228         numempty = 2+4*int((r-4)/7)+2*int(mod(r-4,7)/4) #don't ask.. this is the function matlab uses
00229         A = ones((2*r-1, 2*r-1))
00230         for i in range(numempty):
00231             A[i,0:(numempty-i)] = 0
00232             A[-i-1,0:(numempty-i)] = 0
00233             A[i,(-numempty+i):] = 0
00234             A[-i-1,(-numempty+i):] = 0
00235         return A
00236 
00237     @staticmethod
00238     def imshow(I):
00239         If = array(I,'float')
00240         a = numpy.min([m for m in If.flat if isfinite(m)])
00241         b = numpy.max([m for m in If.flat if isfinite(m)])
00242         if fabs(a-b)<1e-5:
00243             a=0
00244         scipy.misc.pilutil.imshow(array((If-a)/(b-a)*255,'uint8'))
00245 
00246 def test():
00247     import ObjectProjection
00248     I = scipy.misc.pilutil.imread('image001.png')
00249     self = ObjectProjection.ObjectProjection(kinbodyfilename='brkt_hvac.kinbody.xml')
00250     Tcamera = linalg.inv(array(((-0.2276663,0.0930725,0.9692811,-0.1170402),
00251                      (-0.4038756,-0.9147867,-0.0070232,-0.0036750),
00252                      (0.8860320, -0.3930678, 0.2458557,1.8410743),
00253                      (0,0,0,1))))
00254     KK = array(((3.4724e+03,0.0000e+00,5.8521e+02),
00255                 (0.0000e+00,3.4861e+03,3.8965e+02),
00256                 (0.0000e+00,0.0000e+00,1.0000e+00)))
00257     imagewidth = 1024
00258     imageheight = 768
00259     Iposition,Idepth,newKK,offset = self.Compute3DPositionImage(Tcamera=Tcamera,KK=KK,imagewidth=imagewidth,imageheight=imageheight)
00260     ObjectProjection.ObjectProjection.imshow(Iposition)
00261     ObjectProjection.ObjectProjection.imshow(Idepth)


posedetectiondb
Author(s): Rosen Diankov (rdiankov@cs.cmu.edu)
autogenerated on Tue Jan 27 2015 11:53:11