gaussian_process: gpSparseRegression.hpp Source File

Go to the documentation of this file.
00001 #ifndef _GP_SPARSE_UMFPACK_REGRESSION_HPP_
00002 #define _GP_SPARSE_UMFPACK_REGRESSION_HPP_
00003 
00004 #include <vector>
00005 
00006 #include <assert.h>
00007 
00008 #include "gpReg/covarianceFunction.hpp"
00009 
00010 //#include "umfpack.hpp"
00011 #include <boost/numeric/ublas/matrix.hpp>
00012 #include <boost/numeric/ublas/io.hpp>
00013 #include "gpReg/Matrix.hh"
00014 
00015 // strange UBLAS compile errors concerning "same_impl_ex" (sth. with int vs. size_t)
00016 // source: ttp://archives.free.net.ph/message/20061016.230807.43f957a0.en.html
00017 // solutions: added -DNDEBUG to the compile options
00018 //template<class T>
00019 //BOOST_UBLAS_INLINE size_t same_impl_ex (const size_t &size1, const int &size2, const char *file, int line) {
00020 //  BOOST_UBLAS_CHECK_EX (size1 == size2, file, line, bad_argument ());
00021   // WARNING: this fails if size2 is negative!
00022 //  return (size1<size2?(size_t)size1:(size_t)size2);
00023 //}
00024 
00025 #define TVector boost::numeric::ublas::vector<double>
00026 #define TMatrix boost::numeric::ublas::compressed_matrix<double>
00027 
00028 
00029 using namespace std;
00030 
00031 #define PROFILING
00032 
00033 #ifdef PROFILING
00034 #include "gpReg/profiler.hpp"
00035 #endif
00036 
00037 
00038 // -----------------------------------------------------------------------
00039 class GPSparseRegression
00040 {
00041   
00042   public:
00043   
00044 
00045   
00046     // -----------------------------------------------------------------------
00047     GPSparseRegression( CovarianceFunction &covFunc, double sigmaNoise,
00048         GPSparseRegression *noiseGP = NULL ) :
00049         m_dataPoints(0), m_covFunc(covFunc), m_sigmaNoise(sigmaNoise)
00050     {
00051       s_iC  = NULL;
00052       s_t   = NULL;
00053       s_k   = NULL;
00054       s_iCt = NULL;
00055       s_Cs  = NULL;
00056       m_numDataPoints = 0;
00057       m_noiseGP = noiseGP;
00058     }
00059     
00060 
00061  
00062   // -----------------------------------------------------------------------
00063   // copy constructor
00064   GPSparseRegression( const GPSparseRegression &orig ) : m_covFunc(orig.m_covFunc),
00065       m_numDataPoints(orig.m_numDataPoints), m_dataPoints(orig.m_dataPoints),
00066       s_t(orig.s_t), m_sigmaNoise(orig.m_sigmaNoise)
00067   {
00068         fprintf( stderr, "ERROR (no copy const)\n" );
00069         assert( false ); // not implemented
00070   }
00071   
00072   // -----------------------------------------------------------------------
00073   // copy constructor
00074   /*
00075   GPSparseRegression( const GPSparseRegression &orig ) : m_covFunc(orig.m_covFunc),
00076         m_numDataPoints(orig.m_numDataPoints), m_dataPoints(orig.m_dataPoints),
00077         s_t(orig.s_t), m_sigmaNoise(orig.m_sigmaNoise) {
00078       s_iC = NULL;
00079       if (orig.s_iC) {
00080         s_iC = new umfpack::umfpack_matrix_operator<double>( *(orig.s_iC) );
00081       }
00082       s_t = NULL;
00083       if (orig.s_t) {
00084         s_t = new TVector( *(orig.s_t) );
00085       }
00086       s_k = NULL;
00087       if (orig.s_k) {
00088         s_k = new TVector( *(orig.s_k) );
00089       }
00090       s_iCt = NULL;
00091       if (orig.s_iCt) {
00092         s_iCt = new TVector( *(orig.s_iCt) );
00093       }
00094       s_Cs = NULL;
00095       if (orig.s_Cs) {
00096         s_Cs = new TMatrix( *(orig.s_Cs) );
00097       }
00098       s_noiseGP = NULL;
00099       if (orig.s_noiseGP) {
00100         s_noiseGP = new GPRegression( *(orig.s_noiseGP) );
00101       }
00102     }
00103    */
00104     
00105 
00106 
00107     // -----------------------------------------------------------------------
00108     ~GPSparseRegression()
00109     {
00110       if (s_iC) delete (s_iC);
00111       if (s_t) delete (s_t);
00112       if (s_k) delete (s_k);
00113       if (s_iCt) delete (s_iCt);
00114       if (s_Cs) delete (s_Cs);
00115     }
00116 
00117 
00118 
00119     // -----------------------------------------------------------------------
00120     void setSigmaNoise( double sigma ) { m_sigmaNoise = sigma; }
00121 
00122 
00123 
00124     // -----------------------------------------------------------------------
00125     void setDataPoints( vector<Vector> &dataPoints, vector<double> &dataTargets )
00126     {
00127       // copy
00128       m_dataPoints = dataPoints;
00129       m_numDataPoints = dataPoints.size();
00130       if (s_t) delete (s_t);
00131       s_t = new (TVector)(m_numDataPoints);
00132       for (int i=0; i<m_numDataPoints; i++) {
00133         (*s_t)(i) = dataTargets[i];
00134       }
00135     }      
00136 
00137 
00138 
00139     // -----------------------------------------------------------------------
00140     void buildGP()
00141     {
00142       assert( m_numDataPoints>0 );
00143       using namespace boost::numeric::ublas;
00144       using namespace umfpack;
00145 
00146       // coordinate_matrix: incredibly slow
00147       // matrix and compressed_matrix: fast and similar
00148       // default: row_major -> always have first matrix dimension in outer loop!!!
00149       //PROFILER_start();
00150       TMatrix s_C( m_numDataPoints, m_numDataPoints );
00151       double cov;
00152       double logNoise;
00153       for (int i1=0; i1<m_numDataPoints; i1++) {
00154         for (int i2=0; i2<m_numDataPoints; i2++) {
00155                   cov = m_covFunc.getCov( m_dataPoints[i1], m_dataPoints[i2] );
00156           if (i1==i2) {
00157             cov += exp(2.0 * m_sigmaNoise);
00158             if (m_noiseGP) {
00159               m_noiseGP->evalGP(m_dataPoints[i1],logNoise);
00160               cov += exp(logNoise);
00161             }
00162           }
00163           s_C(i1,i2) = cov;
00164         }
00165       }
00166       //fprintf( stdout, "  build s_C: %f\n", PROFILER_getElapsed() );
00167 
00168       if (s_k) delete (s_k);
00169       s_k = new TVector(m_numDataPoints);
00170       if (s_Cs) delete (s_Cs);
00171       s_Cs = new umfpack_matrix_operator<double>::matrix_type(s_C);
00172 
00173       //PROFILER_start();
00174       if (s_iC) delete (s_iC);
00175       s_iC = new (umfpack_matrix_operator<double>)( *s_Cs );
00176       //fprintf( stdout, "build s_iC: %f\n", PROFILER_getElapsed() );
00177 
00178       if (s_iCt) delete (s_iCt);
00179       s_iCt = new umfpack_matrix_operator<double>::vector_type(m_numDataPoints);
00180 
00181       //PROFILER_start();
00182       s_iC->apply( *s_t, *s_iCt );
00183       //fprintf( stdout, "build s_iCt: %f\n", PROFILER_getElapsed() );
00184 
00185       //if (s_iCk) delete (s_iCk);
00186       //s_iCk = new (umfpack_matrix_operator<double>::vector_type)(m_numDataPoints);
00187     }
00188 
00189 
00190 
00191     // -----------------------------------------------------------------------
00192     void evalGP( Vector &x, double &mean, double &var )
00193     {
00194       using namespace boost::numeric::ublas;
00195       using namespace umfpack;
00196 
00197       for (int i1=0; i1<m_numDataPoints; i1++) {
00198         (*s_k)(i1) =  m_covFunc.getCov( x, m_dataPoints[i1] );
00199       }
00200 
00201       TVector s_iCk(m_numDataPoints);
00202       s_iC->apply( *s_k, s_iCk );
00203 
00204       mean = inner_prod( *s_k, *s_iCt );
00205       var = m_covFunc.getCov(x,x) - (inner_prod( *s_k, s_iCk ) );
00206 
00207       if (m_noiseGP) {
00208         double logNoise;
00209         m_noiseGP->evalGP( x, logNoise );
00210         var += exp(logNoise);
00211       }
00212     }
00213 
00214 
00215 
00216     // -----------------------------------------------------------------------
00217     void evalGP( Vector &x, double &mean )
00218     {
00219       using namespace boost::numeric::ublas;
00220       using namespace umfpack;
00221 
00222       for (int i1=0; i1<m_numDataPoints; i1++) {
00223         (*s_k)(i1) = m_covFunc.getCov( x, m_dataPoints[i1] );
00224       }
00225 
00226       mean = inner_prod( *s_k, *s_iCt );
00227     }
00228 
00229 
00230 
00231     // -----------------------------------------------------------------------
00232     void save()
00233     {}
00234     // -----------------------------------------------------------------------
00235     void load()
00236     {}
00237 
00238 
00239 
00240     // -----------------------------------------------------------------------
00241     double getObservationLikelihood( vector<Vector> &dataPoints, vector<double> &observations )
00242     {
00243       int numObs = dataPoints.size();
00244       using namespace boost::numeric::ublas;
00245 
00246       //TMatrix sK__(numObs,numObs);
00247       // compressed matrix: default: row_major
00248       //PROFILER_start();
00249       TMatrix sK__(numObs,numObs);
00250       double cov;
00251       double logNoise;
00252       for (int i=0; i<numObs; i++) {
00253         for (int j=0; j<numObs; j++) {
00254                   cov = m_covFunc.getCov( dataPoints[i], dataPoints[j] );
00255           if (i==j) {
00256             cov += exp(2.0 * m_sigmaNoise);
00257             if (m_noiseGP) {
00258               m_noiseGP->evalGP( dataPoints[i], logNoise );
00259               cov += exp(logNoise);
00260             }
00261           }
00262           sK__(i,j) = cov; 
00263         }
00264       }
00265       //fprintf( stderr, "  spReg.getLh(1): %f\n", PROFILER_getElapsed() );
00266 
00267       //PROFILER_start();
00268       TMatrix sK_(numObs,m_numDataPoints);
00269       for (int i=0; i<numObs; i++) {
00270         for (int j=0; j<m_numDataPoints; j++) {
00271           sK_(i,j) = m_covFunc.getCov( dataPoints[i], m_dataPoints[j] );
00272         }
00273       }
00274 
00275       TVector y_(numObs);
00276       double mean;
00277       for (int i=0; i<numObs; i++) {
00278         evalGP( dataPoints[i], mean );
00279         y_(i) = (observations[i]-mean);
00280       }
00281 
00282       double y__ = inner_prod( y_, TVector(prod(sK__,y_)) );
00283       //fprintf( stderr, "  ** y_ %f\n", y__ );
00284 
00285       TVector ytB = prod( trans(sK_), y_ );
00286       
00287       TVector iCytB(m_numDataPoints);
00288       s_iC->apply( ytB, iCytB );
00289 
00290       double dotProd = inner_prod( ytB, iCytB );
00291       //fprintf( stderr, "  ** dp %f\n", dotProd );
00292       
00293       double dataFit = -0.5 * ( y__ - dotProd );
00294 
00295       //fprintf( stderr, "  ** log df %f\n", dataFit );
00296 
00297       //fprintf( stderr, "  spReg.getLh(2): %f\n", PROFILER_getElapsed() );
00298       return dataFit;
00299       //double complPenalty = - 0.5 * log( K.Det() );
00300       //double norm = - 0.5 * numObs*log(2.0*M_PI);
00301       //double logLh = dataFit + complPenalty + norm;
00302       //fprintf( stderr, "# dataFit: %f\t complPenalty: %f\t norm: %f\t logLh: %f\n", dataFit, complPenalty, norm, logLh );
00303       //FILE *lhFile = fopen( "logLH.dat", "a" );
00304       //fprintf( lhFile, "%f %f %f %f\n", dataFit, complPenalty, norm, logLh );
00305       //fclose(lhFile);
00306       //return logLh;
00307     }
00308 
00309 
00310     // -----------------------------------------------------------------------
00311   public:
00312     int m_numDataPoints;
00313     vector<Vector> m_dataPoints;
00314     vector<double> m_dataVariances;
00315     CovarianceFunction &m_covFunc;
00316     double              m_sigmaNoise;
00317     GPSparseRegression *m_noiseGP;
00318   
00319   //private:
00320     umfpack::umfpack_matrix_operator<double> *s_iC;
00321     TVector *s_t;
00322     TVector *s_k;
00323     TVector *s_iCt;
00324     umfpack::umfpack_matrix_operator<double>::matrix_type *s_Cs;
00325 
00326 };
00327 
00328 
00329 
00330 #endif //_GP_SPARSE_UMFPACK_REGRESSION_HPP_