$search
00001 #ifndef _GP_SPARSE_UMFPACK_REGRESSION_HPP_ 00002 #define _GP_SPARSE_UMFPACK_REGRESSION_HPP_ 00003 00004 #include <vector> 00005 00006 #include <assert.h> 00007 00008 #include "gpReg/covarianceFunction.hpp" 00009 00010 //#include "umfpack.hpp" 00011 #include <boost/numeric/ublas/matrix.hpp> 00012 #include <boost/numeric/ublas/io.hpp> 00013 #include "gpReg/Matrix.hh" 00014 00015 // strange UBLAS compile errors concerning "same_impl_ex" (sth. with int vs. size_t) 00016 // source: ttp://archives.free.net.ph/message/20061016.230807.43f957a0.en.html 00017 // solutions: added -DNDEBUG to the compile options 00018 //template<class T> 00019 //BOOST_UBLAS_INLINE size_t same_impl_ex (const size_t &size1, const int &size2, const char *file, int line) { 00020 // BOOST_UBLAS_CHECK_EX (size1 == size2, file, line, bad_argument ()); 00021 // WARNING: this fails if size2 is negative! 00022 // return (size1<size2?(size_t)size1:(size_t)size2); 00023 //} 00024 00025 #define TVector boost::numeric::ublas::vector<double> 00026 #define TMatrix boost::numeric::ublas::compressed_matrix<double> 00027 00028 00029 using namespace std; 00030 00031 #define PROFILING 00032 00033 #ifdef PROFILING 00034 #include "gpReg/profiler.hpp" 00035 #endif 00036 00037 00038 // ----------------------------------------------------------------------- 00039 class GPSparseRegression 00040 { 00041 00042 public: 00043 00044 00045 00046 // ----------------------------------------------------------------------- 00047 GPSparseRegression( CovarianceFunction &covFunc, double sigmaNoise, 00048 GPSparseRegression *noiseGP = NULL ) : 00049 m_dataPoints(0), m_covFunc(covFunc), m_sigmaNoise(sigmaNoise) 00050 { 00051 s_iC = NULL; 00052 s_t = NULL; 00053 s_k = NULL; 00054 s_iCt = NULL; 00055 s_Cs = NULL; 00056 m_numDataPoints = 0; 00057 m_noiseGP = noiseGP; 00058 } 00059 00060 00061 00062 // ----------------------------------------------------------------------- 00063 // copy constructor 00064 GPSparseRegression( const GPSparseRegression &orig ) : m_covFunc(orig.m_covFunc), 00065 m_numDataPoints(orig.m_numDataPoints), m_dataPoints(orig.m_dataPoints), 00066 s_t(orig.s_t), m_sigmaNoise(orig.m_sigmaNoise) 00067 { 00068 fprintf( stderr, "ERROR (no copy const)\n" ); 00069 assert( false ); // not implemented 00070 } 00071 00072 // ----------------------------------------------------------------------- 00073 // copy constructor 00074 /* 00075 GPSparseRegression( const GPSparseRegression &orig ) : m_covFunc(orig.m_covFunc), 00076 m_numDataPoints(orig.m_numDataPoints), m_dataPoints(orig.m_dataPoints), 00077 s_t(orig.s_t), m_sigmaNoise(orig.m_sigmaNoise) { 00078 s_iC = NULL; 00079 if (orig.s_iC) { 00080 s_iC = new umfpack::umfpack_matrix_operator<double>( *(orig.s_iC) ); 00081 } 00082 s_t = NULL; 00083 if (orig.s_t) { 00084 s_t = new TVector( *(orig.s_t) ); 00085 } 00086 s_k = NULL; 00087 if (orig.s_k) { 00088 s_k = new TVector( *(orig.s_k) ); 00089 } 00090 s_iCt = NULL; 00091 if (orig.s_iCt) { 00092 s_iCt = new TVector( *(orig.s_iCt) ); 00093 } 00094 s_Cs = NULL; 00095 if (orig.s_Cs) { 00096 s_Cs = new TMatrix( *(orig.s_Cs) ); 00097 } 00098 s_noiseGP = NULL; 00099 if (orig.s_noiseGP) { 00100 s_noiseGP = new GPRegression( *(orig.s_noiseGP) ); 00101 } 00102 } 00103 */ 00104 00105 00106 00107 // ----------------------------------------------------------------------- 00108 ~GPSparseRegression() 00109 { 00110 if (s_iC) delete (s_iC); 00111 if (s_t) delete (s_t); 00112 if (s_k) delete (s_k); 00113 if (s_iCt) delete (s_iCt); 00114 if (s_Cs) delete (s_Cs); 00115 } 00116 00117 00118 00119 // ----------------------------------------------------------------------- 00120 void setSigmaNoise( double sigma ) { m_sigmaNoise = sigma; } 00121 00122 00123 00124 // ----------------------------------------------------------------------- 00125 void setDataPoints( vector<Vector> &dataPoints, vector<double> &dataTargets ) 00126 { 00127 // copy 00128 m_dataPoints = dataPoints; 00129 m_numDataPoints = dataPoints.size(); 00130 if (s_t) delete (s_t); 00131 s_t = new (TVector)(m_numDataPoints); 00132 for (int i=0; i<m_numDataPoints; i++) { 00133 (*s_t)(i) = dataTargets[i]; 00134 } 00135 } 00136 00137 00138 00139 // ----------------------------------------------------------------------- 00140 void buildGP() 00141 { 00142 assert( m_numDataPoints>0 ); 00143 using namespace boost::numeric::ublas; 00144 using namespace umfpack; 00145 00146 // coordinate_matrix: incredibly slow 00147 // matrix and compressed_matrix: fast and similar 00148 // default: row_major -> always have first matrix dimension in outer loop!!! 00149 //PROFILER_start(); 00150 TMatrix s_C( m_numDataPoints, m_numDataPoints ); 00151 double cov; 00152 double logNoise; 00153 for (int i1=0; i1<m_numDataPoints; i1++) { 00154 for (int i2=0; i2<m_numDataPoints; i2++) { 00155 cov = m_covFunc.getCov( m_dataPoints[i1], m_dataPoints[i2] ); 00156 if (i1==i2) { 00157 cov += exp(2.0 * m_sigmaNoise); 00158 if (m_noiseGP) { 00159 m_noiseGP->evalGP(m_dataPoints[i1],logNoise); 00160 cov += exp(logNoise); 00161 } 00162 } 00163 s_C(i1,i2) = cov; 00164 } 00165 } 00166 //fprintf( stdout, " build s_C: %f\n", PROFILER_getElapsed() ); 00167 00168 if (s_k) delete (s_k); 00169 s_k = new TVector(m_numDataPoints); 00170 if (s_Cs) delete (s_Cs); 00171 s_Cs = new umfpack_matrix_operator<double>::matrix_type(s_C); 00172 00173 //PROFILER_start(); 00174 if (s_iC) delete (s_iC); 00175 s_iC = new (umfpack_matrix_operator<double>)( *s_Cs ); 00176 //fprintf( stdout, "build s_iC: %f\n", PROFILER_getElapsed() ); 00177 00178 if (s_iCt) delete (s_iCt); 00179 s_iCt = new umfpack_matrix_operator<double>::vector_type(m_numDataPoints); 00180 00181 //PROFILER_start(); 00182 s_iC->apply( *s_t, *s_iCt ); 00183 //fprintf( stdout, "build s_iCt: %f\n", PROFILER_getElapsed() ); 00184 00185 //if (s_iCk) delete (s_iCk); 00186 //s_iCk = new (umfpack_matrix_operator<double>::vector_type)(m_numDataPoints); 00187 } 00188 00189 00190 00191 // ----------------------------------------------------------------------- 00192 void evalGP( Vector &x, double &mean, double &var ) 00193 { 00194 using namespace boost::numeric::ublas; 00195 using namespace umfpack; 00196 00197 for (int i1=0; i1<m_numDataPoints; i1++) { 00198 (*s_k)(i1) = m_covFunc.getCov( x, m_dataPoints[i1] ); 00199 } 00200 00201 TVector s_iCk(m_numDataPoints); 00202 s_iC->apply( *s_k, s_iCk ); 00203 00204 mean = inner_prod( *s_k, *s_iCt ); 00205 var = m_covFunc.getCov(x,x) - (inner_prod( *s_k, s_iCk ) ); 00206 00207 if (m_noiseGP) { 00208 double logNoise; 00209 m_noiseGP->evalGP( x, logNoise ); 00210 var += exp(logNoise); 00211 } 00212 } 00213 00214 00215 00216 // ----------------------------------------------------------------------- 00217 void evalGP( Vector &x, double &mean ) 00218 { 00219 using namespace boost::numeric::ublas; 00220 using namespace umfpack; 00221 00222 for (int i1=0; i1<m_numDataPoints; i1++) { 00223 (*s_k)(i1) = m_covFunc.getCov( x, m_dataPoints[i1] ); 00224 } 00225 00226 mean = inner_prod( *s_k, *s_iCt ); 00227 } 00228 00229 00230 00231 // ----------------------------------------------------------------------- 00232 void save() 00233 {} 00234 // ----------------------------------------------------------------------- 00235 void load() 00236 {} 00237 00238 00239 00240 // ----------------------------------------------------------------------- 00241 double getObservationLikelihood( vector<Vector> &dataPoints, vector<double> &observations ) 00242 { 00243 int numObs = dataPoints.size(); 00244 using namespace boost::numeric::ublas; 00245 00246 //TMatrix sK__(numObs,numObs); 00247 // compressed matrix: default: row_major 00248 //PROFILER_start(); 00249 TMatrix sK__(numObs,numObs); 00250 double cov; 00251 double logNoise; 00252 for (int i=0; i<numObs; i++) { 00253 for (int j=0; j<numObs; j++) { 00254 cov = m_covFunc.getCov( dataPoints[i], dataPoints[j] ); 00255 if (i==j) { 00256 cov += exp(2.0 * m_sigmaNoise); 00257 if (m_noiseGP) { 00258 m_noiseGP->evalGP( dataPoints[i], logNoise ); 00259 cov += exp(logNoise); 00260 } 00261 } 00262 sK__(i,j) = cov; 00263 } 00264 } 00265 //fprintf( stderr, " spReg.getLh(1): %f\n", PROFILER_getElapsed() ); 00266 00267 //PROFILER_start(); 00268 TMatrix sK_(numObs,m_numDataPoints); 00269 for (int i=0; i<numObs; i++) { 00270 for (int j=0; j<m_numDataPoints; j++) { 00271 sK_(i,j) = m_covFunc.getCov( dataPoints[i], m_dataPoints[j] ); 00272 } 00273 } 00274 00275 TVector y_(numObs); 00276 double mean; 00277 for (int i=0; i<numObs; i++) { 00278 evalGP( dataPoints[i], mean ); 00279 y_(i) = (observations[i]-mean); 00280 } 00281 00282 double y__ = inner_prod( y_, TVector(prod(sK__,y_)) ); 00283 //fprintf( stderr, " ** y_ %f\n", y__ ); 00284 00285 TVector ytB = prod( trans(sK_), y_ ); 00286 00287 TVector iCytB(m_numDataPoints); 00288 s_iC->apply( ytB, iCytB ); 00289 00290 double dotProd = inner_prod( ytB, iCytB ); 00291 //fprintf( stderr, " ** dp %f\n", dotProd ); 00292 00293 double dataFit = -0.5 * ( y__ - dotProd ); 00294 00295 //fprintf( stderr, " ** log df %f\n", dataFit ); 00296 00297 //fprintf( stderr, " spReg.getLh(2): %f\n", PROFILER_getElapsed() ); 00298 return dataFit; 00299 //double complPenalty = - 0.5 * log( K.Det() ); 00300 //double norm = - 0.5 * numObs*log(2.0*M_PI); 00301 //double logLh = dataFit + complPenalty + norm; 00302 //fprintf( stderr, "# dataFit: %f\t complPenalty: %f\t norm: %f\t logLh: %f\n", dataFit, complPenalty, norm, logLh ); 00303 //FILE *lhFile = fopen( "logLH.dat", "a" ); 00304 //fprintf( lhFile, "%f %f %f %f\n", dataFit, complPenalty, norm, logLh ); 00305 //fclose(lhFile); 00306 //return logLh; 00307 } 00308 00309 00310 // ----------------------------------------------------------------------- 00311 public: 00312 int m_numDataPoints; 00313 vector<Vector> m_dataPoints; 00314 vector<double> m_dataVariances; 00315 CovarianceFunction &m_covFunc; 00316 double m_sigmaNoise; 00317 GPSparseRegression *m_noiseGP; 00318 00319 //private: 00320 umfpack::umfpack_matrix_operator<double> *s_iC; 00321 TVector *s_t; 00322 TVector *s_k; 00323 TVector *s_iCt; 00324 umfpack::umfpack_matrix_operator<double>::matrix_type *s_Cs; 00325 00326 }; 00327 00328 00329 00330 #endif //_GP_SPARSE_UMFPACK_REGRESSION_HPP_