libtoon: derivatives.h Source File

00001 #ifndef TOON_INCLUDE_DERIVATIVES_NUMERICAL_H
00002 #define TOON_INCLUDE_DERIVATIVES_NUMERICAL_H
00003 
00004 #include <TooN/TooN.h>
00005 #include <vector>
00006 #include <cmath>
00007 
00008 using namespace std;
00009 using namespace TooN;
00010 
00011 namespace TooN{
00012         namespace Internal{
00018                 template<class F, class Precision> std::pair<Precision, Precision> extrapolate_to_zero(F& f)
00019                 {
00020                         using std::isfinite;
00021                         using std::max;
00022                         using std::isnan;
00023                         //Use points at c^0, c^-1, ... to extrapolate towards zero.
00024                         const Precision c = 1.1, t = 2;
00025 
00026                         static const int Npts = 400;
00027 
00028                         /* Neville's table is:  
00029                         x_0      y_0    P_0                                 
00030                                                                   P_{01}                        
00031                         x_1      y_1    P_1            P_{012}              
00032                                                                   P_{12}             P_{0123}   
00033                         x_2      y_2    P_2            P_{123}              
00034                                                                   P_{23}                        
00035                         x_3      y_3    P_3   
00036 
00037                         In Matrix form, this is rearranged as:
00038                         
00039                         
00040                         x_0      y_0    P_0                                 
00041                                                                                                                   
00042                         x_1      y_1    P_1   P_{01}                 
00043                                                                                            
00044                         x_2      y_2    P_2   P_{12}  P_{012}               
00045                                                                                                                                            
00046                         x_3      y_3    P_3   P_{23}  P_{123} P_{0123} 
00047 
00048                         This is rewritten further as:
00049 
00050                                   |                  0      1      2       3
00051                          _____|___________________________________________________      
00052                           0   | x_0      y_0    P^00                                
00053                                   |                                               
00054                           1   | x_1      y_1    P^10  P^11                   
00055                                   |                                    
00056                           2   | x_2      y_2    P^10  P^21    P^22                  
00057                                   |                                                            
00058                           3   | x_3      y_3    P^10  P^31    P^23    P^33     
00059 
00060                         So, P^ij == P_{j-i...j}
00061 
00062                         Finally, only the current and previous row are required. This reduces
00063                         the memory cost from quadratic to linear.  The naming scheme is:
00064 
00065                         P[i][j]    -> P_i[j]
00066                         P[i-1][j]  -> P_i_1[j]
00067                         */
00068 
00069                         vector<Precision> P_i(1), P_i_1;
00070                         
00071                         Precision best_err = HUGE_VAL;
00072                         Precision best_point=HUGE_VAL;
00073 
00074                         //The first tranche of points might be bad.
00075                         //Don't quit while no good points have ever happened.
00076                         bool ever_ok=0;
00077 
00078                         //Compute f(x) as x goes from 1 towards zero and extrapolate to 0
00079                         Precision x=1;
00080                         for(int i=0; i < Npts; i++)
00081                         {
00082                                 swap(P_i, P_i_1);
00083                                 P_i.resize(i+2);
00084 
00085 
00086                                 //P[i][0] = c^ -i;
00087                                 P_i[0] = f(x);
00088 
00089                                 x /= c;
00090                                 
00091                                 //Compute the extrapolations
00092                                 //cj id c^j
00093                                 Precision cj = 1;
00094                                 bool better=0; //Did we get better?
00095                                 bool any_ok=0;
00096                                 for(int j=1; j <= i; j++)
00097                                 {
00098                                         cj *= c;
00099                                         //We have (from above) n = i and k = n - j
00100                                         //n-k = i - (n - j) = i - i + j = j
00101                                         //Therefore c^(n-k) is just c^j
00102 
00103                                         P_i[j] = (cj * P_i[j-1] - P_i_1[j-1]) / (cj - 1);
00104 
00105                                         if(any_ok || isfinite(P_i[j]))
00106                                                 ever_ok = 1;
00107 
00108                                         //Compute the difference between the current point (high order)
00109                                         //and the corresponding lower order point at the current step size
00110                                         Precision err1 = abs(P_i[j] - P_i[j-1]);
00111 
00112                                         //Compute the difference between two consecutive points at the
00113                                         //corresponding lower order point at the larger stepsize
00114                                         Precision err2 = abs(P_i[j] - P_i_1[j-1]);
00115 
00116                                         //The error is the larger of these.
00117                                         Precision err = max(err1, err2);
00118 
00119                                         if(err < best_err && isfinite(err))
00120                                         {
00121                                                 best_err = err;
00122                                                 best_point = P_i[j];
00123                                                 better=1;
00124                                         }
00125                                 }
00126                                 
00127                                 using namespace std;
00128                                 //If the highest order point got worse, or went off the rails, 
00129                                 //and some good points have been seen, then break.
00130                                 if(ever_ok && !better && i > 0 && (abs(P_i[i] - P_i_1[i-1]) > t * best_err|| isnan(P_i[i])))
00131                                         break;
00132                         }
00133 
00134                         return std::make_pair(best_point, best_err);
00135                 }
00136 
00140                 template<class Functor, class  Precision, int Size, class Base> struct CentralDifferenceGradient
00141                 {
00142                         const Vector<Size, Precision, Base>& v; 
00143                         Vector<Size, Precision> x;      
00144                         const Functor&  f;                      
00145                         int i;                                  
00146 
00147                         CentralDifferenceGradient(const Vector<Size, Precision, Base>& v_, const Functor& f_)
00148                         :v(v_),x(v),f(f_),i(0)
00149                         {}
00150                         
00152                         Precision operator()(Precision hh) 
00153                         {
00154                                 using std::max;
00155                                 using std::abs;
00156 
00157                                 //Make the step size be on the scale of the value.
00158                                 double h = hh * max(abs(v[i]) * 1e-3, 1e-3);
00159 
00160                                 x[i] = v[i] - h;
00161                                 double f1 = f(x);
00162                                 x[i] = v[i] + h;
00163                                 double f2 = f(x);
00164                                 x[i] = v[i];
00165 
00166                                 double d =  (f2 - f1) / (2*h);
00167                                 return d;
00168                         }
00169                 };
00170 
00174                 template<class Functor, class  Precision, int Size, class Base> struct CentralDifferenceSecond
00175                 {
00176                         const Vector<Size, Precision, Base>& v; 
00177                         Vector<Size, Precision> x;              
00178                         const Functor&  f;                      
00179                         int i;                                  
00180                         const Precision central;                
00181 
00182                         CentralDifferenceSecond(const Vector<Size, Precision, Base>& v_, const Functor& f_)
00183                         :v(v_),x(v),f(f_),i(0),central(f(x))
00184                         {}
00185                         
00187                         Precision operator()(Precision hh) 
00188                         {
00189                                 using std::max;
00190                                 using std::abs;
00191 
00192                                 //Make the step size be on the scale of the value.
00193                                 double h = hh * max(abs(v[i]) * 1e-3, 1e-3);
00194 
00195                                 x[i] = v[i] - h;
00196                                 double f1 = f(x);
00197 
00198                                 x[i] = v[i] + h;
00199                                 double f2 = f(x);
00200                                 x[i] = v[i];
00201 
00202                                 double d =  (f2 - 2*central + f1) / (h*h);
00203                                 return d;
00204                         }
00205                 };
00206 
00210                 template<class Functor, class  Precision, int Size, class Base> struct CentralCrossDifferenceSecond
00211                 {
00212                         const Vector<Size, Precision, Base>& v; 
00213                         Vector<Size, Precision> x;              
00214                         const Functor&  f;                      
00215                         int i;                                  
00216                         int j;                                  
00217 
00218                         CentralCrossDifferenceSecond(const Vector<Size, Precision, Base>& v_, const Functor& f_)
00219                         :v(v_),x(v),f(f_),i(0),j(0)
00220                         {}
00221                         
00223                         Precision operator()(Precision hh) 
00224                         {
00225                                 using std::max;
00226                                 using std::abs;
00227 
00228                                 //Make the step size be on the scale of the value.
00229                                 double h = hh * max(abs(v[i]) * 1e-3, 1e-3);
00230 
00231                                 x[i] = v[i] + h;
00232                                 x[j] = v[j] + h;
00233                                 double a = f(x);
00234 
00235                                 x[i] = v[i] - h;
00236                                 x[j] = v[j] + h;
00237                                 double b = f(x);
00238 
00239                                 x[i] = v[i] + h;
00240                                 x[j] = v[j] - h;
00241                                 double c = f(x);
00242 
00243 
00244                                 x[i] = v[i] - h;
00245                                 x[j] = v[j] - h;
00246                                 double d = f(x);
00247 
00248                                 return (a-b-c+d) / (4*h*h);
00249                         }
00250                 };
00251 
00252         }
00253 
00254 
00326         template<class F, int S, class P, class B> Vector<S, P> numerical_gradient(const F& f, const Vector<S, P, B>& x)
00327         {
00328                 using namespace Internal;
00329                 Vector<S> grad(x.size());
00330 
00331                 CentralDifferenceGradient<F, P, S, B> d(x, f);
00332 
00333                 for(int i=0; i < x.size(); i++)
00334                 {
00335                         d.i = i;
00336                         grad[i] = extrapolate_to_zero<CentralDifferenceGradient<F, P, S, B>, P>(d).first;
00337                 }
00338 
00339                 return grad;
00340         }
00341         
00350         template<class F, int S, class P, class B> Matrix<S,2,P> numerical_gradient_with_errors(const F& f, const Vector<S, P, B>& x)
00351         {
00352                 using namespace Internal;
00353                 Matrix<S,2> g(x.size(), 2);
00354 
00355                 CentralDifferenceGradient<F, P, S, B> d(x, f);
00356 
00357                 for(int i=0; i < x.size(); i++)
00358                 {
00359                         d.i = i;
00360                         pair<double, double> r= extrapolate_to_zero<CentralDifferenceGradient<F, P, S, B>, P>(d);
00361                         g[i][0] = r.first;
00362                         g[i][1] = r.second;
00363                 }
00364 
00365                 return g;
00366         }
00367 
00368         
00380         template<class F, int S, class P, class B> pair<Matrix<S, S, P>, Matrix<S, S, P> > numerical_hessian_with_errors(const F& f, const Vector<S, P, B>& x)
00381         {
00382                 using namespace Internal;
00383                 Matrix<S> hess(x.size(), x.size());
00384                 Matrix<S> errors(x.size(), x.size());
00385 
00386                 CentralDifferenceSecond<F, P, S, B> curv(x, f);
00387                 CentralCrossDifferenceSecond<F, P, S, B> cross(x, f);
00388 
00389                 //Perform the cross differencing.
00390 
00391                 for(int r=0; r < x.size(); r++)
00392                         for(int c=r+1; c < x.size(); c++)
00393                         {
00394                                 cross.i = r;
00395                                 cross.j = c;
00396                                 pair<double, double> e =  extrapolate_to_zero<CentralCrossDifferenceSecond<F, P, S, B>, P >(cross);
00397                                 hess[r][c] = hess[c][r] = e.first;
00398                                 errors[r][c] = errors[c][r] = e.second;
00399                         }
00400 
00401                 for(int i=0; i < x.size(); i++)
00402                 {
00403                         curv.i = i;
00404                         pair<double, double> e = extrapolate_to_zero<CentralDifferenceSecond<F, P, S, B>, P>(curv);
00405                         hess[i][i] = e.first;
00406                         errors[i][i] = e.second;
00407                 }
00408 
00409                 return make_pair(hess, errors);
00410         }
00411         
00418         template<class F, int S, class P, class B> Matrix<S, S, P> numerical_hessian(const F& f, const Vector<S, P, B>& x)
00419         {
00420                 using namespace Internal;
00421                 Matrix<S> hess(x.size(), x.size());
00422 
00423                 CentralDifferenceSecond<F, P, S, B> curv(x, f);
00424                 CentralCrossDifferenceSecond<F, P, S, B> cross(x, f);
00425 
00426                 //Perform the cross differencing.
00427                 for(int r=0; r < x.size(); r++)
00428                         for(int c=r+1; c < x.size(); c++)
00429                         {
00430                                 cross.i = r;
00431                                 cross.j = c;
00432                                 pair<double, double> e =  extrapolate_to_zero<CentralCrossDifferenceSecond<F, P, S, B>, P >(cross);
00433                                 hess[r][c] = hess[c][r] = e.first;
00434                         }
00435 
00436                 for(int i=0; i < x.size(); i++)
00437                 {
00438                         curv.i = i;
00439                         pair<double, double> e = extrapolate_to_zero<CentralDifferenceSecond<F, P, S, B>, P>(curv);
00440                         hess[i][i] = e.first;
00441                 }
00442 
00443                 return hess;
00444         }
00445 }
00446 
00447 #endif
00448