// Copyright (C) 1993,4,5: R B Davies

#ifndef NEWMATNL_LIB
#define NEWMATNL_LIB 0

#include "newmat.h"

#ifdef use_namespace
namespace NEWMAT {
#endif



/*

This is the beginning of a series of classes for non-linear optimisation.

At present there are two classes. FindMaximum2 is the basic optimisation
strategy when one is doing an optimisation where one has first
derivatives and estimates of the second derivatives. Class
NonLinearLeastSquares is derived from FindMaximum2. This provides the
functions that calculate function values and derivatives.

A third class is now added. This is for doing maximum-likelihood when
you have first derivatives and something like the Fisher Information
matrix (eg the variance-covariance matrix of the first derivatives or
minus the second derivatives - this matrix is assumed to be positive
definite).



class FindMaximum2

Suppose T is the ColumnVector of parameters, F(T) the function we want
to maximise, D(T) the ColumnVector of derivatives of F with respect to
T, and S(T) the matrix of second derivatives.

Then the basic iteration is: given a value of T, update it to

   T - S.i() * D

where .i() denotes inverse.

If F were quadratic this would give exactly the right answer (except it
might find a minimum rather than a maximum). Since F is not usually
quadratic, the simple procedure would be to recalculate S and D with the
new value of T and keep iterating until the process converges. This is
known as the method of conjugate gradients.

In practice, this method may not converge. FindMaximum2 considers an
iteration of the form

   T - x * S.i() * D

where x is a number.
It tries x = 1 and uses the values of F and its
slope with respect to x at x = 0 and x = 1 to fit a cubic in x. It then
chooses x to maximise the resulting function. This gives our new value
of T. The program checks that the value of F is getting better and
carries out a variety of strategies if it is not.

The program also has a second strategy. If the successive values of T
seem to be lying along a curve - eg we are following along a curved
ridge - the program will try to fit this ridge and project along it.
This does not work at present and is commented out.

FindMaximum2 has three virtual functions which need to be over-ridden
by a derived class.

   void Value(const ColumnVector& T, bool wg, Real& f, bool& oorg);

T is the column vector of parameters. The function returns the value of
the function to f, but may instead set oorg to true if the parameter
values are not valid. If wg is true it may also calculate and store the
second derivative information.

   bool NextPoint(ColumnVector& H, Real& d);

Using the value of T provided in the previous call of Value, find the
conjugate gradients adjustment to T, that is - S.i() * D. Also return

   d = D.t() * S.i() * D.

NextPoint should return true if it considers that the process has
converged (d very small) and false otherwise. The previous call of
Value will have set wg to true, so that S will be available.

   Real LastDerivative(const ColumnVector& H);

Return the scalar product of H and the vector of derivatives at the
last value of T.

The function Fit is the function that calls the iteration.

   void Fit(ColumnVector&, int);

The arguments are the trial parameter values as a ColumnVector and the
maximum number of iterations.
The program throws a DataException if the
initial parameters are not valid and a ConvergenceException if the
process fails to converge.


class NonLinearLeastSquares

This class is derived from FindMaximum2 and carries out a non-linear
least squares fit. It uses a QR decomposition to carry out the
operations required by FindMaximum2.

A prototype class R1_Col_I_D is provided. The user needs to derive a
class from this which includes functions giving the predicted value of
each observation and its derivatives. An object from this class has to
be provided to class NonLinearLeastSquares.

Suppose we observe n normal random variables with the same unknown
variance and such that the i-th one has expected value given by f(i,P),
where P is a column vector of unknown parameters and f is a known
function. We wish to estimate P.

First derive a class from R1_Col_I_D and override Real operator()(int i)
to give the value of the function f in terms of i and the ColumnVector
para defined in class R1_Col_I_D. Also override ReturnMatrix
Derivatives() to give the derivatives of f at para and the value of i
used in the preceding call to operator(). Return the result as a
RowVector. Construct an object from this class. Suppose in what follows
it is called pred.

Now construct a NonLinearLeastSquares object accessing pred and
optionally an iteration limit and an accuracy criterion.

   NonLinearLeastSquares NLLS(pred, 1000, 0.0001);

The accuracy criterion should be somewhat less than one and 0.0001 is
about the smallest sensible value.

Define a ColumnVector P containing a guess at the value of the unknown
parameters, and a ColumnVector Y containing the observed data.
Call 00142 00143 NLLS.Fit(Y,P); 00144 00145 If the process converges, P will contain the estimates of the unknown 00146 parameters. If it does not converge an exception will be generated. 00147 00148 The following member functions can be called after you have done a fit. 00149 00150 Real ResidualVariance() const; 00151 00152 The estimate of the variance of the observations. 00153 00154 void GetResiduals(ColumnVector& Z) const; 00155 00156 The residuals of the individual observations. 00157 00158 void GetStandardErrors(ColumnVector&); 00159 00160 The standard errors of the observations. 00161 00162 void GetCorrelations(SymmetricMatrix&); 00163 00164 The correlations of the observations. 00165 00166 void GetHatDiagonal(DiagonalMatrix&) const; 00167 00168 Forms a diagonal matrix of values between 0 and 1. If the i-th value is 00169 larger than, say 0.2, then the i-th data value could have an undue 00170 influence on your estimates. 00171 00172 00173 */ 00174 00175 class FindMaximum2 00176 { 00177 virtual void Value(const ColumnVector&, bool, Real&, bool&) = 0; 00178 virtual bool NextPoint(ColumnVector&, Real&) = 0; 00179 virtual Real LastDerivative(const ColumnVector&) = 0; 00180 public: 00181 void Fit(ColumnVector&, int); 00182 virtual ~FindMaximum2() {} // to keep gnu happy 00183 }; 00184 00185 class R1_Col_I_D 00186 { 00187 // The prototype for a Real function of a ColumnVector and an 00188 // integer. 00189 // You need to derive your function from this one and put in your 00190 // function for operator() and Derivatives() at least. 00191 // You may also want to set up a constructor to enter in additional 00192 // parameter values (that will not vary during the solve). 
00193 00194 protected: 00195 ColumnVector para; // Current x value 00196 00197 public: 00198 virtual bool IsValid() { return true; } 00199 // is the current x value OK 00200 virtual Real operator()(int i) = 0; // i-th function value at current para 00201 virtual void Set(const ColumnVector& X) { para = X; } 00202 // set current para 00203 bool IsValid(const ColumnVector& X) 00204 { Set(X); return IsValid(); } 00205 // set para, check OK 00206 Real operator()(int i, const ColumnVector& X) 00207 { Set(X); return operator()(i); } 00208 // set para, return value 00209 virtual ReturnMatrix Derivatives() = 0; 00210 // return derivatives as RowVector 00211 virtual ~R1_Col_I_D() {} // to keep gnu happy 00212 }; 00213 00214 00215 class NonLinearLeastSquares : public FindMaximum2 00216 { 00217 // these replace the corresponding functions in FindMaximum2 00218 void Value(const ColumnVector&, bool, Real&, bool&); 00219 bool NextPoint(ColumnVector&, Real&); 00220 Real LastDerivative(const ColumnVector&); 00221 00222 Matrix X; // the things we need to do the 00223 ColumnVector Y; // QR triangularisation 00224 UpperTriangularMatrix U; // see the write-up in newmata.txt 00225 ColumnVector M; 00226 Real errorvar, criterion; 00227 int n_obs, n_param; 00228 const ColumnVector* DataPointer; 00229 RowVector Derivs; 00230 SymmetricMatrix Covariance; 00231 DiagonalMatrix SE; 00232 R1_Col_I_D& Pred; // Reference to predictor object 00233 int Lim; // maximum number of iterations 00234 00235 public: 00236 NonLinearLeastSquares(R1_Col_I_D& pred, int lim=1000, Real crit=0.0001) 00237 : criterion(crit), Pred(pred), Lim(lim) {} 00238 void Fit(const ColumnVector&, ColumnVector&); 00239 Real ResidualVariance() const { return errorvar; } 00240 void GetResiduals(ColumnVector& Z) const { Z = Y; } 00241 void GetStandardErrors(ColumnVector&); 00242 void GetCorrelations(SymmetricMatrix&); 00243 void GetHatDiagonal(DiagonalMatrix&) const; 00244 00245 private: 00246 void MakeCovariance(); 00247 }; 00248 
00249 00250 // The next class is the prototype class for calculating the 00251 // log-likelihood. 00252 // I assume first derivatives are available and something like the 00253 // Fisher Information or variance/covariance matrix of the first 00254 // derivatives or minus the matrix of second derivatives is 00255 // available. This matrix must be positive definite. 00256 00257 class LL_D_FI 00258 { 00259 protected: 00260 ColumnVector para; // current parameter values 00261 bool wg; // true if FI matrix wanted 00262 00263 public: 00264 virtual void Set(const ColumnVector& X) { para = X; } 00265 // set parameter values 00266 virtual void WG(bool wgx) { wg = wgx; } 00267 // set wg 00268 00269 virtual bool IsValid() { return true; } 00270 // return true is para is OK 00271 bool IsValid(const ColumnVector& X, bool wgx=true) 00272 { Set(X); WG(wgx); return IsValid(); } 00273 00274 virtual Real LogLikelihood() = 0; // return the loglikelihhod 00275 Real LogLikelihood(const ColumnVector& X, bool wgx=true) 00276 { Set(X); WG(wgx); return LogLikelihood(); } 00277 00278 virtual ReturnMatrix Derivatives() = 0; 00279 // column vector of derivatives 00280 virtual ReturnMatrix FI() = 0; // Fisher Information matrix 00281 virtual ~LL_D_FI() {} // to keep gnu happy 00282 }; 00283 00284 // This is the class for doing the maximum likelihood estimation 00285 00286 class MLE_D_FI : public FindMaximum2 00287 { 00288 // these replace the corresponding functions in FindMaximum2 00289 void Value(const ColumnVector&, bool, Real&, bool&); 00290 bool NextPoint(ColumnVector&, Real&); 00291 Real LastDerivative(const ColumnVector&); 00292 00293 // the things we need for the analysis 00294 LL_D_FI& LL; // reference to log-likelihood 00295 int Lim; // maximum number of iterations 00296 Real Criterion; // convergence criterion 00297 ColumnVector Derivs; // for the derivatives 00298 LowerTriangularMatrix LT; // Cholesky decomposition of FI 00299 SymmetricMatrix Covariance; 00300 DiagonalMatrix SE; 
00301 00302 public: 00303 MLE_D_FI(LL_D_FI& ll, int lim=1000, Real criterion=0.0001) 00304 : LL(ll), Lim(lim), Criterion(criterion) {} 00305 void Fit(ColumnVector& Parameters); 00306 void GetStandardErrors(ColumnVector&); 00307 void GetCorrelations(SymmetricMatrix&); 00308 00309 private: 00310 void MakeCovariance(); 00311 }; 00312 00313 00314 #ifdef use_namespace 00315 } 00316 #endif 00317 00318 00319 00320 #endif 00321 00322 // body file: newmatnl.cpp 00323 00324 00325 00327