appl: mdpCassandra.c Source File

Go to the documentation of this file.
00001 /*  mdp.c
00002 
00003 *****
00004 Copyright 1994-1997, Brown University
00005 Copyright 1998, 1999 Anthony R. Cassandra
00006 
00007 All Rights Reserved
00008 
00009 Permission to use, copy, modify, and distribute this software and its
00010 documentation for any purpose other than its incorporation into a
00011 commercial product is hereby granted without fee, provided that the
00012 above copyright notice appear in all copies and that both that
00013 copyright notice and this permission notice appear in supporting
00014 documentation.
00015 
00016 ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
00017 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY
00018 PARTICULAR PURPOSE.  IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR
00019 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
00020 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
00021 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
00022 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00023 *****
00024 
00025 This file contains code for reading in a mdp/pomdp file and setting
00026 the global variables for the problem for use in all the other files.
00027 
00028 */
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031 
00032 #include "mdpCassandra.h"
00033 #include "imm-reward.h"
00034 #include "sparse-matrix.h"
00035 #include "CPMemUtils.h"
00036 
00037 
00038 #define DOUBLE_DISPLAY_PRECISION                  4
00039 
00040 #define EPSILON  0.00001  /* tolerance for sum of probs == 1 */
00041 
00042 /* To indicate whether we are using an MDP or POMDP. 
00043 */
00044 Problem_Type gProblemType = UNKNOWN_problem_type;
00045 
00046 /* The discount factor to be used with the problem.  
00047 */
00048 REAL_VALUE gDiscount = DEFAULT_DISCOUNT_FACTOR;
00049 
00050 char *value_type_str[] = VALUE_TYPE_STRINGS;
00051 
00052 Value_Type gValueType = DEFAULT_VALUE_TYPE;
00053 
00054 /* These specify the size of the problem.  The first two are always required.
00055 */
00056 int gNumStates = 0;
00057 int gNumActions = 0;
00058 int gNumObservations = 0;   /* remains zero for MDPs */
00059 
00060 /*  We need two sets of variable for the probabilities and values.  The first
00061 is an intermediate representation which is filled in as the MDP file
00062 is parsed, and the other is the final sparse reprsentation which is
00063 found by converting the interemediate representation.  As aresult, we 
00064 only need to allocate the intermediate memory while parsing. After parsing
00065 is completed and we are ready to convert it into the final sparse 
00066 representation, then we allocate the rest of the memory.
00067 */
00068 
00069 /* Intermediate variables */
00070 
00071 I_Matrix *IP;  /* Transition Probabilities */
00072 
00073 I_Matrix *IR;  /* Observation Probabilities (POMDP only) */
00074 
00075 I_Matrix IQ;  /* Immediate action-state pair values (both MDP and POMDP) */
00076 
00077 /* Sparse variables */
00078 
00079 Matrix *P;  /* Transition Probabilities */
00080 
00081 Matrix *R;  /* Observation Probabilities */
00082 
00083 Matrix Q;  /* Immediate values for state action pairs.  These are
00084                    expectations computed from immediate values. */
00085 
00086 /* Normal variables */
00087 
00088 /* Some type of algorithms want a place to start off the problem,
00089 especially when doing simulation type experiments.  The belief
00090 state is for POMDPs and the initial state for an MDP */
00091 
00092 REAL_VALUE *gInitialBelief; 
00093 int gInitialState = INVALID_STATE;
00094 
00095 /***************************************************************************/
00096 REAL_VALUE *newBeliefState(  ) {
00097 
00098         return( (REAL_VALUE *) calloc( gNumStates, sizeof( REAL_VALUE )));
00099 }  /* *newBeliefState */
00100 /***************************************************************************/
00101 int transformBeliefState( REAL_VALUE *pi,
00102                                                  REAL_VALUE *pi_hat,
00103                                                  int a,
00104                                                  int obs ) {
00105                                                          REAL_VALUE denom;
00106                                                          int i, j, cur_state, next_state;
00107 
00108                                                          if( gProblemType != POMDP_problem_type )
00109                                                                  return ( 0 );
00110 
00111                                                          /* zero out all elements since we will acumulate probabilities
00112                                                          as we loop */
00113                                                          for( i = 0; i < gNumStates; i++ )
00114                                                                  pi_hat[i] = 0.0;
00115 
00116                                                          for( cur_state = 0; cur_state < gNumStates; cur_state++ ) {
00117 
00118                                                                  for( j = P[a]->row_start[cur_state]; 
00119                                                                          j < P[a]->row_start[cur_state] +  P[a]->row_length[cur_state];
00120                                                                          j++ ) {
00121 
00122                                                                                  next_state = P[a]->col[j];
00123 
00124                                                                                  pi_hat[next_state] += pi[cur_state] * P[a]->mat_val[j] 
00125                                                                                  * getEntryMatrix( R[a], next_state, obs );
00126 
00127                                                                  } /* for j */
00128                                                          }  /* for i */
00129 
00130                                                          /* Normalize */
00131                                                          denom = 0.0;
00132                                                          for( i = 0; i < gNumStates; i++ )
00133                                                                  denom += pi_hat[i];
00134 
00135                                                          if( IS_ZERO( denom ))
00136                                                                  return( 0 );
00137 
00138                                                          for( i = 0; i < gNumStates; i++ )
00139                                                                  pi_hat[i] /= denom;
00140 
00141                                                          return( 1 );
00142 }  /* transformBeliefState */
00143 /**********************************************************************/
00144 void copyBeliefState( REAL_VALUE *copy, REAL_VALUE *pi ) {
00145         /*
00146         */
00147         int i;
00148 
00149         if(( pi == NULL) || (copy == NULL ))
00150                 return;
00151 
00152         for( i = 0; i < gNumStates; i++ )
00153                 copy[i] = pi[i];
00154 
00155 }  /* copyBeliefState */
00156 /**********************************************************************/
00157 void displayBeliefState( FILE *file, REAL_VALUE *pi ) {
00158         int i;
00159 
00160         fprintf( file, "[%.*f", DOUBLE_DISPLAY_PRECISION, pi[0] );
00161         for(i = 1; i < gNumStates; i++) {
00162                 fprintf(file, " ");
00163                 fprintf( file, "%.*f", DOUBLE_DISPLAY_PRECISION, pi[i] );
00164         }  /* for i */
00165         fprintf(file, "]");
00166 }  /* displayBeliefState */
00167 /***************************************************************************/
00168 int readMDP( char *filename ) {
00169         /*
00170         This routine returns 1 if the file is successfully parsed and 0 if not.
00171         */
00172 
00173         FILE *file;
00174 
00175         if( filename == NULL ) {
00176                 fprintf( stderr, "<NULL> MDP filename: %s.\n", filename );
00177                 return( 0 );
00178         }
00179 
00180         if(( file = fopen( filename, "r" )) == NULL ) {
00181                 fprintf( stderr, "Cannot open the MDP file: %s.\n", filename );
00182                 return( 0 );
00183         }
00184 
00185         if( readMDPFile( file ) == 0 ) {
00186                 fprintf( stderr, 
00187                         "MDP file '%s' was not successfully parsed!\n", filename );
00188                 return( 0 );
00189         }
00190 
00191         fclose( file );
00192 
00193         /* After the file has been parsed, we should have everything we need
00194         in the final representation.  
00195         */
00196         return( 1 );
00197 }  /* readMDP */
00198 /**********************************************************************/
00199 void allocateIntermediateMDP() {
00200         /*
00201         Assumes that the gProblemType has been set and that the variables
00202         gNumStates, gNumActions, and gNumObservation have the appropriate 
00203         values.  It will allocate the memory that will be needed to store
00204         the problem.  This allocates the space for the intermediate 
00205         representation representation for the transitions and observations,
00206         the latter for POMDPs only.
00207         */
00208 
00209         int a;
00210 
00211         /* We need an intermediate matrix for transition probs. for each
00212         action.  */
00213         IP = (I_Matrix *) malloc( gNumActions * sizeof( *IP ));
00214         checkAllocatedPointer((void *) IP );
00215 
00216         for( a = 0; a < gNumActions; a++ )
00217                 IP[a] = newIMatrix( gNumStates );
00218 
00219         /* Only need observation probabilities if it is a POMDP */
00220         if( gProblemType == POMDP_problem_type ) {
00221 
00222                 /* We need an intermediate matrix for observation probs. for each
00223                 action.  */
00224                 IR = (I_Matrix *) malloc( gNumActions * sizeof( *IR ));
00225                 checkAllocatedPointer((void *) IR );
00226 
00227                 for( a = 0; a < gNumActions; a++ )
00228                         IR[a] = newIMatrix( gNumStates );
00229 
00230                 /* Note that the immediate values are stored in a special way, so
00231                 we do not need to allocate anything at this time. */
00232 
00233                 /* For POMDPs, we will keep a starting belief state, since many */
00234                 /* type of algorithms use a simulation approach and would want to */
00235                 /* start it in a particular place. This is not kept in a sparse */
00236                 /* way, so it is just a vector of the number of states. We */
00237                 /* initialize it to be all zeroes.  */
00238 
00239                 gInitialBelief = (REAL_VALUE *) calloc( gNumStates, sizeof( REAL_VALUE ));
00240 
00241         }  /* if POMDP */
00242 
00243         /* Regardless of whether there is an MDP or POMDP, the immediate
00244         rewards for action-state pairs will always exist as an expectation
00245         over the next states and possibly actions.  These will be computed
00246         after parsing from the special immediate reward representation.
00247         */
00248 
00249         IQ = newIMatrix( gNumActions );
00250 
00251 } /* allocateIntermediateMDP */
00252 /************************************************************************/
00253 int verifyIntermediateMDP() {
00254         /*
00255         This routine will make sure that the intermediate form for the MDP
00256         is valid.  It will check to make sure that the transition and
00257         observation matrices do indeed specify probabilities.
00258 
00259         There is a similar routine in the parser.y file, which is nicer
00260         when parsing a POMDP file, but this routine is needed when we 
00261         are creating the POMDP through a program.  In this case there
00262         will be no parsing and thus no logging of errors.
00263         */
00264         int a,i,j;
00265         REAL_VALUE sum;
00266 
00267         for( a = 0; a < gNumActions; a++ )
00268                 for( i = 0; i < gNumStates; i++ ) {
00269                         sum = sumIMatrixRowValues( IP[a], i );
00270                         if((sum < ( 1.0 - EPSILON)) || (sum > (1.0 + EPSILON))) {
00271                                 return( 0 );
00272                         }
00273                 } /* for i */
00274 
00275                 if( gProblemType == POMDP_problem_type )
00276                         for( a = 0; a < gNumActions; a++ )
00277                                 for( j = 0; j < gNumStates; j++ ) {
00278                                         sum = sumIMatrixRowValues( IR[a], j );
00279                                         if((sum < ( 1.0 - EPSILON)) || (sum > (1.0 + EPSILON))) {
00280                                                 return( 0 );
00281                                         } /* if sum not == 1 */
00282                                 }  /* for j */
00283 
00284                                 return( 1 );
00285 }  /* verifyIntermediateMDP */
00286 /************************************************************************/
00287 void deallocateIntermediateMDP() {
00288         /*
00289         This routine is made available in case something goes wrong
00290         before converting the matrices from the intermediate form
00291         to the final form.  Normally the conversion routine convertMatrices()
00292         will deallocate the intermediate matrices, but it might be desirable
00293         to get rid of them before converting (especially if something
00294         has gone wrong) so that things can be started over.
00295         */
00296         int a;
00297 
00298         for( a = 0; a < gNumActions; a++ ) {
00299 
00300                 destroyIMatrix( IP[a] );
00301 
00302                 if( gProblemType == POMDP_problem_type ) {
00303                         destroyIMatrix( IR[a] );
00304                 }
00305 
00306         }
00307 
00308         free( IP );
00309 
00310         if( gProblemType == POMDP_problem_type ) {
00311                 free( IR );
00312                 free( gInitialBelief );
00313         }
00314 
00315         destroyIMatrix( IQ );
00316 
00317 }  /* deallocateIntermediateMDP */
00318 /**********************************************************************/
00319 void computeRewards() {
00320         int a, i, j, z, next_state, obs;
00321         REAL_VALUE sum, inner_sum;
00322 
00323         /* Now do the expectation thing for action-state reward values */
00324 
00325         for( a = 0; a < gNumActions; a++ )
00326                 for( i = 0; i < gNumStates; i++ ) {
00327 
00328                         sum = 0.0;
00329 
00330                         /* Note: 'j' is not a state. It is an index into an array */
00331                         for( j = P[a]->row_start[i]; 
00332                                 j < P[a]->row_start[i] +  P[a]->row_length[i];
00333                                 j++ ) {
00334 
00335                                         next_state = P[a]->col[j];
00336 
00337                                         if( gProblemType == POMDP_problem_type ) {
00338 
00339                                                 inner_sum = 0.0;
00340 
00341                                                 /* Note: 'z' is not a state. It is an index into an array */
00342                                                 for( z = R[a]->row_start[next_state]; 
00343                                                         z < (R[a]->row_start[next_state] +  R[a]->row_length[next_state]);
00344                                                         z++ ) {
00345 
00346                                                                 obs = R[a]->col[z];
00347 
00348                                                                 inner_sum += R[a]->mat_val[z] 
00349                                                                 * getImmediateReward( a, i, next_state, obs );
00350                                                 }  /* for z */
00351                                         }  /* if POMDP */
00352 
00353                                         else /* it is an MDP */
00354                                                 inner_sum = getImmediateReward( a, i, next_state, 0 );
00355 
00356                                         sum += P[a]->mat_val[j] * inner_sum;
00357 
00358                         }  /* for j */
00359 
00360                         addEntryToIMatrix( IQ, a, i, sum );
00361 
00362                 }  /* for i */
00363 
00364 }  /* computeRewards */
00365 /************************************************************************/
00366 void convertMatrices() {
00367         /*
00368         This routine is called after the parsing has been succesfully done.
00369         It will assume that the intermediate representations for the transition
00370         and observation matrices have been allocated and had their values set.
00371         It also assumes that the special immediate reward representation
00372         has been set.  
00373 
00374         This routine will do two functions.  It will convert the intermediate
00375         sparse representations for the transitions and observations to the 
00376         actual true sparse representation.  It will also compute the action-state
00377         immeidate reward pairs as an expectation over next states and possibly
00378         observations from the special immediate reward representation.  This
00379         will be the final step toward the use of the MDP/POMDP model in 
00380         computation.
00381         */
00382 
00383         int a;
00384 #if USE_DEBUG_PRINT
00385         struct timeval startTime, endTime;
00386 #endif
00387 
00388         /* Allocate room for each action */
00389         P = (Matrix *) malloc( gNumActions * sizeof( *P ) );
00390         checkAllocatedPointer((void *) P );
00391 
00392         R = (Matrix *) malloc( gNumActions * sizeof( *R ) );
00393         checkAllocatedPointer((void *) R );
00394 
00395 
00396         /* First convert the intermediate sparse matrices for trans. and obs. */
00397 
00398         for( a = 0; a < gNumActions; a++ ) {
00399 
00400 #if USE_DEBUG_PRINT
00401                 printf("pomdp_spec: transforming transition matrix [a=%d]\n", a);
00402 #endif
00403 
00404                 P[a] = transformIMatrix( IP[a] );
00405                 destroyIMatrix( IP[a] );
00406 
00407 #if USE_DEBUG_PRINT
00408                 printf("pomdp_spec: transforming obs matrix [a=%d]\n", a);
00409 #endif
00410 
00411                 if( gProblemType == POMDP_problem_type ) {
00412                         R[a] = transformIMatrix( IR[a] );
00413                         destroyIMatrix( IR[a] );
00414                 }
00415 
00416         }
00417 
00418         free( IP );
00419 
00420         if( gProblemType == POMDP_problem_type )
00421                 free( IR );
00422 
00423         /* Calculate expected immediate rewards for action-state pairs, but
00424         do it in the sparse matrix representation to eliminate zeroes */
00425 
00426 #if USE_DEBUG_PRINT
00427         printf("pomdp_spec: computing rewards\n");
00428         gettimeofday(&startTime, NULL);
00429 #endif
00430 
00431         computeRewards();
00432 
00433 #if USE_DEBUG_PRINT
00434         gettimeofday(&endTime, NULL);
00435         printf("  (took %lf seconds)\n",
00436                 endTime.tv_sec - startTime.tv_sec + 1e-6 * (endTime.tv_usec - startTime.tv_usec));
00437         printf("pomdp_spec: transforming reward matrix\n");
00438 #endif
00439 
00440         /* Then convert it into the real representation */
00441         Q = transformIMatrix( IQ );
00442         destroyIMatrix( IQ );
00443 
00444 }  /* convertMatrices */
00445 /**********************************************************************/
00446 int writeMDP( char *filename ) {
00447         FILE *file;
00448         int a, i, j, obs;
00449 
00450         if( (file = fopen( filename, "w" )) == NULL )
00451                 return( 0 );
00452 
00453         fprintf( file, "discount: %.6f\n", gDiscount );
00454 
00455         if( gValueType == COST_value_type )
00456                 fprintf( file, "values: cost\n" );
00457         else
00458                 fprintf( file, "values: reward\n" );
00459 
00460         fprintf( file, "states: %d\n", gNumStates );
00461         fprintf( file, "actions: %d\n", gNumActions );
00462 
00463         if( gProblemType == POMDP_problem_type )
00464                 fprintf( file, "observations: %d\n", gNumObservations );
00465 
00466         for( a = 0; a < gNumActions; a++ )
00467                 for( i = 0; i < gNumStates; i++ )
00468                         for( j = P[a]->row_start[i]; 
00469                                 j < P[a]->row_start[i] +  P[a]->row_length[i];
00470                                 j++ ) 
00471                                 fprintf( file, "T: %d : %d : %d %.6f\n",
00472                                 a, i, P[a]->col[j], P[a]->mat_val[j] );
00473 
00474         if( gProblemType == POMDP_problem_type )
00475                 for( a = 0; a < gNumActions; a++ )
00476                         for( j = 0; j < gNumStates; j++ )
00477                                 for( obs = R[a]->row_start[j]; 
00478                                         obs < R[a]->row_start[j] +  R[a]->row_length[j];
00479                                         obs++ ) 
00480                                         fprintf( file, "O: %d : %d : %d %.6f\n",
00481                                         a, j, R[a]->col[obs], R[a]->mat_val[obs] );
00482 
00483         if( gProblemType == POMDP_problem_type )
00484                 for( a = 0; a < gNumActions; a++ )
00485                         for( i = Q->row_start[a]; 
00486                                 i < Q->row_start[a] +  Q->row_length[a];
00487                                 i++ ) 
00488                                 fprintf( file, "R: %d : %d : * : * %.6f\n",
00489                                 a, Q->col[i], Q->mat_val[i] );
00490 
00491         else
00492                 for( a = 0; a < gNumActions; a++ )
00493                         for( i = Q->row_start[a]; 
00494                                 i < Q->row_start[a] +  Q->row_length[a];
00495                                 i++ ) 
00496                                 fprintf( file, "R: %d : %d : * %.6f\n",
00497                                 a, Q->col[i], Q->mat_val[i] );
00498 
00499         fclose( file );
00500         return( 1 );
00501 
00502 }  /* writeMDP */
00503 /**********************************************************************/
00504 void deallocateMDP() {
00505         int a;
00506 
00507         for( a = 0; a < gNumActions; a++ ) 
00508         {
00509                 if(P!=NULL)
00510                 {
00511                         destroyMatrix( P[a] );
00512                 }
00513 
00514 
00515                 if( gProblemType == POMDP_problem_type )
00516                 {
00517                         if(R!= NULL)
00518                         {
00519                                 destroyMatrix( R[a] );
00520                         }
00521                 }
00522         }  /* for a */
00523 
00524         if(P!=NULL)
00525         {
00526                 free( P );
00527         }
00528 
00529 
00530         if( gProblemType == POMDP_problem_type ) 
00531         {
00532                 if(R!= NULL)
00533                 {
00534                         free( R );
00535                 }
00536                 if(gInitialBelief!= NULL)
00537                 {
00538                         free( gInitialBelief );
00539                 }
00540                 
00541         }
00542 
00543         destroyMatrix( Q );
00544 
00545         destroyImmRewards();
00546 
00547 }  /* deallocateMDP */
00548 /**********************************************************************/
00549 void displayMDPSlice( int state ) {
00550         /*
00551         Shows the transition and observation probabilites (and rewards) for
00552         the given state.
00553         */
00554         int a, j, obs;
00555 
00556         if(( state < 0 ) || ( state >= gNumStates ) || ( gNumStates < 1 ))
00557                 return;
00558 
00559         printf( "MDP slice for state: %d\n", state );
00560 
00561         for( a = 0; a < gNumActions; a++ )
00562                 for( j = P[a]->row_start[state]; 
00563                         j < P[a]->row_start[state] +  P[a]->row_length[state];
00564                         j++ ) 
00565                         printf( "\tP( s=%d | s=%d, a=%d ) = %.6f\n",
00566                         P[a]->col[j], state, a, P[a]->mat_val[j] );
00567 
00568         if( gProblemType == POMDP_problem_type )
00569                 for( a = 0; a < gNumActions; a++ )
00570                         for( obs = R[a]->row_start[state]; 
00571                                 obs < R[a]->row_start[state] +  R[a]->row_length[state];
00572                                 obs++ ) 
00573                                 printf( "\tP( o=%d | s=%d, a=%d ) = %.6f\n",
00574                                 R[a]->col[obs], state, a, R[a]->mat_val[obs] );
00575 
00576         for( a = 0; a < gNumActions; a++ )
00577                 printf( "\tQ( s=%d, a=%d ) = %5.6f\n",
00578                 state, a, getEntryMatrix( Q, a, state ));
00579 
00580 }  /* displayMDPSlice */
00581 
00582 void memoryExhaustedErrorHandler()
00583 {
00584         deallocateMDP();
00585         printf("Not enough memory for parsing the POMDP file, exiting.\n");
00586         exit(-1);
00587 }
00588 
00589 unsigned long GlobalMemLimit = 0;
00590 
00591 void checkAllocatedPointer(void * ptr)
00592 {
00593         unsigned long memUsage = getCurrentProcessMemoryUsage();
00594 
00595         if(GlobalMemLimit == 0)
00596         {
00597                 GlobalMemLimit = getPlatformMemoryLimit();
00598         }
00599 
00600         
00601 
00602         //printf ("limit %lu memusage %lu \n", memLimit, memUsage);
00603 
00604         if(memUsage > GlobalMemLimit)
00605         {
00606                 memoryExhaustedErrorHandler();
00607         }
00608 
00609         if(ptr == NULL)
00610         {
00611                 memoryExhaustedErrorHandler();
00612         }
00613 }
00614 
00615 /**********************************************************************/
00616 
00617