/*  imm-reward.c

  *****
  Copyright 1994-1997, Brown University
  Copyright 1998, 1999, Anthony R. Cassandra

                           All Rights Reserved
                           
  Permission to use, copy, modify, and distribute this software and its
  documentation for any purpose other than its incorporation into a
  commercial product is hereby granted without fee, provided that the
  above copyright notice appear in all copies and that both that
  copyright notice and this permission notice appear in supporting
  documentation.
  
  ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY
  PARTICULAR PURPOSE.  IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR
  ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *****

For a sparse representation, the main thing we are trying to avoid is
keeping a matrix that is NxN, where N is the number of rows and columns
(states, in the MDP case).  This is easily accomplished with the
transition matrices and observation matrices.  However, the most
general form for the specification of an immediate reward in a POMDP
conditions the reward upon the action, current state, next state and
observation.  This requires a matrix for each combination of action
and state, and the size of these matrices will be NxO, where N is the
number of states and O the number of observations.  Although we can
keep the individual matrices in a sparse representation where we have
a row for each state, we will require as many of these as there are
states, effectively requiring a matrix that is at least NxN.  We
choose not to limit the file format's reward specification as
previously defined, yet want to keep a short internal representation
for the rewards.
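
As a concrete illustration (the numbers here are made up), a problem
with 1000 states and 20 observations would need a 1000 x 20 matrix of
next-state/observation rewards for each (action, current state) pair;
with 1000 current states, that is on the order of 20 million values
per action if stored densely.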

Since the general rewards are used for computing the immediate rewards
for each state-action pair at the start of the program, we do not need
to be very concerned with efficiency.  We will keep the shorthand
representation around in case individual elements are needed (e.g.,
in a simulation).  However, getting the immediate reward from this
representation will be slower than getting the other problem
parameters.

Note that for MDPs we do not have to worry about this issue.  We
cannot condition the immediate rewards upon the observation (there
is no notion of observation in MDPs), so we merely need a sparse NxN
matrix for each action.  This will require us to keep track of the
type of problem we have loaded.
However, even for the MDP case we would like to take advantage of any
wildcard character shortcuts, so we use this module for both the MDP
and the POMDP case.  Things will be slightly different depending upon
the type of problem being parsed (in gProblemType).

Here's how this module interacts with the parser: when the parser sees
a line that begins with 'R:' it will call the newImmReward() routine.
This will create an Imm_Reward_List node and fill in the proper
information.  If necessary, it will then initialize the intermediate
sparse matrix representation (i.e., next_state and obs not specified
for a POMDP, or cur_state and next_state not specified for an MDP).
The node is not added to the list at this time.  The parser will then
deliver each value for this 'R:' entry individually through the
routine enterImmReward().  As the actual values are parsed, one of
three things could happen: the entry could require only a single
value, a vector of values, or an entire matrix of values.  With a
single value we just set it in the node.  With the vector we set the
proper entry each time a value is passed in.  Finally, if it is an
entire matrix, we enter it into the intermediate sparse
representation.  When the current 'R:' line is finished, the parser
will call doneImmReward(), which will first, if necessary, transform
the intermediate sparse matrix into a proper sparse matrix.  Then it
will put this into the node and add the node to the list.

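For illustration, for a hypothetical two-state, two-observation
problem (the entry and its numbers are made up), a POMDP line such as

    R: 0 : 1
    1.0 2.0
    3.0 4.0

would lead the parser to call newImmReward( 0, 1, NOT_PRESENT,
NOT_PRESENT ), which allocates the intermediate matrix; then to call
enterImmReward() once per value, filling in the (next_state, obs)
entries; and finally to call doneImmReward(), which converts the
intermediate matrix to its sparse form and appends the node to the
list.
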
Note that the semantics of the file are such that sequentially later
values override earlier values.  This means that a particular
combination of action, cur_state, next_state and obs could have more
than one value; the last value in the file is the one that is
correct.  Therefore we need to keep the linked list in order from
oldest to newest.  Then, when a particular value is desired, we must
run through the entire list, setting the value each time we see a
specification for it.  In this way we will be left with the last value
that was specified in the file.  */
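
/* For illustration (the entries below are made up): if a file contained

       R: * : * : * : *  1.0
       R: 0 : 1 : 2 : 0  5.0

   then getImmediateReward( 0, 1, 2, 0 ) yields 5.0, while every other
   (action, cur_state, next_state, obs) combination yields 1.0, because
   the later, more specific entry overrides the earlier wildcard entry
   for that single combination.  */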

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "mdpCassandra.h"
#include "sparse-matrix.h"
#include "imm-reward.h"
#include "decision-tree.h"

#define USE_DECISION_TREE (1)

/* As we parse the file, we will encounter only one R : * : *.... line
   at a time, so we will keep the intermediate matrix as a global
   variable.  When we start to enter a line we will initialize it, and
   when we are finished we will convert it and store the sparse matrix
   into the node of the linked list.  */
I_Matrix gCurIMatrix = NULL;

/* We will have most of the information we need when we first start to
   parse the line, so we will create the node and put that information
   there.  After we have read all of the values, we will put it into
   the linked list.  */
Imm_Reward_List gCurImmRewardNode = NULL;

/* This is the actual list of immediate reward lines. */
Imm_Reward_List gImmRewardList = NULL;

/**********************************************************************/
void destroyImmRewards() {
  Imm_Reward_List temp;

  while( gImmRewardList != NULL ) {

    temp = gImmRewardList;
    gImmRewardList = gImmRewardList->next;

    switch( temp->type ) {
    case ir_vector:
      if( temp->rep.vector != NULL )
        free( temp->rep.vector );
      break;

    case ir_matrix:
      destroyMatrix( temp->rep.matrix );
      break;

    case ir_value:
    default:
      break;
    }  /* switch */

    free( temp );

  }  /* while */

#if USE_DECISION_TREE
  dtDeallocate();
#endif

}  /* destroyImmRewards */
/**********************************************************************/
Imm_Reward_List appendImmRewardList( Imm_Reward_List list, Imm_Reward_List node ) {
  Imm_Reward_List temp = list;

  if( temp == NULL )
    return( node );

  while( temp->next != NULL ) 
    temp = temp->next;

  temp->next = node;

  return( list );

}  /* appendImmRewardList */
/**********************************************************************/
void newImmReward( int action, int cur_state, int next_state, int obs ) {
  
  /* First we will allocate a new node for this entry. */
  gCurImmRewardNode = (Imm_Reward_List) malloc( sizeof( *gCurImmRewardNode ));
  checkAllocatedPointer( (void *) gCurImmRewardNode );

  gCurImmRewardNode->action = action;
  gCurImmRewardNode->cur_state = cur_state;
  gCurImmRewardNode->next_state = next_state;
  gCurImmRewardNode->obs = obs;
  gCurImmRewardNode->next = NULL;

  switch( gProblemType ) {

  case POMDP_problem_type:
    if( obs == NOT_PRESENT ) {
      
      if( next_state == NOT_PRESENT ) {
       
        /* This is the situation where we will need to keep a sparse 
           matrix, so let us initialize the global I_Matrix variable. */
        
        gCurIMatrix = newIMatrix( gNumStates );
        gCurImmRewardNode->rep.matrix = NULL;
        gCurImmRewardNode->type = ir_matrix;
       
      } /* next_state == NOT_PRESENT */
      
      else { /* we will need a vector of numbers, not a matrix */
        
        gCurImmRewardNode->rep.vector
          = (REAL_VALUE *) calloc( gNumObservations, sizeof( REAL_VALUE ));
        gCurImmRewardNode->type = ir_vector;
        
      }  /* else need vector, not matrix */
      
    }  /* obs == NOT_PRESENT */
    
    else {  /* We only need a single value, so let us just initialize
               it to zero. */
      
      gCurImmRewardNode->rep.value = 0.0;
      gCurImmRewardNode->type = ir_value;
    }
    break;

  case MDP_problem_type:
    /* For this case we completely ignore the 'obs' parameter. */
      
    if( next_state == NOT_PRESENT ) {
       
      if( cur_state == NOT_PRESENT ) {
        /* This is the situation where we will need to keep a sparse 
           matrix, so let us initialize the global I_Matrix variable. */
        
        gCurIMatrix = newIMatrix( gNumStates );
        gCurImmRewardNode->rep.matrix = NULL;
        gCurImmRewardNode->type = ir_matrix;
        
      } /* cur_state == NOT_PRESENT */
      
      else { /* we will need a vector of numbers, not a matrix */
        
        gCurImmRewardNode->rep.vector
          = (REAL_VALUE *) calloc( gNumStates, sizeof( REAL_VALUE ));
        gCurImmRewardNode->type = ir_vector;
        
      }  /* else need vector, not matrix */
      
    }  /* next_state == NOT_PRESENT */
    
    else {  /* We only need a single value, so let us just initialize
               it to zero. */
      
      gCurImmRewardNode->rep.value = 0.0;
      gCurImmRewardNode->type = ir_value;
    }
    break;
    
  default:
    fprintf( stderr, "**ERR** newImmReward: Unrecognized problem type.\n" );
    exit( -1 );
    break;

  }  /* switch */

}  /* newImmReward */
/**********************************************************************/
void enterImmReward( int cur_state, int next_state, int obs, 
                     REAL_VALUE value ) {

  /* cur_state is ignored for a POMDP, and obs is ignored for an MDP. */

  assert( gCurImmRewardNode != NULL );

  switch( gCurImmRewardNode->type ) {
  case ir_value:
    gCurImmRewardNode->rep.value = value;
    break;

  case ir_vector:
    if( gProblemType == POMDP_problem_type )
      gCurImmRewardNode->rep.vector[obs] = value;
    else
      gCurImmRewardNode->rep.vector[next_state] = value;
    break;

  case ir_matrix:
    if( gProblemType == POMDP_problem_type )
      addEntryToIMatrix( gCurIMatrix, next_state, obs, value );
    else
      addEntryToIMatrix( gCurIMatrix, cur_state, next_state, value );
    break;

  default:
    fprintf( stderr, "** ERR ** Unrecognized IR_Type in enterImmReward().\n" );
    exit( -1 );
    break;
  }  /* switch */

}  /* enterImmReward */
/**********************************************************************/
void irAddToDecisionTree( Imm_Reward_List node )
{
  int i, j, k;
  Matrix m;

  assert( node != NULL );

  /* Ensure the decision tree is initialized (it is ok to call dtInit()
     more than once). */
  dtInit( gNumActions, gNumStates, gNumObservations );

  switch( node->type ) {
  case ir_value:
    if( gProblemType == POMDP_problem_type ) { /* pomdp */
      dtAdd( node->action, node->cur_state, node->next_state, node->obs,
             node->rep.value );
    } else { /* mdp */
      dtAdd( node->action, node->cur_state, node->next_state, WILDCARD_SPEC,
             node->rep.value );
    }
    break;
    
  case ir_vector:
    if( gProblemType == POMDP_problem_type ) { /* pomdp */
      for( i = 0; i < gNumObservations; i++ ) {
        dtAdd( node->action, node->cur_state, node->next_state, i,
               node->rep.vector[i] );
      }
    } else { /* mdp */
      for( i = 0; i < gNumStates; i++ ) {
        dtAdd( node->action, node->cur_state, i, WILDCARD_SPEC,
               node->rep.vector[i] );
      }
    }
    break;
    
  case ir_matrix:
    m = node->rep.matrix;
    for( i = 0; i < m->num_rows; i++ ) {
      for( j = 0; j < m->row_length[i]; j++ ) {
        k = m->row_start[i] + j;
        if( gProblemType == POMDP_problem_type ) { /* pomdp */
          dtAdd( node->action, node->cur_state, i, m->col[k], m->mat_val[k] );
        } else { /* mdp */
          dtAdd( node->action, i, m->col[k], WILDCARD_SPEC, m->mat_val[k] );
        }
      }
    }
    break;
    
  default:
    assert( 0 /* never reach this point */ );
  }  /* switch */
}  /* irAddToDecisionTree */
/**********************************************************************/
void doneImmReward() {
  
  if( gCurImmRewardNode == NULL )
    return;

  switch( gCurImmRewardNode->type ) {
  case ir_value:
  case ir_vector:
    /* Do nothing for these cases. */
    break;
    
  case ir_matrix:
    gCurImmRewardNode->rep.matrix = transformIMatrix( gCurIMatrix );
    destroyIMatrix( gCurIMatrix );
    gCurIMatrix = NULL;
    break;

  default:
    fprintf( stderr, "** ERR ** Unrecognized IR_Type in doneImmReward().\n" );
    exit( -1 );
    break;
  }  /* switch */

#if USE_DECISION_TREE
  irAddToDecisionTree( gCurImmRewardNode );
#endif

  gImmRewardList = appendImmRewardList( gImmRewardList,
                                        gCurImmRewardNode );
  gCurImmRewardNode = NULL;

}  /* doneImmReward */
/**********************************************************************/
REAL_VALUE getImmediateReward( int action, int cur_state, int next_state,
                               int obs ) {
#if USE_DECISION_TREE
  return dtGet( action, cur_state, next_state, obs );
#else
  Imm_Reward_List temp = gImmRewardList;
  REAL_VALUE return_value = 0.0;

  assert( (action >= 0) && (action < gNumActions)
          && (cur_state >= 0) && (cur_state < gNumStates)
          && (next_state >= 0) && (next_state < gNumStates) );

  while( temp != NULL ) {
    
    if( ( temp->action == WILDCARD_SPEC )
        || ( temp->action == action )) {

      switch( temp->type ) {
      case ir_value:

        if( gProblemType == POMDP_problem_type ) {
          if( (( temp->next_state == WILDCARD_SPEC )
               || ( temp->next_state == next_state ))
              && (( temp->obs == WILDCARD_SPEC )
                  || ( temp->obs == obs ))
              && (( temp->cur_state == WILDCARD_SPEC )
                  || ( temp->cur_state == cur_state ))) {

            return_value = temp->rep.value;
            
          }  /* if we have a match */
        }  /* if POMDP */

        else {  /* then it is an MDP */
          if( (( temp->cur_state == WILDCARD_SPEC )
               || ( temp->cur_state == cur_state ))
              && (( temp->next_state == WILDCARD_SPEC )
                  || ( temp->next_state == next_state ))) {
            
            return_value = temp->rep.value;
            
          }  /* if we have a match */
        }
        break;
    
      case ir_vector:

        if( gProblemType == POMDP_problem_type ) {
          if( (( temp->next_state == WILDCARD_SPEC )
               || ( temp->next_state == next_state ))
              && (( temp->cur_state == WILDCARD_SPEC )
                  || ( temp->cur_state == cur_state ))) {
            
            return_value = temp->rep.vector[obs];
          }
        }  /* if POMDP */

        else {  /* it is an MDP */
          if( ( temp->cur_state == WILDCARD_SPEC )
              || ( temp->cur_state == cur_state )) {
            
            return_value = temp->rep.vector[next_state];
          }
        }

        break;
    
      case ir_matrix:
        if( gProblemType == POMDP_problem_type ) {
          if( ( temp->cur_state == WILDCARD_SPEC )
              || ( temp->cur_state == cur_state ))
            return_value = getEntryMatrix( temp->rep.matrix, next_state,
                                           obs );
        }
        else
          return_value = getEntryMatrix( temp->rep.matrix, cur_state,
                                         next_state );

        break;

      default:
        fprintf( stderr, 
                 "** ERR ** Unrecognized IR_Type in getImmediateReward().\n" );
        exit( -1 );
        break;
      }  /* switch */

    }  /* if we have a partially matching node */

    temp = temp->next;
  }  /* while */

  return( return_value );
#endif /* USE_DECISION_TREE */
}  /* getImmediateReward */
/**********************************************************************/
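
/* Example usage: a minimal sketch (not part of the original module)
   showing how a client might compute the expected immediate reward for
   an MDP state-action pair once parsing has finished.  It assumes the
   sparse transition matrices P[] kept by the companion mdp module are
   in scope; the obs argument of getImmediateReward() is passed as 0
   since observations are irrelevant for MDPs. */
#ifdef IMM_REWARD_EXAMPLE
REAL_VALUE expectedImmediateReward( int action, int cur_state ) {
  REAL_VALUE total = 0.0;
  int next_state;

  /* Weight each next state's reward by its transition probability. */
  for( next_state = 0; next_state < gNumStates; next_state++ )
    total += getEntryMatrix( P[action], cur_state, next_state )
      * getImmediateReward( action, cur_state, next_state, 0 );

  return( total );
}  /* expectedImmediateReward */
#endif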