00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089 #include <stdio.h>
00090 #include <stdlib.h>
00091 #include <assert.h>
00092 #include <string.h>
00093
00094 #include "mdpCassandra.h"
00095 #include "sparse-matrix.h"
00096 #include "imm-reward.h"
00097 #include "decision-tree.h"
00098
00099 #define USE_DECISION_TREE (1)
00100
00101
00102
00103
00104
00105
00106 I_Matrix gCurIMatrix = NULL;
00107
00108
00109
00110
00111
00112 Imm_Reward_List gCurImmRewardNode = NULL;
00113
00114
00115 Imm_Reward_List gImmRewardList = NULL;
00116
00117
00118 void destroyImmRewards() {
00119 Imm_Reward_List temp;
00120
00121 while( gImmRewardList != NULL ) {
00122
00123 temp = gImmRewardList;
00124 gImmRewardList = gImmRewardList->next;
00125
00126 switch( temp->type ) {
00127 case ir_vector:
00128 if(temp->rep.vector != NULL)
00129 {
00130 free( temp->rep.vector );
00131 }
00132 break;
00133
00134 case ir_matrix:
00135 destroyMatrix( temp->rep.matrix );
00136 break;
00137
00138 case ir_value:
00139 default:
00140 break;
00141 }
00142
00143 free( temp );
00144
00145 }
00146
00147 #if USE_DECISION_TREE
00148 dtDeallocate();
00149 #endif
00150
00151 }
00152
00153 Imm_Reward_List appendImmRewardList( Imm_Reward_List list, Imm_Reward_List node ) {
00154 Imm_Reward_List temp = list;
00155
00156 if( temp == NULL )
00157 return( node );
00158
00159 while( temp->next != NULL )
00160 temp = temp->next;
00161
00162 temp->next = node;
00163
00164 return( list );
00165
00166 }
00167
00168 void newImmReward( int action, int cur_state, int next_state, int obs ) {
00169
00170
00171 gCurImmRewardNode = (Imm_Reward_List) malloc( sizeof(*gCurImmRewardNode ));
00172 checkAllocatedPointer((void *)gCurImmRewardNode );
00173
00174 gCurImmRewardNode->action = action;
00175 gCurImmRewardNode->cur_state = cur_state;
00176 gCurImmRewardNode->next_state = next_state;
00177 gCurImmRewardNode->obs = obs;
00178 gCurImmRewardNode->next = NULL;
00179
00180 switch( gProblemType ) {
00181
00182 case POMDP_problem_type:
00183 if( obs == NOT_PRESENT) {
00184
00185 if( next_state == NOT_PRESENT ) {
00186
00187
00188
00189
00190 gCurIMatrix = newIMatrix( gNumStates );
00191 gCurImmRewardNode->rep.matrix = NULL;
00192 gCurImmRewardNode->type = ir_matrix;
00193
00194 }
00195
00196 else {
00197
00198 gCurImmRewardNode->rep.vector = (REAL_VALUE *) calloc( gNumObservations,
00199 sizeof(REAL_VALUE));
00200 gCurImmRewardNode->type = ir_vector;
00201
00202 }
00203
00204 }
00205
00206 else {
00207
00208
00209 gCurImmRewardNode->rep.value = 0.0;
00210 gCurImmRewardNode->type = ir_value;
00211 }
00212 break;
00213
00214 case MDP_problem_type:
00215
00216
00217 if( next_state == NOT_PRESENT ) {
00218
00219 if( cur_state == NOT_PRESENT ) {
00220
00221
00222
00223
00224 gCurIMatrix = newIMatrix( gNumStates );
00225 gCurImmRewardNode->rep.matrix = NULL;
00226 gCurImmRewardNode->type = ir_matrix;
00227
00228 }
00229
00230 else {
00231
00232 gCurImmRewardNode->rep.vector = (REAL_VALUE *) calloc( gNumStates,
00233 sizeof(REAL_VALUE));
00234 gCurImmRewardNode->type = ir_vector;
00235
00236 }
00237
00238 }
00239
00240 else {
00241
00242
00243 gCurImmRewardNode->rep.value = 0.0;
00244 gCurImmRewardNode->type = ir_value;
00245 }
00246 break;
00247
00248 default:
00249 fprintf( stderr, "**ERR** newImmReward: Unreckognised problem type.\n");
00250 exit( -1 );
00251 break;
00252
00253 }
00254
00255 }
00256
00257 void enterImmReward( int cur_state, int next_state, int obs,
00258 REAL_VALUE value ) {
00259
00260
00261
00262 assert( gCurImmRewardNode != NULL );
00263
00264 switch( gCurImmRewardNode->type ) {
00265 case ir_value:
00266 gCurImmRewardNode->rep.value = value;
00267 break;
00268
00269 case ir_vector:
00270 if( gProblemType == POMDP_problem_type )
00271 gCurImmRewardNode->rep.vector[obs] = value;
00272 else
00273 gCurImmRewardNode->rep.vector[next_state] = value;
00274 break;
00275
00276 case ir_matrix:
00277 if( gProblemType == POMDP_problem_type )
00278 addEntryToIMatrix( gCurIMatrix, next_state, obs, value );
00279 else
00280 addEntryToIMatrix( gCurIMatrix, cur_state, next_state, value );
00281 break;
00282
00283 default:
00284 fprintf( stderr, "** ERR ** Unreckognized IR_Type in enterImmReward().\n");
00285 exit( -1 );
00286 break;
00287 }
00288
00289 }
00290
00291 void irAddToDecisionTree(Imm_Reward_List node)
00292 {
00293 int i, j, k;
00294 Matrix m;
00295
00296 assert( node != NULL );
00297
00298
00299 dtInit(gNumActions, gNumStates, gNumObservations);
00300
00301 switch( node->type ) {
00302 case ir_value:
00303 if ( gProblemType == POMDP_problem_type ) {
00304 dtAdd(node->action, node->cur_state, node->next_state, node->obs, node->rep.value);
00305 } else {
00306 dtAdd(node->action, node->cur_state, node->next_state, WILDCARD_SPEC, node->rep.value);
00307 }
00308 break;
00309
00310 case ir_vector:
00311 if ( gProblemType == POMDP_problem_type ) {
00312 for (i=0; i < gNumObservations; i++) {
00313 dtAdd(node->action, node->cur_state, node->next_state, i, node->rep.vector[i]);
00314 }
00315 } else {
00316 for (i=0; i < gNumStates; i++) {
00317 dtAdd(node->action, node->cur_state, i, WILDCARD_SPEC, node->rep.vector[i]);
00318 }
00319 }
00320 break;
00321
00322 case ir_matrix:
00323 m = node->rep.matrix;
00324 for (i=0; i < m->num_rows; i++) {
00325 for (j=0; j < m->row_length[i]; j++) {
00326 k = m->row_start[i] + j;
00327 if( gProblemType == POMDP_problem_type ) {
00328 dtAdd(node->action, node->cur_state, i, m->col[k], m->mat_val[k]);
00329 } else {
00330 dtAdd(node->action, i, m->col[k], WILDCARD_SPEC, m->mat_val[k]);
00331 }
00332 }
00333 }
00334 break;
00335
00336 default:
00337 assert(0 );
00338 }
00339 }
00340
00341 void doneImmReward() {
00342
00343 if( gCurImmRewardNode == NULL )
00344 return;
00345
00346 switch( gCurImmRewardNode->type ) {
00347 case ir_value:
00348 case ir_vector:
00349
00350 break;
00351
00352 case ir_matrix:
00353 gCurImmRewardNode->rep.matrix = transformIMatrix( gCurIMatrix );
00354 destroyIMatrix( gCurIMatrix );
00355 gCurIMatrix = NULL;
00356 break;
00357
00358 default:
00359 fprintf( stderr, "** ERR ** Unreckognized IR_Type in doneImmReward().\n");
00360 exit( -1 );
00361 break;
00362 }
00363
00364 #if USE_DECISION_TREE
00365 irAddToDecisionTree(gCurImmRewardNode);
00366 #endif
00367
00368 gImmRewardList = appendImmRewardList( gImmRewardList,
00369 gCurImmRewardNode );
00370 gCurImmRewardNode = NULL;
00371
00372 }
00373
00374 REAL_VALUE getImmediateReward( int action, int cur_state, int next_state,
00375 int obs ) {
00376 #if USE_DECISION_TREE
00377 return dtGet(action, cur_state, next_state, obs);
00378 #else
00379 Imm_Reward_List temp = gImmRewardList;
00380 REAL_VALUE return_value = 0.0;
00381
00382 assert(( action >= 0) && (action < gNumActions)
00383 && (cur_state >= 0) && (cur_state < gNumStates)
00384 && (next_state >= 0) && (next_state < gNumStates));
00385
00386 while( temp != NULL ) {
00387
00388 if((( temp->action == WILDCARD_SPEC )
00389 || ( temp->action == action ))) {
00390
00391 switch( temp->type ) {
00392 case ir_value:
00393
00394 if( gProblemType == POMDP_problem_type ) {
00395 if((( temp->next_state == WILDCARD_SPEC )
00396 || ( temp->next_state == next_state))
00397 && ((temp->obs == WILDCARD_SPEC)
00398 || (temp->obs == obs ))
00399 && ((temp->cur_state == WILDCARD_SPEC)
00400 || (temp->cur_state == cur_state ))) {
00401
00402
00403 return_value = temp->rep.value;
00404
00405 }
00406 }
00407
00408 else {
00409 if((( temp->cur_state == WILDCARD_SPEC )
00410 || ( temp->cur_state == cur_state))
00411 && ((temp->next_state == WILDCARD_SPEC)
00412 || (temp->next_state == next_state ))) {
00413
00414 return_value = temp->rep.value;
00415
00416 }
00417 }
00418 break;
00419
00420 case ir_vector:
00421
00422 if( gProblemType == POMDP_problem_type ) {
00423 if((( temp->next_state == WILDCARD_SPEC )
00424 || ( temp->next_state == next_state))
00425 && ((temp->cur_state == WILDCARD_SPEC)
00426 || (temp->cur_state == cur_state ))) {
00427
00428 return_value = temp->rep.vector[obs];
00429 }
00430 }
00431
00432 else {
00433 if(( temp->cur_state == WILDCARD_SPEC )
00434 || ( temp->cur_state == cur_state)) {
00435
00436 return_value = temp->rep.vector[next_state];
00437 }
00438 }
00439
00440 break;
00441
00442 case ir_matrix:
00443 if( gProblemType == POMDP_problem_type ) {
00444 if(( temp->cur_state == WILDCARD_SPEC )
00445 || (temp->cur_state == cur_state ))
00446 return_value = getEntryMatrix( temp->rep.matrix, next_state,
00447 obs );
00448 }
00449 else
00450 return_value = getEntryMatrix( temp->rep.matrix, cur_state,
00451 next_state );
00452
00453 break;
00454
00455 default:
00456 fprintf( stderr,
00457 "** ERR ** Unreckognized IR_Type in getImmediateReward().\n");
00458 exit( -1 );
00459 break;
00460 }
00461
00462
00463 }
00464
00465 temp = temp->next;
00466 }
00467
00468 return( return_value );
00469 #endif
00470 }
00471