00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031
00032 #include "mdpCassandra.h"
00033 #include "imm-reward.h"
00034 #include "sparse-matrix.h"
00035 #include "CPMemUtils.h"
00036
00037
00038 #define DOUBLE_DISPLAY_PRECISION 4
00039
00040 #define EPSILON 0.00001
00041
00042
00043
00044 Problem_Type gProblemType = UNKNOWN_problem_type;
00045
00046
00047
00048 REAL_VALUE gDiscount = DEFAULT_DISCOUNT_FACTOR;
00049
00050 char *value_type_str[] = VALUE_TYPE_STRINGS;
00051
00052 Value_Type gValueType = DEFAULT_VALUE_TYPE;
00053
00054
00055
00056 int gNumStates = 0;
00057 int gNumActions = 0;
00058 int gNumObservations = 0;
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 I_Matrix *IP;
00072
00073 I_Matrix *IR;
00074
00075 I_Matrix IQ;
00076
00077
00078
00079 Matrix *P;
00080
00081 Matrix *R;
00082
00083 Matrix Q;
00084
00085
00086
00087
00088
00089
00090
00091
00092 REAL_VALUE *gInitialBelief;
00093 int gInitialState = INVALID_STATE;
00094
00095
00096 REAL_VALUE *newBeliefState( ) {
00097
00098 return( (REAL_VALUE *) calloc( gNumStates, sizeof( REAL_VALUE )));
00099 }
00100
00101 int transformBeliefState( REAL_VALUE *pi,
00102 REAL_VALUE *pi_hat,
00103 int a,
00104 int obs ) {
00105 REAL_VALUE denom;
00106 int i, j, cur_state, next_state;
00107
00108 if( gProblemType != POMDP_problem_type )
00109 return ( 0 );
00110
00111
00112
00113 for( i = 0; i < gNumStates; i++ )
00114 pi_hat[i] = 0.0;
00115
00116 for( cur_state = 0; cur_state < gNumStates; cur_state++ ) {
00117
00118 for( j = P[a]->row_start[cur_state];
00119 j < P[a]->row_start[cur_state] + P[a]->row_length[cur_state];
00120 j++ ) {
00121
00122 next_state = P[a]->col[j];
00123
00124 pi_hat[next_state] += pi[cur_state] * P[a]->mat_val[j]
00125 * getEntryMatrix( R[a], next_state, obs );
00126
00127 }
00128 }
00129
00130
00131 denom = 0.0;
00132 for( i = 0; i < gNumStates; i++ )
00133 denom += pi_hat[i];
00134
00135 if( IS_ZERO( denom ))
00136 return( 0 );
00137
00138 for( i = 0; i < gNumStates; i++ )
00139 pi_hat[i] /= denom;
00140
00141 return( 1 );
00142 }
00143
00144 void copyBeliefState( REAL_VALUE *copy, REAL_VALUE *pi ) {
00145
00146
00147 int i;
00148
00149 if(( pi == NULL) || (copy == NULL ))
00150 return;
00151
00152 for( i = 0; i < gNumStates; i++ )
00153 copy[i] = pi[i];
00154
00155 }
00156
00157 void displayBeliefState( FILE *file, REAL_VALUE *pi ) {
00158 int i;
00159
00160 fprintf( file, "[%.*f", DOUBLE_DISPLAY_PRECISION, pi[0] );
00161 for(i = 1; i < gNumStates; i++) {
00162 fprintf(file, " ");
00163 fprintf( file, "%.*f", DOUBLE_DISPLAY_PRECISION, pi[i] );
00164 }
00165 fprintf(file, "]");
00166 }
00167
00168 int readMDP( char *filename ) {
00169
00170
00171
00172
00173 FILE *file;
00174
00175 if( filename == NULL ) {
00176 fprintf( stderr, "<NULL> MDP filename: %s.\n", filename );
00177 return( 0 );
00178 }
00179
00180 if(( file = fopen( filename, "r" )) == NULL ) {
00181 fprintf( stderr, "Cannot open the MDP file: %s.\n", filename );
00182 return( 0 );
00183 }
00184
00185 if( readMDPFile( file ) == 0 ) {
00186 fprintf( stderr,
00187 "MDP file '%s' was not successfully parsed!\n", filename );
00188 return( 0 );
00189 }
00190
00191 fclose( file );
00192
00193
00194
00195
00196 return( 1 );
00197 }
00198
00199 void allocateIntermediateMDP() {
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209 int a;
00210
00211
00212
00213 IP = (I_Matrix *) malloc( gNumActions * sizeof( *IP ));
00214 checkAllocatedPointer((void *) IP );
00215
00216 for( a = 0; a < gNumActions; a++ )
00217 IP[a] = newIMatrix( gNumStates );
00218
00219
00220 if( gProblemType == POMDP_problem_type ) {
00221
00222
00223
00224 IR = (I_Matrix *) malloc( gNumActions * sizeof( *IR ));
00225 checkAllocatedPointer((void *) IR );
00226
00227 for( a = 0; a < gNumActions; a++ )
00228 IR[a] = newIMatrix( gNumStates );
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239 gInitialBelief = (REAL_VALUE *) calloc( gNumStates, sizeof( REAL_VALUE ));
00240
00241 }
00242
00243
00244
00245
00246
00247
00248
00249 IQ = newIMatrix( gNumActions );
00250
00251 }
00252
00253 int verifyIntermediateMDP() {
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264 int a,i,j;
00265 REAL_VALUE sum;
00266
00267 for( a = 0; a < gNumActions; a++ )
00268 for( i = 0; i < gNumStates; i++ ) {
00269 sum = sumIMatrixRowValues( IP[a], i );
00270 if((sum < ( 1.0 - EPSILON)) || (sum > (1.0 + EPSILON))) {
00271 return( 0 );
00272 }
00273 }
00274
00275 if( gProblemType == POMDP_problem_type )
00276 for( a = 0; a < gNumActions; a++ )
00277 for( j = 0; j < gNumStates; j++ ) {
00278 sum = sumIMatrixRowValues( IR[a], j );
00279 if((sum < ( 1.0 - EPSILON)) || (sum > (1.0 + EPSILON))) {
00280 return( 0 );
00281 }
00282 }
00283
00284 return( 1 );
00285 }
00286
00287 void deallocateIntermediateMDP() {
00288
00289
00290
00291
00292
00293
00294
00295
00296 int a;
00297
00298 for( a = 0; a < gNumActions; a++ ) {
00299
00300 destroyIMatrix( IP[a] );
00301
00302 if( gProblemType == POMDP_problem_type ) {
00303 destroyIMatrix( IR[a] );
00304 }
00305
00306 }
00307
00308 free( IP );
00309
00310 if( gProblemType == POMDP_problem_type ) {
00311 free( IR );
00312 free( gInitialBelief );
00313 }
00314
00315 destroyIMatrix( IQ );
00316
00317 }
00318
00319 void computeRewards() {
00320 int a, i, j, z, next_state, obs;
00321 REAL_VALUE sum, inner_sum;
00322
00323
00324
00325 for( a = 0; a < gNumActions; a++ )
00326 for( i = 0; i < gNumStates; i++ ) {
00327
00328 sum = 0.0;
00329
00330
00331 for( j = P[a]->row_start[i];
00332 j < P[a]->row_start[i] + P[a]->row_length[i];
00333 j++ ) {
00334
00335 next_state = P[a]->col[j];
00336
00337 if( gProblemType == POMDP_problem_type ) {
00338
00339 inner_sum = 0.0;
00340
00341
00342 for( z = R[a]->row_start[next_state];
00343 z < (R[a]->row_start[next_state] + R[a]->row_length[next_state]);
00344 z++ ) {
00345
00346 obs = R[a]->col[z];
00347
00348 inner_sum += R[a]->mat_val[z]
00349 * getImmediateReward( a, i, next_state, obs );
00350 }
00351 }
00352
00353 else
00354 inner_sum = getImmediateReward( a, i, next_state, 0 );
00355
00356 sum += P[a]->mat_val[j] * inner_sum;
00357
00358 }
00359
00360 addEntryToIMatrix( IQ, a, i, sum );
00361
00362 }
00363
00364 }
00365
00366 void convertMatrices() {
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383 int a;
00384 #if USE_DEBUG_PRINT
00385 struct timeval startTime, endTime;
00386 #endif
00387
00388
00389 P = (Matrix *) malloc( gNumActions * sizeof( *P ) );
00390 checkAllocatedPointer((void *) P );
00391
00392 R = (Matrix *) malloc( gNumActions * sizeof( *R ) );
00393 checkAllocatedPointer((void *) R );
00394
00395
00396
00397
00398 for( a = 0; a < gNumActions; a++ ) {
00399
00400 #if USE_DEBUG_PRINT
00401 printf("pomdp_spec: transforming transition matrix [a=%d]\n", a);
00402 #endif
00403
00404 P[a] = transformIMatrix( IP[a] );
00405 destroyIMatrix( IP[a] );
00406
00407 #if USE_DEBUG_PRINT
00408 printf("pomdp_spec: transforming obs matrix [a=%d]\n", a);
00409 #endif
00410
00411 if( gProblemType == POMDP_problem_type ) {
00412 R[a] = transformIMatrix( IR[a] );
00413 destroyIMatrix( IR[a] );
00414 }
00415
00416 }
00417
00418 free( IP );
00419
00420 if( gProblemType == POMDP_problem_type )
00421 free( IR );
00422
00423
00424
00425
00426 #if USE_DEBUG_PRINT
00427 printf("pomdp_spec: computing rewards\n");
00428 gettimeofday(&startTime, NULL);
00429 #endif
00430
00431 computeRewards();
00432
00433 #if USE_DEBUG_PRINT
00434 gettimeofday(&endTime, NULL);
00435 printf(" (took %lf seconds)\n",
00436 endTime.tv_sec - startTime.tv_sec + 1e-6 * (endTime.tv_usec - startTime.tv_usec));
00437 printf("pomdp_spec: transforming reward matrix\n");
00438 #endif
00439
00440
00441 Q = transformIMatrix( IQ );
00442 destroyIMatrix( IQ );
00443
00444 }
00445
00446 int writeMDP( char *filename ) {
00447 FILE *file;
00448 int a, i, j, obs;
00449
00450 if( (file = fopen( filename, "w" )) == NULL )
00451 return( 0 );
00452
00453 fprintf( file, "discount: %.6f\n", gDiscount );
00454
00455 if( gValueType == COST_value_type )
00456 fprintf( file, "values: cost\n" );
00457 else
00458 fprintf( file, "values: reward\n" );
00459
00460 fprintf( file, "states: %d\n", gNumStates );
00461 fprintf( file, "actions: %d\n", gNumActions );
00462
00463 if( gProblemType == POMDP_problem_type )
00464 fprintf( file, "observations: %d\n", gNumObservations );
00465
00466 for( a = 0; a < gNumActions; a++ )
00467 for( i = 0; i < gNumStates; i++ )
00468 for( j = P[a]->row_start[i];
00469 j < P[a]->row_start[i] + P[a]->row_length[i];
00470 j++ )
00471 fprintf( file, "T: %d : %d : %d %.6f\n",
00472 a, i, P[a]->col[j], P[a]->mat_val[j] );
00473
00474 if( gProblemType == POMDP_problem_type )
00475 for( a = 0; a < gNumActions; a++ )
00476 for( j = 0; j < gNumStates; j++ )
00477 for( obs = R[a]->row_start[j];
00478 obs < R[a]->row_start[j] + R[a]->row_length[j];
00479 obs++ )
00480 fprintf( file, "O: %d : %d : %d %.6f\n",
00481 a, j, R[a]->col[obs], R[a]->mat_val[obs] );
00482
00483 if( gProblemType == POMDP_problem_type )
00484 for( a = 0; a < gNumActions; a++ )
00485 for( i = Q->row_start[a];
00486 i < Q->row_start[a] + Q->row_length[a];
00487 i++ )
00488 fprintf( file, "R: %d : %d : * : * %.6f\n",
00489 a, Q->col[i], Q->mat_val[i] );
00490
00491 else
00492 for( a = 0; a < gNumActions; a++ )
00493 for( i = Q->row_start[a];
00494 i < Q->row_start[a] + Q->row_length[a];
00495 i++ )
00496 fprintf( file, "R: %d : %d : * %.6f\n",
00497 a, Q->col[i], Q->mat_val[i] );
00498
00499 fclose( file );
00500 return( 1 );
00501
00502 }
00503
00504 void deallocateMDP() {
00505 int a;
00506
00507 for( a = 0; a < gNumActions; a++ )
00508 {
00509 if(P!=NULL)
00510 {
00511 destroyMatrix( P[a] );
00512 }
00513
00514
00515 if( gProblemType == POMDP_problem_type )
00516 {
00517 if(R!= NULL)
00518 {
00519 destroyMatrix( R[a] );
00520 }
00521 }
00522 }
00523
00524 if(P!=NULL)
00525 {
00526 free( P );
00527 }
00528
00529
00530 if( gProblemType == POMDP_problem_type )
00531 {
00532 if(R!= NULL)
00533 {
00534 free( R );
00535 }
00536 if(gInitialBelief!= NULL)
00537 {
00538 free( gInitialBelief );
00539 }
00540
00541 }
00542
00543 destroyMatrix( Q );
00544
00545 destroyImmRewards();
00546
00547 }
00548
00549 void displayMDPSlice( int state ) {
00550
00551
00552
00553
00554 int a, j, obs;
00555
00556 if(( state < 0 ) || ( state >= gNumStates ) || ( gNumStates < 1 ))
00557 return;
00558
00559 printf( "MDP slice for state: %d\n", state );
00560
00561 for( a = 0; a < gNumActions; a++ )
00562 for( j = P[a]->row_start[state];
00563 j < P[a]->row_start[state] + P[a]->row_length[state];
00564 j++ )
00565 printf( "\tP( s=%d | s=%d, a=%d ) = %.6f\n",
00566 P[a]->col[j], state, a, P[a]->mat_val[j] );
00567
00568 if( gProblemType == POMDP_problem_type )
00569 for( a = 0; a < gNumActions; a++ )
00570 for( obs = R[a]->row_start[state];
00571 obs < R[a]->row_start[state] + R[a]->row_length[state];
00572 obs++ )
00573 printf( "\tP( o=%d | s=%d, a=%d ) = %.6f\n",
00574 R[a]->col[obs], state, a, R[a]->mat_val[obs] );
00575
00576 for( a = 0; a < gNumActions; a++ )
00577 printf( "\tQ( s=%d, a=%d ) = %5.6f\n",
00578 state, a, getEntryMatrix( Q, a, state ));
00579
00580 }
00581
00582 void memoryExhaustedErrorHandler()
00583 {
00584 deallocateMDP();
00585 printf("Not enough memory for parsing the POMDP file, exiting.\n");
00586 exit(-1);
00587 }
00588
00589 unsigned long GlobalMemLimit = 0;
00590
00591 void checkAllocatedPointer(void * ptr)
00592 {
00593 unsigned long memUsage = getCurrentProcessMemoryUsage();
00594
00595 if(GlobalMemLimit == 0)
00596 {
00597 GlobalMemLimit = getPlatformMemoryLimit();
00598 }
00599
00600
00601
00602
00603
00604 if(memUsage > GlobalMemLimit)
00605 {
00606 memoryExhaustedErrorHandler();
00607 }
00608
00609 if(ptr == NULL)
00610 {
00611 memoryExhaustedErrorHandler();
00612 }
00613 }
00614
00615
00616
00617