appl: SparseVector.cpp Source File

Go to the documentation of this file.
00001 #include "SparseVector.h"
00002 
00003 #include <iostream>
00004 #include <cassert>
00005 #include <sstream>
00006 #include <cmath>
00007 #include <iomanip>
00008 #include <stdexcept>
00009 #include <limits>
00010 
00011 #include "MathLib.h"
00012 #include "md5.h"
00013 using namespace std;
00014 
00015 using namespace momdp;
00016 namespace momdp
00017 {
00018     void SparseVector::add(SparseVector& result,        const SparseVector& x,  const SparseVector& y)
00019     {
00020         vector<SparseVector_Entry>::const_iterator  xi, xend;
00021 
00022         vector<SparseVector_Entry>::const_iterator  yi, yend;
00023 
00024         unsigned int xind, yind;
00025         bool xdone = false, ydone = false;
00026 
00027         assert( x.size() == y.size() );
00028         result.resize( x.size() );
00029 
00030 #define CHECK_X() \
00031         if (xi == xend) { \
00032             xdone = true; \
00033             goto main_loop_done; \
00034         } else { \
00035             xind = xi->index; \
00036         }
00037 
00038 #define CHECK_Y() \
00039         if (yi == yend) { \
00040             ydone = true; \
00041             goto main_loop_done; \
00042         } else { \
00043             yind = yi->index; \
00044         }
00045 
00046         xi = x.data.begin();
00047         yi = y.data.begin();
00048         xend = x.data.end();
00049         yend = y.data.end();
00050 
00051         CHECK_X();
00052         CHECK_Y();
00053 
00054         while (1) {
00055             if (xind < yind) {
00056                 result.push_back( xind, xi->value );
00057                 xi++;
00058                 CHECK_X();
00059             } else if (xind == yind) {
00060                 result.push_back( xind, xi->value + yi->value );
00061                 xi++;
00062                 yi++;
00063                 CHECK_X();
00064                 CHECK_Y();
00065             } else {
00066                 result.push_back( yind, yi->value );
00067                 yi++;
00068                 CHECK_Y();
00069             }
00070         }
00071 
00072 main_loop_done:
00073         if (!xdone) {
00074             for (; xi != xend; xi++) {
00075                 result.push_back( xi->index, xi->value );
00076             }
00077         } else if (!ydone) {
00078             for (; yi != yend; yi++) {
00079                 result.push_back( yi->index, yi->value );
00080             }
00081         }
00082 
00083         //result.finalize();
00084     }
00085 
00086     REAL_VALUE SparseVector::getEntropy()
00087     {
00088         REAL_VALUE entropy = 0;
00089         FOREACH (SparseVector_Entry, di, data) 
00090         {
00091             entropy += di->value * ( log(di->value) / log(2.0) );
00092         }
00093         entropy = (-1) * entropy;//negate summation
00094         return entropy;
00095     }
00096 
00097     void SparseVector::subtract(SparseVector& result,   const SparseVector& x, const SparseVector& y)
00098     {
00099         vector<SparseVector_Entry>::const_iterator  xi, xend;
00100 
00101         vector<SparseVector_Entry>::const_iterator  yi, yend;
00102 
00103         unsigned int xind, yind;
00104         bool xdone = false, ydone = false;
00105 
00106         assert( x.size() == y.size() );
00107         result.resize( x.size() );
00108 
00109 #define CHECK_X() \
00110         if (xi == xend) { \
00111             xdone = true; \
00112             goto main_loop_done; \
00113         } else { \
00114             xind = xi->index; \
00115         }
00116 
00117 #define CHECK_Y() \
00118         if (yi == yend) { \
00119             ydone = true; \
00120             goto main_loop_done; \
00121         } else { \
00122             yind = yi->index; \
00123         }
00124 
00125         xi = x.data.begin();
00126         yi = y.data.begin();
00127         xend = x.data.end();
00128         yend = y.data.end();
00129 
00130         CHECK_X();
00131         CHECK_Y();
00132 
00133         while (1) {
00134             if (xind < yind) {
00135                 result.push_back( xind, xi->value );
00136                 xi++;
00137                 CHECK_X();
00138             } else if (xind == yind) {
00139                 result.push_back( xind, xi->value - yi->value );
00140                 xi++;
00141                 yi++;
00142                 CHECK_X();
00143                 CHECK_Y();
00144             } else {
00145                 result.push_back( yind, -yi->value );
00146                 yi++;
00147                 CHECK_Y();
00148             }
00149         }
00150 
00151 main_loop_done:
00152         if (!xdone) {
00153             for (; xi != xend; xi++) {
00154                 result.push_back( xi->index, xi->value );
00155             }
00156         } else if (!ydone) {
00157             for (; yi != yend; yi++) {
00158                 result.push_back( yi->index, -yi->value );
00159             }
00160         }
00161 
00162         //result.finalize();
00163     }
00164 
00165 
00166     SparseVector::SparseVector(void) : logicalSize(0)
00167     {
00168 
00169     }
00170 
00171     SparseVector::~SparseVector(void)
00172     {
00173     }
00174 
00175 
00176     // Arithmetic
00177     int SparseVector::argSampleDist() const
00178     {
00179         REAL_VALUE randNumber = unit_rand();
00180         REAL_VALUE sum = 0.0;
00181         FOREACH(SparseVector_Entry, entry,  data) 
00182         {
00183             sum += entry->value;
00184             if(randNumber < sum )
00185             {
00186                 return entry->index; 
00187             }
00188         }
00189         return size()-1;
00190     }
00191 
00192     bool SparseVector::isDifferentByAtLeastSingleEntry(const SparseVector& x, const REAL_VALUE& threshold) const
00193     {
00194         std::vector<SparseVector_Entry>::const_iterator otherIter = x.data.begin();
00195         std::vector<SparseVector_Entry>::const_iterator iter = data.begin();
00196         for(; iter != data.end(); iter++)
00197         {
00198             while( (otherIter != x.data.end()) && (iter->index > otherIter->index))
00199             {
00200                 if(fabs(otherIter->value) > threshold)
00201                 {
00202                     return true;
00203                 }
00204                 otherIter++;
00205             }
00206 
00207             if((otherIter != x.data.end()) && (iter->index == otherIter->index))
00208             {
00209                 if(fabs(iter->value - otherIter->value) > threshold)
00210                 {
00211                     return true;
00212                 }
00213                 otherIter++;
00214             }
00215             else
00216             {
00217                 if( fabs(iter->value) > threshold)
00218                 {
00219                     return true;
00220                 }
00221             }
00222 
00223         }
00224         return false;
00225     }
00226 
00227     //return the index of maximum SparseEntry
00228     //in case of tie, only the index of first max is returned
00229     int SparseVector::argmax()
00230     {
00231         REAL_VALUE maxValue = -1 * numeric_limits<REAL_VALUE>::max();;
00232         int maxIndex = -1;
00233         FOREACH(SparseVector_Entry, di,  data) {
00234             if (di->value > maxValue) {
00235                 maxIndex = di->index;
00236                 maxValue = di->value;
00237             }
00238         }
00239         return maxIndex;
00240     }
00241 
00242 
00243     REAL_VALUE SparseVector::operator()( int index) const
00244     {
00245         FOREACH(SparseVector_Entry, di,  data) {
00246             if (di->index >= index) {
00247                 if (di->index == index) {
00248                     return di->value;
00249                 } else {
00250                     return 0.0;
00251                 }
00252             }
00253         }
00254         return 0.0;
00255     }
00256 
00257     void SparseVector::operator*=(REAL_VALUE s)
00258     {
00259         vector<SparseVector_Entry>::iterator  vi, vend = data.end();
00260 
00261         for (vi = data.begin(); vi != vend; vi++) {
00262             vi->value *= s;
00263         }
00264     }
00265 
00266     REAL_VALUE SparseVector::maskedSum( vector<int> mask)
00267     {
00268         REAL_VALUE result = 0.0;
00269         vector<SparseVector_Entry>::iterator  vi, vend = data.end();
00270 
00271         for (vi = data.begin(); vi != vend; vi++) 
00272         {
00273             if (!mask[vi->index]) 
00274             {
00275                 result += vi->value;
00276             }
00277         }
00278         return result;
00279     }
00280 
00281     void SparseVector::copyIndex( vector<int>& result)
00282     {
00283         vector<SparseVector_Entry>::iterator  vi, vend = data.end();
00284         for (vi = data.begin(); vi != vend; vi++) 
00285         {
00286             result.push_back(vi->index);
00287         }
00288     }
00289 
00290     void SparseVector::copyValue( vector<REAL_VALUE>& result)
00291     {
00292         vector<SparseVector_Entry>::iterator  vi, vend = data.end();
00293         for (vi = data.begin(); vi != vend; vi++) 
00294         {
00295             result.push_back(vi->value);
00296         }
00297     }
00298 
00299     // TODO:: this implementation may leak memory... check
00300 
00301     void SparseVector::operator+=(const SparseVector& x)
00302     {
00303         SparseVector tmp;
00304         add(tmp,*this,x);
00305         *this = tmp;
00306     }
00307 
00308     void SparseVector::operator-=(const SparseVector& x)
00309     {
00310         SparseVector tmp;
00311         subtract(tmp,*this,x);
00312         *this = tmp;
00313     }
00314 
00315     bool SparseVector::operator==(const SparseVector& x) const
00316     {
00317         if(x.logicalSize != logicalSize){
00318             return false;
00319         }
00320         std::vector<SparseVector_Entry>::const_iterator otherIter = x.data.begin();
00321         std::vector<SparseVector_Entry>::const_iterator iter = data.begin();
00322         for(; iter != data.end();
00323                 otherIter++, iter++){
00324             if(iter->index != otherIter -> index){
00325                 return false;
00326             }
00327             if(iter->value != otherIter -> value){
00328                 return false;
00329             }
00330         }
00331         return true;
00332     }
00333 
00334     REAL_VALUE SparseVector::delta(const SparseVector& x) const{
00335         REAL_VALUE del = 0;
00336         if(x.data.size() != data.size()){
00337             return 200000;
00338         }
00339         std::vector<SparseVector_Entry>::const_iterator otherIter = x.data.begin();
00340         std::vector<SparseVector_Entry>::const_iterator iter = data.begin();
00341         for(; iter != data.end(); iter++, otherIter++){
00342             if(iter->index != otherIter->index){
00343                 return 1000000;
00344             }
00345 
00346             if(fabs(iter->value - otherIter-> value) > del){
00347                 del = fabs(iter->value - otherIter->value);
00348             }
00349         }
00350         return del;
00351     }
00352 
00353 
00354     REAL_VALUE SparseVector::totalDifference(const SparseVector& x) const
00355     {
00356         REAL_VALUE del = 0;
00357 
00358         std::vector<SparseVector_Entry>::const_iterator otherIter = x.data.begin();
00359         std::vector<SparseVector_Entry>::const_iterator iter = data.begin();
00360         for(; iter != data.end(); iter++)
00361         {
00362             while( (otherIter != x.data.end()) && (iter->index > otherIter->index))
00363             {
00364                 del += fabs(otherIter->value);
00365                 otherIter++;
00366             }
00367 
00368             if((otherIter != x.data.end()) && (iter->index == otherIter->index))
00369             {
00370                 del += fabs(iter->value - otherIter->value);
00371                 otherIter++;
00372             }
00373             else
00374             {
00375                 del += fabs(iter->value);
00376             }
00377 
00378         }
00379         return del;
00380     }
00381 
00382     int SparseVector::sampleState() const
00383     {
00384         REAL_VALUE randNumber = unit_rand();
00385         REAL_VALUE sum = 0.0;
00386         FOREACH(SparseVector_Entry, entry,  data) 
00387         {
00388             sum += entry->value;
00389             if(randNumber < sum )
00390             {
00391                 return entry->index; 
00392             }
00393         }
00394         return logicalSize -1;
00395     }
00396 
00397     REAL_VALUE SparseVector::totalHashDifference(const SparseVector& x) const
00398     {
00399         REAL_VALUE del = 0;
00400 
00401         std::vector<SparseVector_Entry>::const_iterator otherIter = x.data.begin();
00402         std::vector<SparseVector_Entry>::const_iterator iter = data.begin();
00403         for(; iter != data.end(); iter++)
00404         {
00405             while( (otherIter != x.data.end()) && (iter->index > otherIter->index))
00406             {
00407                 REAL_VALUE toTruncate = otherIter->value;
00408                 toTruncate *=2^20; // TODO:: this truncation is different from the BeliefEntryTruncate function used in the hash generation function, fix it?
00409                 toTruncate = floor(toTruncate);
00410 
00411                 del += fabs(toTruncate);
00412                 otherIter++;
00413             }
00414 
00415             if((otherIter != x.data.end()) && (iter->index == otherIter->index))
00416             {
00417                 REAL_VALUE toTruncate = iter->value;
00418                 toTruncate *=2^20;
00419                 toTruncate = floor(toTruncate);
00420 
00421                 REAL_VALUE toTruncate2 = otherIter->value;
00422                 toTruncate2 *=2^20;
00423                 toTruncate2 = floor(toTruncate);
00424 
00425                 del += fabs(toTruncate - toTruncate2);
00426                 otherIter++;
00427             }
00428             else
00429             {
00430                 REAL_VALUE toTruncate = iter->value;
00431                 toTruncate *=2^20;
00432                 toTruncate = floor(toTruncate);
00433 
00434 
00435                 del += fabs(toTruncate);
00436             }
00437 
00438         }
00439         return del;
00440     }
00441 
00442     void SparseVector::resize( int _size)
00443     {
00444         logicalSize = _size;
00445         data.clear();
00446     }
00447 
00448     void SparseVector::push_back( int index, REAL_VALUE value)
00449     {
00450         data.push_back( SparseVector_Entry( index, value ) );
00451     }
00452 
00453     void SparseVector::read(std::istream& in)
00454     {
00455         int num_entries;
00456 
00457         in >> logicalSize;
00458         in >> num_entries;
00459         data.resize( num_entries );
00460         FOR (i, num_entries) {
00461             in >> data[i].index >> data[i].value;
00462         }
00463     }
00464 
00465     //@added by amit on 29th june
00466     std::ostream& SparseVector::write(std::ostream& out) const{
00467         out << "size: "<< logicalSize <<",\n data: [";
00468 
00469         for(std::vector<SparseVector_Entry>::const_iterator iter = data.begin();
00470                 iter != data.end(); iter++){
00471             out << iter->index << "= "<< iter->value;
00472             if(iter < data.end()-1){
00473                 out <<", ";
00474             }
00475             else{
00476                 out << "]";
00477             }
00478         }
00479         return out;
00480     }
00481 
00482     string SparseVector::ToString() const
00483     {
00484         stringstream out;
00485         out << "size: "<< logicalSize <<",\n data: [";
00486         printf("this: %X\n", this);
00487         cout << "size: "<< logicalSize <<",\n data: [";
00488 
00489         for(std::vector<SparseVector_Entry>::const_iterator iter = data.begin();        iter != data.end(); iter++)
00490         {
00491             printf("Iter: %X\n", iter);
00492             out << iter->index << "= "<< iter->value;
00493             if(iter < data.end()-1){
00494                 out <<", ";
00495             }
00496             else{
00497                 out << "]";
00498             }
00499         }
00500 
00501         out.flush();
00502         return out.str();
00503     }
00504 
00505 
00506     void SparseVector::finalize() 
00507     {
00508         // become immutable
00509         // TODO:: Temp Disabled
00510 
00511         md5hash = getHashFromSparseVectorTruncated(*this);
00512         //md5hash = getHashFromCVector(*this)   ;
00513     }
00514 
00515     string SparseVector::md5HashValue()
00516     {
00517         if(md5hash.size() ==0)
00518         {
00519             throw runtime_error("Bug, belief sparse vector need to call finalize() method to compute hash value");
00520         }
00521 
00522         // Sanity check
00523         //string str1 = getHashFromCVector(*this);
00524         //if(str1.compare(md5hash) != 0)
00525         //{
00526         //      throw runtime_error("Bug, cached md5 is different now, someone changed the sparse vector after finalized method....");
00527         //}
00528 
00529         return md5hash;
00530     }
00531 
00532     string SparseVector::convToString(unsigned char *bytes)
00533     {
00534         stringstream out;
00535 
00536         // TODO, use setf 
00537         //out.setf ( ios::hex, ios::basefield );       // set hex as the basefield
00538         //out.width(2);
00539 
00540         for(int i=0; i<16; i++)
00541         {
00542             out << setfill('0') << setw(2) << hex << (int) bytes[i];
00543         }       
00544 
00545         return out.str();
00546     }
00547 
00548     string SparseVector::getHashFromCVector(SparseVector& x)    
00549     {
00550         MD5_CTX context;
00551 
00552         unsigned char buffer[1024], digest[16];
00553 
00554         //init md5
00555         MD5 md5;
00556         md5.MD5Init (&context);
00557 
00558         std::vector<SparseVector_Entry>::const_iterator iter = x.data.begin();
00559 
00560 
00561         for(; iter != x.data.end();iter++)
00562         {
00563             int len1 = sizeof (iter->index);
00564             md5.MD5Update (&context, (unsigned char *)&(iter->index), len1);
00565 
00566             double toTruncate = iter->value;
00567             int len2 = sizeof (toTruncate);
00568             toTruncate *=2^20;
00569             toTruncate = floor(toTruncate);
00570             md5.MD5Update (&context, (unsigned char *)&(toTruncate), len2);
00571 
00572         }
00573         /*
00574            generate hash, close the file and return the
00575            hash as std::string
00576            */
00577         md5.MD5Final (digest, &context);
00578         string result = convToString(digest);
00579         return result;
00580     }   
00581     string SparseVector::getHashFromSparseVectorTruncated(SparseVector& x)      
00582     {
00583         MD5_CTX context;
00584 
00585         unsigned char digest[16];
00586 
00587         //init md5
00588         MD5 md5;
00589         md5.MD5Init (&context);
00590 
00591         std::vector<SparseVector_Entry>::const_iterator iter = x.data.begin();
00592 
00593 
00594         for(; iter != x.data.end();iter++)
00595         {
00596             int len1 = sizeof (iter->index);
00597             md5.MD5Update (&context, (unsigned char *)&(iter->index), len1);
00598 
00599             REAL_VALUE toTruncate = BeliefEntryTruncate(iter->value);
00600             int len2 = sizeof (toTruncate);
00601 
00602             md5.MD5Update (&context, (unsigned char *)&(toTruncate), len2);
00603 
00604         }
00605         /*
00606            generate hash, close the file and return the
00607            hash as std::string
00608            */
00609         md5.MD5Final (digest, &context);
00610         string result = convToString(digest);
00611         return result;
00612 
00613     }   
00614 
00615 }
00616