00001
00002
00003 #include "SparseTable.h"
00004 #include <algorithm>
00005
00006 void SparseRow::sortEntries(){
00007 stable_sort(entries.begin(),entries.end());
00008 }
00009
00010 bool SparseRow::onlyZeroUI(SparseEntry e){
00011 return e.hasOnlyZeroUI();
00012 }
00013
00014 void SparseRow::removeZeroEntries(){
00015 entries.erase(remove_if(entries.begin(),entries.end(),SparseRow::onlyZeroUI),entries.end());
00016 }
00017
00018 void SparseRow::removeRedundantEntries(){
00019 for (vector<SparseEntry>::iterator it=entries.begin();!entries.empty() && it!=entries.end()-1;) {
00020 if (*it ==*(it+1)) {
00021 it = entries.erase(it);
00022 }
00023 else{
00024 it++;
00025 }
00026 }
00027 }
00028
00029
00030
00031 double SparseRow::sumProbability(){
00032
00033 double prob = 0.0;
00034 for(int i=0;i<entries.size();i++){
00035 prob += entries[i].uniqueIndex[0].value;
00036 }
00037
00038 return prob;
00039
00040 }
00041
00042 SparseTable::SparseTable(vector<string> cIheader, vector<string> uIheader, vector<int> numCIValues, vector<int> numUIValues): cIheader(cIheader), uIheader(uIheader), numCIValues(numCIValues), numUIValues(numUIValues) {
00043 numOfRows = 1;
00044
00045
00046
00047
00048 for(vector<int>::iterator it=numCIValues.begin();it!=numCIValues.end();it++){
00049 numOfRows *= *it;
00050 }
00051 int numOfCommonIndex = cIheader.size();
00052 if(numOfRows == 0) numOfRows = 1;
00053
00054
00055 table = new SparseRow[numOfRows];
00056 mapIn = new int[numOfCommonIndex];
00057 mapOut = new int[numOfCommonIndex];
00058
00059
00060 for(int i=0;i<numOfCommonIndex;i++){
00061 mapIn[i] = i;
00062 mapOut[i] = i;
00063 }
00064
00065
00066 for(int i=0;i<numOfCommonIndex;i++){
00067 position.push_back(0);
00068 }
00069 rowPosition = 0;
00070
00071 }
00072
00073 SparseTable::SparseTable(SparseTable& B){
00074 cIheader = B.cIheader;
00075 uIheader = B.uIheader;
00076 numCIValues= B.numCIValues;
00077 numUIValues= B.numUIValues;
00078 numOfRows = 1;
00079
00080
00081
00082
00083 for(vector<int>::iterator it=numCIValues.begin();it!=numCIValues.end();it++){
00084 numOfRows *= *it;
00085 }
00086 int numOfCommonIndex = this->cIheader.size();
00087 if(numOfRows == 0) numOfRows = 1;
00088
00089
00090 table = new SparseRow[numOfRows];
00091 mapIn = new int[numOfCommonIndex];
00092 mapOut = new int[numOfCommonIndex];
00093
00094
00095 for(int i=0;i<numOfCommonIndex;i++){
00096 mapIn[i] = B.mapIn[i];
00097 mapOut[i] = B.mapOut[i];
00098 }
00099
00100 for(int i=0;i<numOfRows;i++){
00101 table[i] = B.table[i];
00102 }
00103
00104
00105 for(int i=0;i<numOfCommonIndex;i++){
00106 position.push_back(0);
00107 }
00108 rowPosition = 0;
00109
00110 }
00111
00112
00113 void SparseTable::resetIterator(){
00114 for(int i=0;i<position.size();i++){
00115 position[i] = 0;
00116 }
00117 rowPosition=0;
00118 }
00119
00120 vector<int> SparseTable::getIterBegin(){
00121 return vector<int>(cIheader.size(), 0);
00122 }
00123
00124 vector<int> SparseTable::getIterPosition(){
00125 return position;
00126 }
00127
00128
00129
00130
00131 bool SparseTable::errorInProbabilities(vector<vector<int> >& commonIndices, vector<double>& probs) {
00132 bool error = false;
00133 for(int i=0;i<numOfRows;i++){
00134 double prob;
00135 prob = table[i].sumProbability();
00136
00137 if ((fabs(prob - 1.0) > 0.000001)) {
00138 probs.push_back(prob);
00139 commonIndices.push_back(getCommonIndex(i));
00140 error=true;
00141 }
00142 }
00143 return error;
00144 }
00145
00146
00147 bool SparseTable::getNext(SparseEntry& se){
00148 vector<SparseEntry>* curRow = &getSparseEntries(position);
00149
00150
00151 while(rowPosition >= curRow->size()){
00152
00153 rowPosition = 0;
00154 if(getNextCI(position)){
00155 curRow = &getSparseEntries(position);
00156 }
00157 else{
00158
00159 return false;
00160 }
00161 }
00162
00163 se = (*curRow)[rowPosition];
00164 rowPosition++;
00165 return true;
00166 }
00167
00168
00169 bool SparseTable::getNextCI(vector<int>& CI){
00170 assert(CI.size() == cIheader.size());
00171 bool success = false;
00172 for(int i=CI.size()-1;i>=0;i--){
00173 if(CI[i]+1 >= numCIValues[i]){
00174 CI[i] = 0;
00175 }
00176 else{
00177 CI[i]++;
00178 success = true;
00179 break;
00180 }
00181 }
00182 return success;
00183 }
00184
00185 SparseTable::~SparseTable()
00186 {
00187 delete [] mapIn;
00188 delete [] mapOut;
00189 delete [] table;
00190 }
00191
00192 void SparseTable::sortEntries(){
00193 for(int i=0;i<numOfRows;i++){
00194 table[i].sortEntries();
00195 }
00196 }
00197
00198 void SparseTable::swapCIHeaders(int i, int j) {
00199 string tempHeaderEntry;
00200
00201 if (i < 0 || j < 0 || (unsigned) i >= cIheader.size() || (unsigned) j
00202 >= cIheader.size()) {
00203 cout << "Out of Index exception for header columns to be swapped."
00204 << endl;
00205 cout << "Check again!" << endl;
00206 exit(-1);
00207 }
00208
00209 tempHeaderEntry = cIheader[i];
00210 cIheader[i] = cIheader[j];
00211 cIheader[j] = tempHeaderEntry;
00212
00213 int tempValueEntry = numCIValues[i];
00214 numCIValues[i] = numCIValues[j];
00215 numCIValues[j] = tempValueEntry;
00216 }
00217
00218
00219 int SparseTable::getTableIndex(vector<int> commonIndex){
00220 assert(cIheader.size()==1 || commonIndex.size() == cIheader.size());
00221 int index = 0;
00222 int increment = 1;
00223
00224 for(int i=commonIndex.size()-1;i>=0;i--){
00225 assert(mapOut[i] <= commonIndex.size());
00226 index += commonIndex[mapOut[i]] * increment;
00227 increment *= numCIValues[mapOut[i]];
00228 }
00229 if(index >= numOfRows){
00230 cout << index <<" " << numOfRows << endl;
00231 assert(false);
00232 }
00233
00234 return index;
00235 }
00236
00237
00238 vector<int> SparseTable::getCommonIndex(int index){
00239 if(index >= numOfRows)
00240 assert(false);
00241
00242 vector<int> CIs;
00243 for(int i=numCIValues.size()-1;i>=0;i--){
00244 int idx = index % numCIValues[mapOut[i]];
00245 index /= numCIValues[mapOut[i]];
00246 CIs.insert(CIs.begin(),idx);
00247 }
00248
00249 vector<int> newCIs;
00250
00251 for(int i=0;i<numCIValues.size();i++){
00252 newCIs.push_back(CIs[mapOut[i]]);
00253 }
00254
00255 return newCIs;
00256 }
00257
00258 vector<SparseEntry>& SparseTable::getSparseEntries(vector<int> commonIndex){
00259 return table[getTableIndex(commonIndex)].entries;
00260 }
00261
00262
00263 void SparseTable::add(vector<int> commonIndex, SparseEntry se) {
00264 int index = getTableIndex(commonIndex);
00265 table[index].entries.push_back(se);
00266 }
00267
00268 bool SparseTable::checkNoMissingEntries(vector<int>& commonIndex){
00269 for(int i=0;i<numOfRows;i++){
00270
00271 if(table[i].entries.size()==0){
00272 commonIndex = getCommonIndex(i);
00273 return false;
00274 }
00275 }
00276 return true;
00277 }
00278
00279 void SparseTable::convertForUse() {
00280 sortEntries();
00281 for(int i=0;i<numOfRows;i++){
00282 table[i].removeRedundantEntries();
00283 }
00284 for(int i=0;i<numOfRows;i++){
00285 table[i].removeZeroEntries();
00286 }
00287 }
00288
00289 void SparseTable::removeRedundant(){
00290 for(int i=0;i<numOfRows;i++){
00291 table[i].removeRedundantEntries();
00292 }
00293 }
00294
00295 SharedPointer<SparseTable> SparseTable::removeUnmatchedCI(int cIIndex, int uIIndex) {
00296
00297 vector<string> cIheader, uIheader;
00298 vector<int> numUIValues, numCIValues;
00299
00300 for (unsigned int i = 0; i < this->cIheader.size(); i++) {
00301 if ( i != cIIndex) {
00302 cIheader.push_back(this->cIheader[i]);
00303 numCIValues.push_back(this->numCIValues[i]);
00304 }
00305 }
00306
00307 for (unsigned int i = 0 ; i < this->uIheader.size(); i++) {
00308 uIheader.push_back(this->uIheader[i]);
00309 numUIValues.push_back(this->numUIValues[i]);
00310 }
00311 SharedPointer<SparseTable> removedcITable (new SparseTable(cIheader, uIheader, numCIValues, numUIValues));
00312
00313 resetIterator();
00314 SparseEntry se;
00315 while(getNext(se)){
00316
00317
00318 vector<int> commonIndex = getIterPosition();
00319 vector<int> newCI;
00320 if (commonIndex[cIIndex] ==se.uniqueIndex[uIIndex].index) {
00321 SparseEntry newSe;
00322
00323 for (unsigned int j = 0 ; j < commonIndex.size(); j++) {
00324 if (j != cIIndex)
00325 newCI.push_back(commonIndex[j]);
00326 }
00327 for (unsigned int j = 0 ; j < se.uniqueIndex.size(); j++) {
00328 newSe.uniqueIndex.push_back(se.uniqueIndex[j]);
00329 }
00330 removedcITable->add(newCI, newSe);
00331 }
00332 }
00333
00334 return removedcITable;
00335 }
00336
00337
00338
00339
00340 SharedPointer<SparseTable> SparseTable::joinHeader(SparseTable& A, SparseTable& B, int& numCommonIndexes){
00341 unsigned int pos;
00342 vector<string> indexes;
00343
00344 indexes = A.findIntersectingCI(SharedPointer<SparseTable>(&B));
00345
00346 for (unsigned int j = 0; j < indexes.size(); j++) {
00347 pos = B.findPosition(indexes[j]);
00348 B.swapCIHeaders(j, pos);
00349 B.swapSparseColumns(j, pos);
00350 }
00351 B.sortEntries();
00352
00353
00354 for (unsigned int j = 0; j < indexes.size(); j++)
00355 {
00356 pos = A.findPosition(indexes[j]);
00357 A.swapCIHeaders(j, pos);
00358 A.swapSparseColumns(j, pos);
00359
00360 }
00361 A.sortEntries();
00362
00363
00364 vector<string> cIheader, uIheader;
00365 vector<int> numCIValues,numUIValues;
00366
00367
00368 for (unsigned int i = 0; i < A.cIheader.size(); i++) {
00369 cIheader.push_back(A.cIheader[i]);
00370 numCIValues.push_back(A.numCIValues[i]);
00371 }
00372
00373
00374 numCommonIndexes = 0;
00375 for (unsigned int i = 0; i < B.cIheader.size(); i++) {
00376 bool alreadyHas = false;
00377 for (unsigned int j = 0; j < cIheader.size(); j++) {
00378 if (cIheader[j] == B.cIheader[i]) {
00379 alreadyHas = true;
00380 numCommonIndexes++;
00381 }
00382 }
00383 if (!(alreadyHas)) {
00384 cIheader.push_back(B.cIheader[i]);
00385 numCIValues.push_back(B.numCIValues[i]);
00386 }
00387 }
00388
00389
00390 for (unsigned int i = 0; i < A.uIheader.size(); i++) {
00391 uIheader.push_back(A.uIheader[i]);
00392 numUIValues.push_back(A.numUIValues[i]);
00393 }
00394 for (unsigned int i = 0; i < B.uIheader.size(); i++) {
00395 uIheader.push_back(B.uIheader[i]);
00396 numUIValues.push_back(B.numUIValues[i]);
00397 }
00398
00399 return SharedPointer<SparseTable>(new SparseTable(cIheader, uIheader, numCIValues, numUIValues));
00400 }
00401
00402
00403 SharedPointer<SparseTable> SparseTable::join(SparseTable& A, SparseTable& B, int whichFunction) {
00404 int numCommonIndexes;
00405
00406
00407 SharedPointer<SparseTable> C = SparseTable::joinHeader(A,B, numCommonIndexes);
00408
00409
00410 vector<int> Cpos = C->getIterBegin();
00411 do{
00412
00413
00414
00415 vector<int> Apos, Bpos;
00416 for(int i=0;i<A.cIheader.size();i++){
00417 Apos.push_back(Cpos[i]);
00418 }
00419
00420 for(int i=0;i<numCommonIndexes;i++){
00421 Bpos.push_back(Cpos[i]);
00422 }
00423
00424 for(int i=A.cIheader.size();i<C->cIheader.size();i++){
00425 Bpos.push_back(Cpos[i]);
00426 }
00427
00428 vector<SparseEntry> aEntries = A.getSparseEntries(Apos);
00429 vector<SparseEntry> bEntries = B.getSparseEntries(Bpos);
00430
00431
00432 if ((whichFunction == REWARDFUNCTION) || (whichFunction == TERMINALFUNCTION)) {
00433
00434 double sum = 0;
00435 for(int i=0;i<aEntries.size();i++){
00436 sum += aEntries[i].uniqueIndex[0].value;
00437 }
00438 for(int i=0;i<bEntries.size();i++){
00439 sum += bEntries[i].uniqueIndex[0].value;
00440 }
00441
00442
00443
00444 if(sum!=0.0 || (whichFunction == TERMINALFUNCTION && (!aEntries.empty() || !bEntries.empty()))){
00445
00446 SparseEntry newEntry;
00447 UniqueIndex ui;
00448 ui.index = 0;
00449 ui.value = sum;
00450 newEntry.uniqueIndex.push_back(ui);
00451
00452 C->add(Cpos, newEntry);
00453 }
00454 }
00455 else{
00456
00457 for(int i=0;i<aEntries.size();i++){
00458 for(int k=0;k<bEntries.size();k++){
00459 C->add(Cpos, SparseTable::mergeSparseEntry(aEntries[i],bEntries[k],numCommonIndexes));
00460 }
00461 }
00462 }
00463 }
00464 while(C->getNextCI(Cpos));
00465 return C;
00466 }
00467
00468
00469 SparseEntry SparseTable::mergeSparseEntry(SparseEntry& A, SparseEntry& B,
00470 int numCommonIndexes) {
00471 SparseEntry C;
00472 for (unsigned int i = 0; i < A.uniqueIndex.size(); i++)
00473 C.uniqueIndex.push_back(A.uniqueIndex[i]);
00474 for (unsigned int i = 0; i < B.uniqueIndex.size(); i++)
00475 C.uniqueIndex.push_back(B.uniqueIndex[i]);
00476 return C;
00477 }
00478
00479 void SparseTable::swapSparseColumns(int i, int j) {
00480 int tempEntryValue;
00481
00482 if (i < 0 || j < 0 || (unsigned) i >= cIheader.size()
00483 || (unsigned) j >= cIheader.size()) {
00484 cout << "Out of Index exception for columns to be swapped." << endl;
00485 cout << "Check again!" << endl;
00486 exit(-1);
00487 }
00488
00489
00490
00491
00492
00493
00494
00495 int temp = mapIn[i];
00496 mapIn[i] = mapIn[j];
00497 mapIn[j] = temp;
00498
00499 for(int i=0;i<cIheader.size();i++){
00500 mapOut[mapIn[i]] = i;
00501 }
00502 }
00503
00504 vector<string> SparseTable::findIntersectingCI(SharedPointer<SparseTable> st) {
00505 vector<string> index;
00506
00507 for (unsigned int i = 0; i < cIheader.size(); i++) {
00508 for (unsigned int j = 0; j < st->cIheader.size(); j++) {
00509 if (st->cIheader[j] == cIheader[i]) {
00510 index.push_back(cIheader[i]);
00511 break;
00512 }
00513 }
00514 }
00515
00516 return index;
00517 }
00518
00519 unsigned int SparseTable::findPosition(string word) {
00520 for (unsigned int i = 0; i < cIheader.size(); i++) {
00521 if (cIheader[i] == word)
00522 return i;
00523 }
00524 assert(false);
00525 }
00526
00527 bool SparseTable::containsCI(string word) {
00528 for (unsigned int i=0; i < cIheader.size(); i++) {
00529 if (cIheader[i] == word)
00530 return true;
00531 }
00532 return false;
00533 }
00534
00535 int SparseTable::size(){
00536 int size=0;
00537 for(int i=0;i<numOfRows;i++){
00538 size += table[i].entries.size();
00539 }
00540 return size;
00541 }
00542
00543 string SparseTable::getInfo()
00544 {
00545 stringstream sstream;
00546 sstream << "\nSparseTable size: " << size() << endl;
00547
00548 sstream << "Headers Common Indexes" << endl;
00549 for (unsigned int i = 0; i < cIheader.size(); i++)
00550 sstream << cIheader[i] << " ";
00551 sstream << "\nHeaders Unique Indexes" << endl;
00552 for (unsigned int i = 0; i < uIheader.size(); i++)
00553 sstream << uIheader[i] << " ";
00554 sstream << endl;
00555 return sstream.str();
00556 }
00557
00558 void SparseTable::write(std::ostream& out) {
00559
00560 out << "\nSparseTable size: " << size() << endl;
00561 out << "Headers Common Indexes" << endl;
00562 for (unsigned int i = 0; i < cIheader.size(); i++)
00563 out << cIheader[i] << " ";
00564 out << "\nHeaders Unique Indexes" << endl;
00565 for (unsigned int i = 0; i < uIheader.size(); i++)
00566 out << uIheader[i] << " ";
00567 out << endl;
00568
00569 resetIterator();
00570 SparseEntry se;
00571 int numEntries = 0;
00572 while(getNext(se)){
00573 vector<int> commonIndex = getIterPosition();
00574 if(!(cIheader[0] == "null")){
00575 for (unsigned int j = 0; j < commonIndex.size(); j++) {
00576 out << commonIndex[j] << " ";
00577 }
00578 }
00579 out << " unique index: ";
00580 vector<UniqueIndex> ui = se.uniqueIndex;
00581 for (unsigned int j = 0; j < ui.size(); j++) {
00582 out << ui[j].index << "=" << ui[j].value << " ";
00583 }
00584 out << endl;
00585 numEntries++;
00586 }
00587 }