IceRevisitedRadix.cpp
Go to the documentation of this file.
00001 
00002 
00008 
00009 
00011 
00039 
00040 
00041 /*
00042 To do:
00043         - add an offset parameter between two input values (avoid some data recopy sometimes)
00044         - unroll ? asm ?
00045         - 11 bits trick & 3 passes as Michael did
00046         - prefetch stuff the day I have a P3
00047         - make a version with 16-bits indices ?
00048 */
00049 
00051 // Precompiled Header
00052 #include "Stdafx.h"
00053 
00054 using namespace IceCore;
00055 
00056 #define INVALIDATE_RANKS        mCurrentSize|=0x80000000
00057 #define VALIDATE_RANKS          mCurrentSize&=0x7fffffff
00058 #define CURRENT_SIZE            (mCurrentSize&0x7fffffff)
00059 #define INVALID_RANKS           (mCurrentSize&0x80000000)
00060 
00061 #define CHECK_RESIZE(n)                                                                                                                                         \
00062         if(n!=mPreviousSize)                                                                                                                                    \
00063         {                                                                                                                                                                               \
00064                                 if(n>mCurrentSize)      Resize(n);                                                                                              \
00065                 else                                            ResetRanks();                                                                                   \
00066                 mPreviousSize = n;                                                                                                                                      \
00067         }
00068 
00069 #define CREATE_HISTOGRAMS(type, buffer)                                                                                                         \
00070         /* Clear counters/histograms */                                                                                                                 \
00071         ZeroMemory(mHistogram, 256*4*sizeof(udword));                                                                                   \
00072                                                                                                                                                                                         \
00073         /* Prepare to count */                                                                                                                                  \
00074         ubyte* p = (ubyte*)input;                                                                                                                               \
00075         ubyte* pe = &p[nb*4];                                                                                                                                   \
00076         udword* h0= &mHistogram[0];             /* Histogram for first pass (LSB)       */                                      \
00077         udword* h1= &mHistogram[256];   /* Histogram for second pass            */                                      \
00078         udword* h2= &mHistogram[512];   /* Histogram for third pass                     */                                      \
00079         udword* h3= &mHistogram[768];   /* Histogram for last pass (MSB)        */                                      \
00080                                                                                                                                                                                         \
00081         bool AlreadySorted = true;      /* Optimism... */                                                                                       \
00082                                                                                                                                                                                         \
00083         if(INVALID_RANKS)                                                                                                                                               \
00084         {                                                                                                                                                                               \
00085                 /* Prepare for temporal coherence */                                                                                            \
00086                 type* Running = (type*)buffer;                                                                                                          \
00087                 type PrevVal = *Running;                                                                                                                        \
00088                                                                                                                                                                                         \
00089                 while(p!=pe)                                                                                                                                            \
00090                 {                                                                                                                                                                       \
00091                         /* Read input buffer in previous sorted order */                                                                \
00092                         type Val = *Running++;                                                                                                                  \
00093                         /* Check whether already sorted or not */                                                                               \
00094                         if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */                               \
00095                         /* Update for next iteration */                                                                                                 \
00096                         PrevVal = Val;                                                                                                                                  \
00097                                                                                                                                                                                         \
00098                         /* Create histograms */                                                                                                                 \
00099                         h0[*p++]++;     h1[*p++]++;     h2[*p++]++;     h3[*p++]++;                                                                     \
00100                 }                                                                                                                                                                       \
00101                                                                                                                                                                                         \
00102                 /* If all input values are already sorted, we just have to return and leave the */      \
00103                 /* previous list unchanged. That way the routine may take advantage of temporal */      \
00104                 /* coherence, for example when used to sort transparent faces.                                  */      \
00105                 if(AlreadySorted)                                                                                                                                       \
00106                 {                                                                                                                                                                       \
00107                         mNbHits++;                                                                                                                                              \
00108                         for(udword i=0;i<nb;i++)        mRanks[i] = i;                                                                          \
00109                         return *this;                                                                                                                                   \
00110                 }                                                                                                                                                                       \
00111         }                                                                                                                                                                               \
00112         else                                                                                                                                                                    \
00113         {                                                                                                                                                                               \
00114                 /* Prepare for temporal coherence */                                                                                            \
00115                 udword* Indices = mRanks;                                                                                                                       \
00116                 type PrevVal = (type)buffer[*Indices];                                                                                          \
00117                                                                                                                                                                                         \
00118                 while(p!=pe)                                                                                                                                            \
00119                 {                                                                                                                                                                       \
00120                         /* Read input buffer in previous sorted order */                                                                \
00121                         type Val = (type)buffer[*Indices++];                                                                                    \
00122                         /* Check whether already sorted or not */                                                                               \
00123                         if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */                               \
00124                         /* Update for next iteration */                                                                                                 \
00125                         PrevVal = Val;                                                                                                                                  \
00126                                                                                                                                                                                         \
00127                         /* Create histograms */                                                                                                                 \
00128                         h0[*p++]++;     h1[*p++]++;     h2[*p++]++;     h3[*p++]++;                                                                     \
00129                 }                                                                                                                                                                       \
00130                                                                                                                                                                                         \
00131                 /* If all input values are already sorted, we just have to return and leave the */      \
00132                 /* previous list unchanged. That way the routine may take advantage of temporal */      \
00133                 /* coherence, for example when used to sort transparent faces.                                  */      \
00134                 if(AlreadySorted)       { mNbHits++; return *this;      }                                                                       \
00135         }                                                                                                                                                                               \
00136                                                                                                                                                                                         \
00137         /* Else there has been an early out and we must finish computing the histograms */              \
00138         while(p!=pe)                                                                                                                                                    \
00139         {                                                                                                                                                                               \
00140                 /* Create histograms without the previous overhead */                                                           \
00141                 h0[*p++]++;     h1[*p++]++;     h2[*p++]++;     h3[*p++]++;                                                                             \
00142         }
00143 
00144 #define CHECK_PASS_VALIDITY(pass)                                                                                                                       \
00145         /* Shortcut to current counters */                                                                                                              \
00146         udword* CurCount = &mHistogram[pass<<8];                                                                                                \
00147                                                                                                                                                                                         \
00148         /* Reset flag. The sorting pass is supposed to be performed. (default) */                               \
00149         bool PerformPass = true;                                                                                                                                \
00150                                                                                                                                                                                         \
00151         /* Check pass validity */                                                                                                                               \
00152                                                                                                                                                                                         \
00153         /* If all values have the same byte, sorting is useless. */                                                             \
00154         /* It may happen when sorting bytes or words instead of dwords. */                                              \
00155         /* This routine actually sorts words faster than dwords, and bytes */                                   \
00156         /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */                             \
00157         /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
00158                                                                                                                                                                                         \
00159         /* Get first byte */                                                                                                                                    \
00160         ubyte UniqueVal = *(((ubyte*)input)+pass);                                                                                              \
00161                                                                                                                                                                                         \
00162         /* Check that byte's counter */                                                                                                                 \
00163         if(CurCount[UniqueVal]==nb)     PerformPass=false;
00164 
00166 
00169 
00170 RadixSort::RadixSort() : mCurrentSize(0), mRanks(null), mRanks2(null), mTotalCalls(0), mNbHits(0)
00171 {
00172 #ifndef RADIX_LOCAL_RAM
00173         // Allocate input-independent ram
00174         mHistogram      = new udword[256*4];
00175         mOffset         = new udword[256];
00176 #endif
00177         // Initialize indices
00178         INVALIDATE_RANKS;
00179 }
00180 
00182 
00185 
00186 RadixSort::~RadixSort()
00187 {
00188         // Release everything
00189 #ifndef RADIX_LOCAL_RAM
00190         DELETEARRAY(mOffset);
00191         DELETEARRAY(mHistogram);
00192 #endif
00193         DELETEARRAY(mRanks2);
00194         DELETEARRAY(mRanks);
00195 }
00196 
00198 
00203 
00204 bool RadixSort::Resize(udword nb)
00205 {
00206         // Free previously used ram
00207         DELETEARRAY(mRanks2);
00208         DELETEARRAY(mRanks);
00209 
00210         // Get some fresh one
00211         mRanks  = new udword[nb];       CHECKALLOC(mRanks);
00212         mRanks2 = new udword[nb];       CHECKALLOC(mRanks2);
00213 
00214         return true;
00215 }
00216 
00217 inline_ void RadixSort::CheckResize(udword nb)
00218 {
00219         udword CurSize = CURRENT_SIZE;
00220         if(nb!=CurSize)
00221         {
00222                 if(nb>CurSize)  Resize(nb);
00223                 mCurrentSize = nb;
00224                 INVALIDATE_RANKS;
00225         }
00226 }
00227 
00229 
00237 
00238 RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
00239 {
00240         // Checkings
00241         if(!input || !nb || nb&0x80000000)      return *this;
00242 
00243         // Stats
00244         mTotalCalls++;
00245 
00246         // Resize lists if needed
00247         CheckResize(nb);
00248 
00249 #ifdef RADIX_LOCAL_RAM
00250         // Allocate histograms & offsets on the stack
00251         udword mHistogram[256*4];
00252 //      udword mOffset[256];
00253         udword* mLink[256];
00254 #endif
00255 
00256         // Create histograms (counters). Counters for all passes are created in one run.
00257         // Pros:        read input buffer once instead of four times
00258         // Cons:        mHistogram is 4Kb instead of 1Kb
00259         // We must take care of signed/unsigned values for temporal coherence.... I just
00260         // have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
00261         if(hint==RADIX_UNSIGNED)        { CREATE_HISTOGRAMS(udword, input);     }
00262         else                                            { CREATE_HISTOGRAMS(sdword, input);     }
00263 
00264         // Compute #negative values involved if needed
00265         udword NbNegativeValues = 0;
00266         if(hint==RADIX_SIGNED)
00267         {
00268                 // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
00269                 // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
00270                 // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
00271                 udword* h3= &mHistogram[768];
00272                 for(udword i=128;i<256;i++)     NbNegativeValues += h3[i];      // 768 for last histogram, 128 for negative part
00273         }
00274 
00275         // Radix sort, j is the pass number (0=LSB, 3=MSB)
00276         for(udword j=0;j<4;j++)
00277         {
00278                 CHECK_PASS_VALIDITY(j);
00279 
00280                 // Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is
00281                 // not a problem, numbers are correctly sorted anyway.
00282                 if(PerformPass)
00283                 {
00284                         // Should we care about negative values?
00285                         if(j!=3 || hint==RADIX_UNSIGNED)
00286                         {
00287                                 // Here we deal with positive values only
00288 
00289                                 // Create offsets
00290 //                              mOffset[0] = 0;
00291 //                              for(udword i=1;i<256;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];
00292                                 mLink[0] = mRanks2;
00293                                 for(udword i=1;i<256;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];
00294                         }
00295                         else
00296                         {
00297                                 // This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place.
00298 
00299                                 // Create biased offsets, in order for negative numbers to be sorted as well
00300 //                              mOffset[0] = NbNegativeValues;                                                                                          // First positive number takes place after the negative ones
00301                                 mLink[0] = &mRanks2[NbNegativeValues];                                                                          // First positive number takes place after the negative ones
00302 //                              for(udword i=1;i<128;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];      // 1 to 128 for positive numbers
00303                                 for(udword i=1;i<128;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];          // 1 to 128 for positive numbers
00304 
00305                                 // Fixing the wrong place for negative values
00306 //                              mOffset[128] = 0;
00307                                 mLink[128] = mRanks2;
00308 //                              for(i=129;i<256;i++)                    mOffset[i] = mOffset[i-1] + CurCount[i-1];
00309                                 for(udword i=129;i<256;i++)             mLink[i] = mLink[i-1] + CurCount[i-1];
00310                         }
00311 
00312                         // Perform Radix Sort
00313                         ubyte* InputBytes       = (ubyte*)input;
00314                         InputBytes += j;
00315                         if(INVALID_RANKS)
00316                         {
00317 //                              for(udword i=0;i<nb;i++)        mRanks2[mOffset[InputBytes[i<<2]]++] = i;
00318                                 for(udword i=0;i<nb;i++)        *mLink[InputBytes[i<<2]]++ = i;
00319                                 VALIDATE_RANKS;
00320                         }
00321                         else
00322                         {
00323                                 udword* Indices         = mRanks;
00324                                 udword* IndicesEnd      = &mRanks[nb];
00325                                 while(Indices!=IndicesEnd)
00326                                 {
00327                                         udword id = *Indices++;
00328 //                                      mRanks2[mOffset[InputBytes[id<<2]]++] = id;
00329                                         *mLink[InputBytes[id<<2]]++ = id;
00330                                 }
00331                         }
00332 
00333                         // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
00334                         udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
00335                 }
00336         }
00337         return *this;
00338 }
00339 
00341 
00349 
00350 RadixSort& RadixSort::Sort(const float* input2, udword nb)
00351 {
00352         // Checkings
00353         if(!input2 || !nb || nb&0x80000000)     return *this;
00354 
00355         // Stats
00356         mTotalCalls++;
00357 
00358         udword* input = (udword*)input2;
00359 
00360         // Resize lists if needed
00361         CheckResize(nb);
00362 
00363 #ifdef RADIX_LOCAL_RAM
00364         // Allocate histograms & offsets on the stack
00365         udword mHistogram[256*4];
00366 //      udword mOffset[256];
00367         udword* mLink[256];
00368 #endif
00369 
00370         // Create histograms (counters). Counters for all passes are created in one run.
00371         // Pros:        read input buffer once instead of four times
00372         // Cons:        mHistogram is 4Kb instead of 1Kb
00373         // Floating-point values are always supposed to be signed values, so there's only one code path there.
00374         // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
00375         // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
00376         // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
00377         // wouldn't work with mixed positive/negative values....
00378         { CREATE_HISTOGRAMS(float, input2); }
00379 
00380         // Compute #negative values involved if needed
00381         udword NbNegativeValues = 0;
00382         // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
00383         // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
00384         // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
00385         udword* h3= &mHistogram[768];
00386         for(udword i=128;i<256;i++)     NbNegativeValues += h3[i];      // 768 for last histogram, 128 for negative part
00387 
00388         // Radix sort, j is the pass number (0=LSB, 3=MSB)
00389         for(udword j=0;j<4;j++)
00390         {
00391                 // Should we care about negative values?
00392                 if(j!=3)
00393                 {
00394                         // Here we deal with positive values only
00395                         CHECK_PASS_VALIDITY(j);
00396 
00397                         if(PerformPass)
00398                         {
00399                                 // Create offsets
00400 //                              mOffset[0] = 0;
00401                                 mLink[0] = mRanks2;
00402 //                              for(udword i=1;i<256;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];
00403                                 for(udword i=1;i<256;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];
00404 
00405                                 // Perform Radix Sort
00406                                 ubyte* InputBytes = (ubyte*)input;
00407                                 InputBytes += j;
00408                                 if(INVALID_RANKS)
00409                                 {
00410 //                                      for(i=0;i<nb;i++)       mRanks2[mOffset[InputBytes[i<<2]]++] = i;
00411                                         for(udword i=0;i<nb;i++)        *mLink[InputBytes[i<<2]]++ = i;
00412                                         VALIDATE_RANKS;
00413                                 }
00414                                 else
00415                                 {
00416                                         udword* Indices         = mRanks;
00417                                         udword* IndicesEnd      = &mRanks[nb];
00418                                         while(Indices!=IndicesEnd)
00419                                         {
00420                                                 udword id = *Indices++;
00421 //                                              mRanks2[mOffset[InputBytes[id<<2]]++] = id;
00422                                                 *mLink[InputBytes[id<<2]]++ = id;
00423                                         }
00424                                 }
00425 
00426                                 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
00427                                 udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
00428                         }
00429                 }
00430                 else
00431                 {
00432                         // This is a special case to correctly handle negative values
00433                         CHECK_PASS_VALIDITY(j);
00434 
00435                         if(PerformPass)
00436                         {
00437                                 // Create biased offsets, in order for negative numbers to be sorted as well
00438 //                              mOffset[0] = NbNegativeValues;                                                                                          // First positive number takes place after the negative ones
00439                                 mLink[0] = &mRanks2[NbNegativeValues];                                                                          // First positive number takes place after the negative ones
00440 //                              for(udword i=1;i<128;i++)               mOffset[i] = mOffset[i-1] + CurCount[i-1];      // 1 to 128 for positive numbers
00441                                 for(udword i=1;i<128;i++)               mLink[i] = mLink[i-1] + CurCount[i-1];          // 1 to 128 for positive numbers
00442 
00443                                 // We must reverse the sorting order for negative numbers!
00444 //                              mOffset[255] = 0;
00445                                 mLink[255] = mRanks2;
00446 //                              for(i=0;i<127;i++)              mOffset[254-i] = mOffset[255-i] + CurCount[255-i];      // Fixing the wrong order for negative values
00447                                 for(udword i=0;i<127;i++)       mLink[254-i] = mLink[255-i] + CurCount[255-i];          // Fixing the wrong order for negative values
00448 //                              for(i=128;i<256;i++)    mOffset[i] += CurCount[i];                                                      // Fixing the wrong place for negative values
00449                                 for(udword i=128;i<256;i++)     mLink[i] += CurCount[i];                                                        // Fixing the wrong place for negative values
00450 
00451                                 // Perform Radix Sort
00452                                 if(INVALID_RANKS)
00453                                 {
00454                                         for(udword i=0;i<nb;i++)
00455                                         {
00456                                                 udword Radix = input[i]>>24;                                                    // Radix byte, same as above. AND is useless here (udword).
00457                                                 // ### cmp to be killed. Not good. Later.
00458 //                                              if(Radix<128)           mRanks2[mOffset[Radix]++] = i;          // Number is positive, same as above
00459 //                                              else                            mRanks2[--mOffset[Radix]] = i;          // Number is negative, flip the sorting order
00460                                                 if(Radix<128)           *mLink[Radix]++ = i;            // Number is positive, same as above
00461                                                 else                            *(--mLink[Radix]) = i;          // Number is negative, flip the sorting order
00462                                         }
00463                                         VALIDATE_RANKS;
00464                                 }
00465                                 else
00466                                 {
00467                                         for(udword i=0;i<nb;i++)
00468                                         {
00469                                                 udword Radix = input[mRanks[i]]>>24;                                                    // Radix byte, same as above. AND is useless here (udword).
00470                                                 // ### cmp to be killed. Not good. Later.
00471 //                                              if(Radix<128)           mRanks2[mOffset[Radix]++] = mRanks[i];          // Number is positive, same as above
00472 //                                              else                            mRanks2[--mOffset[Radix]] = mRanks[i];          // Number is negative, flip the sorting order
00473                                                 if(Radix<128)           *mLink[Radix]++ = mRanks[i];            // Number is positive, same as above
00474                                                 else                            *(--mLink[Radix]) = mRanks[i];          // Number is negative, flip the sorting order
00475                                         }
00476                                 }
00477                                 // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
00478                                 udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
00479                         }
00480                         else
00481                         {
00482                                 // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
00483                                 if(UniqueVal>=128)
00484                                 {
00485                                         if(INVALID_RANKS)
00486                                         {
00487                                                 // ###Possible?
00488                                                 for(udword i=0;i<nb;i++)        mRanks2[i] = nb-i-1;
00489                                                 VALIDATE_RANKS;
00490                                         }
00491                                         else
00492                                         {
00493                                                 for(udword i=0;i<nb;i++)        mRanks2[i] = mRanks[nb-i-1];
00494                                         }
00495 
00496                                         // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
00497                                         udword* Tmp     = mRanks;       mRanks = mRanks2; mRanks2 = Tmp;
00498                                 }
00499                         }
00500                 }
00501         }
00502         return *this;
00503 }
00504 
00506 
00510 
00511 udword RadixSort::GetUsedRam() const
00512 {
00513         udword UsedRam = sizeof(RadixSort);
00514 #ifndef RADIX_LOCAL_RAM
00515         UsedRam += 256*4*sizeof(udword);                        // Histograms
00516         UsedRam += 256*sizeof(udword);                          // Offsets
00517 #endif
00518         UsedRam += 2*CURRENT_SIZE*sizeof(udword);       // 2 lists of indices
00519         return UsedRam;
00520 }


openhrp3
Author(s): AIST, General Robotix Inc., Nakamura Lab of Dept. of Mechano Informatics at University of Tokyo
autogenerated on Sun Apr 2 2017 03:43:54