win_bzip2: blocksort.c Source File

Go to the documentation of this file.
00001 
00002 /*-------------------------------------------------------------*/
00003 /*--- Block sorting machinery                               ---*/
00004 /*---                                           blocksort.c ---*/
00005 /*-------------------------------------------------------------*/
00006 
00007 /* ------------------------------------------------------------------
00008    This file is part of bzip2/libbzip2, a program and library for
00009    lossless, block-sorting data compression.
00010 
00011    bzip2/libbzip2 version 1.0.6 of 6 September 2010
00012    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
00013 
00014    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
00015    README file.
00016 
00017    This program is released under the terms of the license contained
00018    in the file LICENSE.
00019    ------------------------------------------------------------------ */
00020 
00021 
00022 #include "bzlib_private.h"
00023 
00024 /*---------------------------------------------*/
00025 /*--- Fallback O(N log(N)^2) sorting        ---*/
00026 /*--- algorithm, for repetitive blocks      ---*/
00027 /*---------------------------------------------*/
00028 
00029 /*---------------------------------------------*/
00030 static 
00031 __inline__
00032 void fallbackSimpleSort ( UInt32* fmap, 
00033                           UInt32* eclass, 
00034                           Int32   lo, 
00035                           Int32   hi )
00036 {
00037    Int32 i, j, tmp;
00038    UInt32 ec_tmp;
00039 
00040    if (lo == hi) return;
00041 
00042    if (hi - lo > 3) {
00043       for ( i = hi-4; i >= lo; i-- ) {
00044          tmp = fmap[i];
00045          ec_tmp = eclass[tmp];
00046          for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
00047             fmap[j-4] = fmap[j];
00048          fmap[j-4] = tmp;
00049       }
00050    }
00051 
00052    for ( i = hi-1; i >= lo; i-- ) {
00053       tmp = fmap[i];
00054       ec_tmp = eclass[tmp];
00055       for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
00056          fmap[j-1] = fmap[j];
00057       fmap[j-1] = tmp;
00058    }
00059 }
00060 
00061 
00062 /*---------------------------------------------*/
00063 #define fswap(zz1, zz2) \
00064    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
00065 
00066 #define fvswap(zzp1, zzp2, zzn)       \
00067 {                                     \
00068    Int32 yyp1 = (zzp1);               \
00069    Int32 yyp2 = (zzp2);               \
00070    Int32 yyn  = (zzn);                \
00071    while (yyn > 0) {                  \
00072       fswap(fmap[yyp1], fmap[yyp2]);  \
00073       yyp1++; yyp2++; yyn--;          \
00074    }                                  \
00075 }
00076 
00077 
00078 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
00079 
00080 #define fpush(lz,hz) { stackLo[sp] = lz; \
00081                        stackHi[sp] = hz; \
00082                        sp++; }
00083 
00084 #define fpop(lz,hz) { sp--;              \
00085                       lz = stackLo[sp];  \
00086                       hz = stackHi[sp]; }
00087 
00088 #define FALLBACK_QSORT_SMALL_THRESH 10
00089 #define FALLBACK_QSORT_STACK_SIZE   100
00090 
00091 
00092 static
00093 void fallbackQSort3 ( UInt32* fmap, 
00094                       UInt32* eclass,
00095                       Int32   loSt, 
00096                       Int32   hiSt )
00097 {
00098    Int32 unLo, unHi, ltLo, gtHi, n, m;
00099    Int32 sp, lo, hi;
00100    UInt32 med, r, r3;
00101    Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
00102    Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
00103 
00104    r = 0;
00105 
00106    sp = 0;
00107    fpush ( loSt, hiSt );
00108 
00109    while (sp > 0) {
00110 
00111       AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
00112 
00113       fpop ( lo, hi );
00114       if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
00115          fallbackSimpleSort ( fmap, eclass, lo, hi );
00116          continue;
00117       }
00118 
00119       /* Random partitioning.  Median of 3 sometimes fails to
00120          avoid bad cases.  Median of 9 seems to help but 
00121          looks rather expensive.  This too seems to work but
00122          is cheaper.  Guidance for the magic constants 
00123          7621 and 32768 is taken from Sedgewick's algorithms
00124          book, chapter 35.
00125       */
00126       r = ((r * 7621) + 1) % 32768;
00127       r3 = r % 3;
00128       if (r3 == 0) med = eclass[fmap[lo]]; else
00129       if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
00130                    med = eclass[fmap[hi]];
00131 
00132       unLo = ltLo = lo;
00133       unHi = gtHi = hi;
00134 
00135       while (1) {
00136          while (1) {
00137             if (unLo > unHi) break;
00138             n = (Int32)eclass[fmap[unLo]] - (Int32)med;
00139             if (n == 0) { 
00140                fswap(fmap[unLo], fmap[ltLo]); 
00141                ltLo++; unLo++; 
00142                continue; 
00143             };
00144             if (n > 0) break;
00145             unLo++;
00146          }
00147          while (1) {
00148             if (unLo > unHi) break;
00149             n = (Int32)eclass[fmap[unHi]] - (Int32)med;
00150             if (n == 0) { 
00151                fswap(fmap[unHi], fmap[gtHi]); 
00152                gtHi--; unHi--; 
00153                continue; 
00154             };
00155             if (n < 0) break;
00156             unHi--;
00157          }
00158          if (unLo > unHi) break;
00159          fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
00160       }
00161 
00162       AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
00163 
00164       if (gtHi < ltLo) continue;
00165 
00166       n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
00167       m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
00168 
00169       n = lo + unLo - ltLo - 1;
00170       m = hi - (gtHi - unHi) + 1;
00171 
00172       if (n - lo > hi - m) {
00173          fpush ( lo, n );
00174          fpush ( m, hi );
00175       } else {
00176          fpush ( m, hi );
00177          fpush ( lo, n );
00178       }
00179    }
00180 }
00181 
00182 #undef fmin
00183 #undef fpush
00184 #undef fpop
00185 #undef fswap
00186 #undef fvswap
00187 #undef FALLBACK_QSORT_SMALL_THRESH
00188 #undef FALLBACK_QSORT_STACK_SIZE
00189 
00190 
00191 /*---------------------------------------------*/
00192 /* Pre:
00193       nblock > 0
00194       eclass exists for [0 .. nblock-1]
00195       ((UChar*)eclass) [0 .. nblock-1] holds block
00196       ptr exists for [0 .. nblock-1]
00197 
00198    Post:
00199       ((UChar*)eclass) [0 .. nblock-1] holds block
00200       All other areas of eclass destroyed
00201       fmap [0 .. nblock-1] holds sorted order
00202       bhtab [ 0 .. 2+(nblock/32) ] destroyed
00203 */
00204 
00205 #define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
00206 #define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
00207 #define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
00208 #define      WORD_BH(zz)  bhtab[(zz) >> 5]
00209 #define UNALIGNED_BH(zz)  ((zz) & 0x01f)
00210 
00211 static
00212 void fallbackSort ( UInt32* fmap, 
00213                     UInt32* eclass, 
00214                     UInt32* bhtab,
00215                     Int32   nblock,
00216                     Int32   verb )
00217 {
00218    Int32 ftab[257];
00219    Int32 ftabCopy[256];
00220    Int32 H, i, j, k, l, r, cc, cc1;
00221    Int32 nNotDone;
00222    Int32 nBhtab;
00223    UChar* eclass8 = (UChar*)eclass;
00224 
00225    /*--
00226       Initial 1-char radix sort to generate
00227       initial fmap and initial BH bits.
00228    --*/
00229    if (verb >= 4)
00230       VPrintf0 ( "        bucket sorting ...\n" );
00231    for (i = 0; i < 257;    i++) ftab[i] = 0;
00232    for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
00233    for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
00234    for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
00235 
00236    for (i = 0; i < nblock; i++) {
00237       j = eclass8[i];
00238       k = ftab[j] - 1;
00239       ftab[j] = k;
00240       fmap[k] = i;
00241    }
00242 
00243    nBhtab = 2 + (nblock / 32);
00244    for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
00245    for (i = 0; i < 256; i++) SET_BH(ftab[i]);
00246 
00247    /*--
00248       Inductively refine the buckets.  Kind-of an
00249       "exponential radix sort" (!), inspired by the
00250       Manber-Myers suffix array construction algorithm.
00251    --*/
00252 
00253    /*-- set sentinel bits for block-end detection --*/
00254    for (i = 0; i < 32; i++) { 
00255       SET_BH(nblock + 2*i);
00256       CLEAR_BH(nblock + 2*i + 1);
00257    }
00258 
00259    /*-- the log(N) loop --*/
00260    H = 1;
00261    while (1) {
00262 
00263       if (verb >= 4) 
00264          VPrintf1 ( "        depth %6d has ", H );
00265 
00266       j = 0;
00267       for (i = 0; i < nblock; i++) {
00268          if (ISSET_BH(i)) j = i;
00269          k = fmap[i] - H; if (k < 0) k += nblock;
00270          eclass[k] = j;
00271       }
00272 
00273       nNotDone = 0;
00274       r = -1;
00275       while (1) {
00276 
00277          /*-- find the next non-singleton bucket --*/
00278          k = r + 1;
00279          while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
00280          if (ISSET_BH(k)) {
00281             while (WORD_BH(k) == 0xffffffff) k += 32;
00282             while (ISSET_BH(k)) k++;
00283          }
00284          l = k - 1;
00285          if (l >= nblock) break;
00286          while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
00287          if (!ISSET_BH(k)) {
00288             while (WORD_BH(k) == 0x00000000) k += 32;
00289             while (!ISSET_BH(k)) k++;
00290          }
00291          r = k - 1;
00292          if (r >= nblock) break;
00293 
00294          /*-- now [l, r] bracket current bucket --*/
00295          if (r > l) {
00296             nNotDone += (r - l + 1);
00297             fallbackQSort3 ( fmap, eclass, l, r );
00298 
00299             /*-- scan bucket and generate header bits-- */
00300             cc = -1;
00301             for (i = l; i <= r; i++) {
00302                cc1 = eclass[fmap[i]];
00303                if (cc != cc1) { SET_BH(i); cc = cc1; };
00304             }
00305          }
00306       }
00307 
00308       if (verb >= 4) 
00309          VPrintf1 ( "%6d unresolved strings\n", nNotDone );
00310 
00311       H *= 2;
00312       if (H > nblock || nNotDone == 0) break;
00313    }
00314 
00315    /*-- 
00316       Reconstruct the original block in
00317       eclass8 [0 .. nblock-1], since the
00318       previous phase destroyed it.
00319    --*/
00320    if (verb >= 4)
00321       VPrintf0 ( "        reconstructing block ...\n" );
00322    j = 0;
00323    for (i = 0; i < nblock; i++) {
00324       while (ftabCopy[j] == 0) j++;
00325       ftabCopy[j]--;
00326       eclass8[fmap[i]] = (UChar)j;
00327    }
00328    AssertH ( j < 256, 1005 );
00329 }
00330 
00331 #undef       SET_BH
00332 #undef     CLEAR_BH
00333 #undef     ISSET_BH
00334 #undef      WORD_BH
00335 #undef UNALIGNED_BH
00336 
00337 
00338 /*---------------------------------------------*/
00339 /*--- The main, O(N^2 log(N)) sorting       ---*/
00340 /*--- algorithm.  Faster for "normal"       ---*/
00341 /*--- non-repetitive blocks.                ---*/
00342 /*---------------------------------------------*/
00343 
00344 /*---------------------------------------------*/
00345 static
00346 __inline__
00347 Bool mainGtU ( UInt32  i1, 
00348                UInt32  i2,
00349                UChar*  block, 
00350                UInt16* quadrant,
00351                UInt32  nblock,
00352                Int32*  budget )
00353 {
00354    Int32  k;
00355    UChar  c1, c2;
00356    UInt16 s1, s2;
00357 
00358    AssertD ( i1 != i2, "mainGtU" );
00359    /* 1 */
00360    c1 = block[i1]; c2 = block[i2];
00361    if (c1 != c2) return (c1 > c2);
00362    i1++; i2++;
00363    /* 2 */
00364    c1 = block[i1]; c2 = block[i2];
00365    if (c1 != c2) return (c1 > c2);
00366    i1++; i2++;
00367    /* 3 */
00368    c1 = block[i1]; c2 = block[i2];
00369    if (c1 != c2) return (c1 > c2);
00370    i1++; i2++;
00371    /* 4 */
00372    c1 = block[i1]; c2 = block[i2];
00373    if (c1 != c2) return (c1 > c2);
00374    i1++; i2++;
00375    /* 5 */
00376    c1 = block[i1]; c2 = block[i2];
00377    if (c1 != c2) return (c1 > c2);
00378    i1++; i2++;
00379    /* 6 */
00380    c1 = block[i1]; c2 = block[i2];
00381    if (c1 != c2) return (c1 > c2);
00382    i1++; i2++;
00383    /* 7 */
00384    c1 = block[i1]; c2 = block[i2];
00385    if (c1 != c2) return (c1 > c2);
00386    i1++; i2++;
00387    /* 8 */
00388    c1 = block[i1]; c2 = block[i2];
00389    if (c1 != c2) return (c1 > c2);
00390    i1++; i2++;
00391    /* 9 */
00392    c1 = block[i1]; c2 = block[i2];
00393    if (c1 != c2) return (c1 > c2);
00394    i1++; i2++;
00395    /* 10 */
00396    c1 = block[i1]; c2 = block[i2];
00397    if (c1 != c2) return (c1 > c2);
00398    i1++; i2++;
00399    /* 11 */
00400    c1 = block[i1]; c2 = block[i2];
00401    if (c1 != c2) return (c1 > c2);
00402    i1++; i2++;
00403    /* 12 */
00404    c1 = block[i1]; c2 = block[i2];
00405    if (c1 != c2) return (c1 > c2);
00406    i1++; i2++;
00407 
00408    k = nblock + 8;
00409 
00410    do {
00411       /* 1 */
00412       c1 = block[i1]; c2 = block[i2];
00413       if (c1 != c2) return (c1 > c2);
00414       s1 = quadrant[i1]; s2 = quadrant[i2];
00415       if (s1 != s2) return (s1 > s2);
00416       i1++; i2++;
00417       /* 2 */
00418       c1 = block[i1]; c2 = block[i2];
00419       if (c1 != c2) return (c1 > c2);
00420       s1 = quadrant[i1]; s2 = quadrant[i2];
00421       if (s1 != s2) return (s1 > s2);
00422       i1++; i2++;
00423       /* 3 */
00424       c1 = block[i1]; c2 = block[i2];
00425       if (c1 != c2) return (c1 > c2);
00426       s1 = quadrant[i1]; s2 = quadrant[i2];
00427       if (s1 != s2) return (s1 > s2);
00428       i1++; i2++;
00429       /* 4 */
00430       c1 = block[i1]; c2 = block[i2];
00431       if (c1 != c2) return (c1 > c2);
00432       s1 = quadrant[i1]; s2 = quadrant[i2];
00433       if (s1 != s2) return (s1 > s2);
00434       i1++; i2++;
00435       /* 5 */
00436       c1 = block[i1]; c2 = block[i2];
00437       if (c1 != c2) return (c1 > c2);
00438       s1 = quadrant[i1]; s2 = quadrant[i2];
00439       if (s1 != s2) return (s1 > s2);
00440       i1++; i2++;
00441       /* 6 */
00442       c1 = block[i1]; c2 = block[i2];
00443       if (c1 != c2) return (c1 > c2);
00444       s1 = quadrant[i1]; s2 = quadrant[i2];
00445       if (s1 != s2) return (s1 > s2);
00446       i1++; i2++;
00447       /* 7 */
00448       c1 = block[i1]; c2 = block[i2];
00449       if (c1 != c2) return (c1 > c2);
00450       s1 = quadrant[i1]; s2 = quadrant[i2];
00451       if (s1 != s2) return (s1 > s2);
00452       i1++; i2++;
00453       /* 8 */
00454       c1 = block[i1]; c2 = block[i2];
00455       if (c1 != c2) return (c1 > c2);
00456       s1 = quadrant[i1]; s2 = quadrant[i2];
00457       if (s1 != s2) return (s1 > s2);
00458       i1++; i2++;
00459 
00460       if (i1 >= nblock) i1 -= nblock;
00461       if (i2 >= nblock) i2 -= nblock;
00462 
00463       k -= 8;
00464       (*budget)--;
00465    }
00466       while (k >= 0);
00467 
00468    return False;
00469 }
00470 
00471 
00472 /*---------------------------------------------*/
00473 /*--
00474    Knuth's increments seem to work better
00475    than Incerpi-Sedgewick here.  Possibly
00476    because the number of elems to sort is
00477    usually small, typically <= 20.
00478 --*/
00479 static
00480 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
00481                    9841, 29524, 88573, 265720,
00482                    797161, 2391484 };
00483 
00484 static
00485 void mainSimpleSort ( UInt32* ptr,
00486                       UChar*  block,
00487                       UInt16* quadrant,
00488                       Int32   nblock,
00489                       Int32   lo, 
00490                       Int32   hi, 
00491                       Int32   d,
00492                       Int32*  budget )
00493 {
00494    Int32 i, j, h, bigN, hp;
00495    UInt32 v;
00496 
00497    bigN = hi - lo + 1;
00498    if (bigN < 2) return;
00499 
00500    hp = 0;
00501    while (incs[hp] < bigN) hp++;
00502    hp--;
00503 
00504    for (; hp >= 0; hp--) {
00505       h = incs[hp];
00506 
00507       i = lo + h;
00508       while (True) {
00509 
00510          /*-- copy 1 --*/
00511          if (i > hi) break;
00512          v = ptr[i];
00513          j = i;
00514          while ( mainGtU ( 
00515                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
00516                  ) ) {
00517             ptr[j] = ptr[j-h];
00518             j = j - h;
00519             if (j <= (lo + h - 1)) break;
00520          }
00521          ptr[j] = v;
00522          i++;
00523 
00524          /*-- copy 2 --*/
00525          if (i > hi) break;
00526          v = ptr[i];
00527          j = i;
00528          while ( mainGtU ( 
00529                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
00530                  ) ) {
00531             ptr[j] = ptr[j-h];
00532             j = j - h;
00533             if (j <= (lo + h - 1)) break;
00534          }
00535          ptr[j] = v;
00536          i++;
00537 
00538          /*-- copy 3 --*/
00539          if (i > hi) break;
00540          v = ptr[i];
00541          j = i;
00542          while ( mainGtU ( 
00543                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
00544                  ) ) {
00545             ptr[j] = ptr[j-h];
00546             j = j - h;
00547             if (j <= (lo + h - 1)) break;
00548          }
00549          ptr[j] = v;
00550          i++;
00551 
00552          if (*budget < 0) return;
00553       }
00554    }
00555 }
00556 
00557 
00558 /*---------------------------------------------*/
00559 /*--
00560    The following is an implementation of
00561    an elegant 3-way quicksort for strings,
00562    described in a paper "Fast Algorithms for
00563    Sorting and Searching Strings", by Robert
00564    Sedgewick and Jon L. Bentley.
00565 --*/
00566 
00567 #define mswap(zz1, zz2) \
00568    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
00569 
00570 #define mvswap(zzp1, zzp2, zzn)       \
00571 {                                     \
00572    Int32 yyp1 = (zzp1);               \
00573    Int32 yyp2 = (zzp2);               \
00574    Int32 yyn  = (zzn);                \
00575    while (yyn > 0) {                  \
00576       mswap(ptr[yyp1], ptr[yyp2]);    \
00577       yyp1++; yyp2++; yyn--;          \
00578    }                                  \
00579 }
00580 
00581 static 
00582 __inline__
00583 UChar mmed3 ( UChar a, UChar b, UChar c )
00584 {
00585    UChar t;
00586    if (a > b) { t = a; a = b; b = t; };
00587    if (b > c) { 
00588       b = c;
00589       if (a > b) b = a;
00590    }
00591    return b;
00592 }
00593 
00594 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
00595 
00596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
00597                           stackHi[sp] = hz; \
00598                           stackD [sp] = dz; \
00599                           sp++; }
00600 
00601 #define mpop(lz,hz,dz) { sp--;             \
00602                          lz = stackLo[sp]; \
00603                          hz = stackHi[sp]; \
00604                          dz = stackD [sp]; }
00605 
00606 
00607 #define mnextsize(az) (nextHi[az]-nextLo[az])
00608 
00609 #define mnextswap(az,bz)                                        \
00610    { Int32 tz;                                                  \
00611      tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
00612      tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
00613      tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
00614 
00615 
00616 #define MAIN_QSORT_SMALL_THRESH 20
00617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
00618 #define MAIN_QSORT_STACK_SIZE 100
00619 
00620 static
00621 void mainQSort3 ( UInt32* ptr,
00622                   UChar*  block,
00623                   UInt16* quadrant,
00624                   Int32   nblock,
00625                   Int32   loSt, 
00626                   Int32   hiSt, 
00627                   Int32   dSt,
00628                   Int32*  budget )
00629 {
00630    Int32 unLo, unHi, ltLo, gtHi, n, m, med;
00631    Int32 sp, lo, hi, d;
00632 
00633    Int32 stackLo[MAIN_QSORT_STACK_SIZE];
00634    Int32 stackHi[MAIN_QSORT_STACK_SIZE];
00635    Int32 stackD [MAIN_QSORT_STACK_SIZE];
00636 
00637    Int32 nextLo[3];
00638    Int32 nextHi[3];
00639    Int32 nextD [3];
00640 
00641    sp = 0;
00642    mpush ( loSt, hiSt, dSt );
00643 
00644    while (sp > 0) {
00645 
00646       AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
00647 
00648       mpop ( lo, hi, d );
00649       if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
00650           d > MAIN_QSORT_DEPTH_THRESH) {
00651          mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
00652          if (*budget < 0) return;
00653          continue;
00654       }
00655 
00656       med = (Int32) 
00657             mmed3 ( block[ptr[ lo         ]+d],
00658                     block[ptr[ hi         ]+d],
00659                     block[ptr[ (lo+hi)>>1 ]+d] );
00660 
00661       unLo = ltLo = lo;
00662       unHi = gtHi = hi;
00663 
00664       while (True) {
00665          while (True) {
00666             if (unLo > unHi) break;
00667             n = ((Int32)block[ptr[unLo]+d]) - med;
00668             if (n == 0) { 
00669                mswap(ptr[unLo], ptr[ltLo]); 
00670                ltLo++; unLo++; continue; 
00671             };
00672             if (n >  0) break;
00673             unLo++;
00674          }
00675          while (True) {
00676             if (unLo > unHi) break;
00677             n = ((Int32)block[ptr[unHi]+d]) - med;
00678             if (n == 0) { 
00679                mswap(ptr[unHi], ptr[gtHi]); 
00680                gtHi--; unHi--; continue; 
00681             };
00682             if (n <  0) break;
00683             unHi--;
00684          }
00685          if (unLo > unHi) break;
00686          mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
00687       }
00688 
00689       AssertD ( unHi == unLo-1, "mainQSort3(2)" );
00690 
00691       if (gtHi < ltLo) {
00692          mpush(lo, hi, d+1 );
00693          continue;
00694       }
00695 
00696       n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
00697       m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
00698 
00699       n = lo + unLo - ltLo - 1;
00700       m = hi - (gtHi - unHi) + 1;
00701 
00702       nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
00703       nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
00704       nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
00705 
00706       if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
00707       if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
00708       if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
00709 
00710       AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
00711       AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
00712 
00713       mpush (nextLo[0], nextHi[0], nextD[0]);
00714       mpush (nextLo[1], nextHi[1], nextD[1]);
00715       mpush (nextLo[2], nextHi[2], nextD[2]);
00716    }
00717 }
00718 
00719 #undef mswap
00720 #undef mvswap
00721 #undef mpush
00722 #undef mpop
00723 #undef mmin
00724 #undef mnextsize
00725 #undef mnextswap
00726 #undef MAIN_QSORT_SMALL_THRESH
00727 #undef MAIN_QSORT_DEPTH_THRESH
00728 #undef MAIN_QSORT_STACK_SIZE
00729 
00730 
00731 /*---------------------------------------------*/
00732 /* Pre:
00733       nblock > N_OVERSHOOT
00734       block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
00735       ((UChar*)block32) [0 .. nblock-1] holds block
00736       ptr exists for [0 .. nblock-1]
00737 
00738    Post:
00739       ((UChar*)block32) [0 .. nblock-1] holds block
00740       All other areas of block32 destroyed
00741       ftab [0 .. 65536 ] destroyed
00742       ptr [0 .. nblock-1] holds sorted order
00743       if (*budget < 0), sorting was abandoned
00744 */
00745 
00746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
00747 #define SETMASK (1 << 21)
00748 #define CLEARMASK (~(SETMASK))
00749 
00750 static
00751 void mainSort ( UInt32* ptr, 
00752                 UChar*  block,
00753                 UInt16* quadrant, 
00754                 UInt32* ftab,
00755                 Int32   nblock,
00756                 Int32   verb,
00757                 Int32*  budget )
00758 {
00759    Int32  i, j, k, ss, sb;
00760    Int32  runningOrder[256];
00761    Bool   bigDone[256];
00762    Int32  copyStart[256];
00763    Int32  copyEnd  [256];
00764    UChar  c1;
00765    Int32  numQSorted;
00766    UInt16 s;
00767    if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
00768 
00769    /*-- set up the 2-byte frequency table --*/
00770    for (i = 65536; i >= 0; i--) ftab[i] = 0;
00771 
00772    j = block[0] << 8;
00773    i = nblock-1;
00774    for (; i >= 3; i -= 4) {
00775       quadrant[i] = 0;
00776       j = (j >> 8) | ( ((UInt16)block[i]) << 8);
00777       ftab[j]++;
00778       quadrant[i-1] = 0;
00779       j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
00780       ftab[j]++;
00781       quadrant[i-2] = 0;
00782       j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
00783       ftab[j]++;
00784       quadrant[i-3] = 0;
00785       j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
00786       ftab[j]++;
00787    }
00788    for (; i >= 0; i--) {
00789       quadrant[i] = 0;
00790       j = (j >> 8) | ( ((UInt16)block[i]) << 8);
00791       ftab[j]++;
00792    }
00793 
00794    /*-- (emphasises close relationship of block & quadrant) --*/
00795    for (i = 0; i < BZ_N_OVERSHOOT; i++) {
00796       block   [nblock+i] = block[i];
00797       quadrant[nblock+i] = 0;
00798    }
00799 
00800    if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
00801 
00802    /*-- Complete the initial radix sort --*/
00803    for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
00804 
00805    s = block[0] << 8;
00806    i = nblock-1;
00807    for (; i >= 3; i -= 4) {
00808       s = (s >> 8) | (block[i] << 8);
00809       j = ftab[s] -1;
00810       ftab[s] = j;
00811       ptr[j] = i;
00812       s = (s >> 8) | (block[i-1] << 8);
00813       j = ftab[s] -1;
00814       ftab[s] = j;
00815       ptr[j] = i-1;
00816       s = (s >> 8) | (block[i-2] << 8);
00817       j = ftab[s] -1;
00818       ftab[s] = j;
00819       ptr[j] = i-2;
00820       s = (s >> 8) | (block[i-3] << 8);
00821       j = ftab[s] -1;
00822       ftab[s] = j;
00823       ptr[j] = i-3;
00824    }
00825    for (; i >= 0; i--) {
00826       s = (s >> 8) | (block[i] << 8);
00827       j = ftab[s] -1;
00828       ftab[s] = j;
00829       ptr[j] = i;
00830    }
00831 
00832    /*--
00833       Now ftab contains the first loc of every small bucket.
00834       Calculate the running order, from smallest to largest
00835       big bucket.
00836    --*/
00837    for (i = 0; i <= 255; i++) {
00838       bigDone     [i] = False;
00839       runningOrder[i] = i;
00840    }
00841 
00842    {
00843       Int32 vv;
00844       Int32 h = 1;
00845       do h = 3 * h + 1; while (h <= 256);
00846       do {
00847          h = h / 3;
00848          for (i = h; i <= 255; i++) {
00849             vv = runningOrder[i];
00850             j = i;
00851             while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
00852                runningOrder[j] = runningOrder[j-h];
00853                j = j - h;
00854                if (j <= (h - 1)) goto zero;
00855             }
00856             zero:
00857             runningOrder[j] = vv;
00858          }
00859       } while (h != 1);
00860    }
00861 
00862    /*--
00863       The main sorting loop.
00864    --*/
00865 
00866    numQSorted = 0;
00867 
00868    for (i = 0; i <= 255; i++) {
00869 
00870       /*--
00871          Process big buckets, starting with the least full.
00872          Basically this is a 3-step process in which we call
00873          mainQSort3 to sort the small buckets [ss, j], but
00874          also make a big effort to avoid the calls if we can.
00875       --*/
00876       ss = runningOrder[i];
00877 
00878       /*--
00879          Step 1:
00880          Complete the big bucket [ss] by quicksorting
00881          any unsorted small buckets [ss, j], for j != ss.  
00882          Hopefully previous pointer-scanning phases have already
00883          completed many of the small buckets [ss, j], so
00884          we don't have to sort them at all.
00885       --*/
00886       for (j = 0; j <= 255; j++) {
00887          if (j != ss) {
00888             sb = (ss << 8) + j;
00889             if ( ! (ftab[sb] & SETMASK) ) {
00890                Int32 lo = ftab[sb]   & CLEARMASK;
00891                Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
00892                if (hi > lo) {
00893                   if (verb >= 4)
00894                      VPrintf4 ( "        qsort [0x%x, 0x%x]   "
00895                                 "done %d   this %d\n",
00896                                 ss, j, numQSorted, hi - lo + 1 );
00897                   mainQSort3 ( 
00898                      ptr, block, quadrant, nblock, 
00899                      lo, hi, BZ_N_RADIX, budget 
00900                   );   
00901                   numQSorted += (hi - lo + 1);
00902                   if (*budget < 0) return;
00903                }
00904             }
00905             ftab[sb] |= SETMASK;
00906          }
00907       }
00908 
00909       AssertH ( !bigDone[ss], 1006 );
00910 
00911       /*--
00912          Step 2:
00913          Now scan this big bucket [ss] so as to synthesise the
00914          sorted order for small buckets [t, ss] for all t,
00915          including, magically, the bucket [ss,ss] too.
00916          This will avoid doing Real Work in subsequent Step 1's.
00917       --*/
00918       {
00919          for (j = 0; j <= 255; j++) {
00920             copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
00921             copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
00922          }
00923          for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
00924             k = ptr[j]-1; if (k < 0) k += nblock;
00925             c1 = block[k];
00926             if (!bigDone[c1])
00927                ptr[ copyStart[c1]++ ] = k;
00928          }
00929          for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
00930             k = ptr[j]-1; if (k < 0) k += nblock;
00931             c1 = block[k];
00932             if (!bigDone[c1]) 
00933                ptr[ copyEnd[c1]-- ] = k;
00934          }
00935       }
00936 
00937       AssertH ( (copyStart[ss]-1 == copyEnd[ss])
00938                 || 
00939                 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
00940                    Necessity for this case is demonstrated by compressing 
00941                    a sequence of approximately 48.5 million of character 
00942                    251; 1.0.0/1.0.1 will then die here. */
00943                 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
00944                 1007 )
00945 
00946       for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
00947 
00948       /*--
00949          Step 3:
00950          The [ss] big bucket is now done.  Record this fact,
00951          and update the quadrant descriptors.  Remember to
00952          update quadrants in the overshoot area too, if
00953          necessary.  The "if (i < 255)" test merely skips
00954          this updating for the last bucket processed, since
00955          updating for the last bucket is pointless.
00956 
00957          The quadrant array provides a way to incrementally
00958          cache sort orderings, as they appear, so as to 
00959          make subsequent comparisons in fullGtU() complete
00960          faster.  For repetitive blocks this makes a big
00961          difference (but not big enough to be able to avoid
00962          the fallback sorting mechanism, exponential radix sort).
00963 
00964          The precise meaning is: at all times:
00965 
00966             for 0 <= i < nblock and 0 <= j <= nblock
00967 
00968             if block[i] != block[j], 
00969 
00970                then the relative values of quadrant[i] and 
00971                     quadrant[j] are meaningless.
00972 
00973                else {
00974                   if quadrant[i] < quadrant[j]
00975                      then the string starting at i lexicographically
00976                      precedes the string starting at j
00977 
00978                   else if quadrant[i] > quadrant[j]
00979                      then the string starting at j lexicographically
00980                      precedes the string starting at i
00981 
00982                   else
00983                      the relative ordering of the strings starting
00984                      at i and j has not yet been determined.
00985                }
00986       --*/
00987       bigDone[ss] = True;
00988 
00989       if (i < 255) {
00990          Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
00991          Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
00992          Int32 shifts   = 0;
00993 
00994          while ((bbSize >> shifts) > 65534) shifts++;
00995 
00996          for (j = bbSize-1; j >= 0; j--) {
00997             Int32 a2update     = ptr[bbStart + j];
00998             UInt16 qVal        = (UInt16)(j >> shifts);
00999             quadrant[a2update] = qVal;
01000             if (a2update < BZ_N_OVERSHOOT)
01001                quadrant[a2update + nblock] = qVal;
01002          }
01003          AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
01004       }
01005 
01006    }
01007 
01008    if (verb >= 4)
01009       VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
01010                  nblock, numQSorted, nblock - numQSorted );
01011 }
01012 
01013 #undef BIGFREQ
01014 #undef SETMASK
01015 #undef CLEARMASK
01016 
01017 
01018 /*---------------------------------------------*/
01019 /* Pre:
01020       nblock > 0
01021       arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
01022       ((UChar*)arr2)  [0 .. nblock-1] holds block
01023       arr1 exists for [0 .. nblock-1]
01024 
01025    Post:
01026       ((UChar*)arr2) [0 .. nblock-1] holds block
01027       All other areas of block destroyed
01028       ftab [ 0 .. 65536 ] destroyed
01029       arr1 [0 .. nblock-1] holds sorted order
01030 */
01031 void BZ2_blockSort ( EState* s )
01032 {
01033    UInt32* ptr    = s->ptr; 
01034    UChar*  block  = s->block;
01035    UInt32* ftab   = s->ftab;
01036    Int32   nblock = s->nblock;
01037    Int32   verb   = s->verbosity;
01038    Int32   wfact  = s->workFactor;
01039    UInt16* quadrant;
01040    Int32   budget;
01041    Int32   budgetInit;
01042    Int32   i;
01043 
01044    if (nblock < 10000) {
01045       fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
01046    } else {
01047       /* Calculate the location for quadrant, remembering to get
01048          the alignment right.  Assumes that &(block[0]) is at least
01049          2-byte aligned -- this should be ok since block is really
01050          the first section of arr2.
01051       */
01052       i = nblock+BZ_N_OVERSHOOT;
01053       if (i & 1) i++;
01054       quadrant = (UInt16*)(&(block[i]));
01055 
01056       /* (wfact-1) / 3 puts the default-factor-30
01057          transition point at very roughly the same place as 
01058          with v0.1 and v0.9.0.  
01059          Not that it particularly matters any more, since the
01060          resulting compressed stream is now the same regardless
01061          of whether or not we use the main sort or fallback sort.
01062       */
01063       if (wfact < 1  ) wfact = 1;
01064       if (wfact > 100) wfact = 100;
01065       budgetInit = nblock * ((wfact-1) / 3);
01066       budget = budgetInit;
01067 
01068       mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
01069       if (verb >= 3) 
01070          VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
01071                     budgetInit - budget,
01072                     nblock, 
01073                     (float)(budgetInit - budget) /
01074                     (float)(nblock==0 ? 1 : nblock) ); 
01075       if (budget < 0) {
01076          if (verb >= 2) 
01077             VPrintf0 ( "    too repetitive; using fallback"
01078                        " sorting algorithm\n" );
01079          fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
01080       }
01081    }
01082 
01083    s->origPtr = -1;
01084    for (i = 0; i < s->nblock; i++)
01085       if (ptr[i] == 0)
01086          { s->origPtr = i; break; };
01087 
01088    AssertH( s->origPtr != -1, 1003 );
01089 }
01090 
01091 
01092 /*-------------------------------------------------------------*/
01093 /*--- end                                       blocksort.c ---*/
01094 /*-------------------------------------------------------------*/