lzf.cpp
Go to the documentation of this file.
00001 /*
00002  * Software License Agreement (BSD License)
00003  *
00004  * Point Cloud Library (PCL) - www.pointclouds.org
00005  * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
00006  * Copyright (c) 2010-2011, Willow Garage, Inc.
00007  * 
00008  *  All rights reserved.
00009  *
00010  *  Redistribution and use in source and binary forms, with or without
00011  *  modification, are permitted provided that the following conditions
00012  *  are met:
00013  *
00014  *   * Redistributions of source code must retain the above copyright
00015  *     notice, this list of conditions and the following disclaimer.
00016  *   * Redistributions in binary form must reproduce the above
00017  *     copyright notice, this list of conditions and the following
00018  *     disclaimer in the documentation and/or other materials provided
00019  *     with the distribution.
00020  * 
00021  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00024  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00025  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00026  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00027  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00028  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00029  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00030  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00031  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00032  *  POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * $Id$
00035  *
00036  */
00037 
00038 #include <pcl/io/lzf.h>
00039 #include <cstring>
00040 #include <climits>
00041 #include <pcl/console/print.h>
00042 #include <errno.h>
00043 
00044 /*
00045  * Size of hashtable is (1 << HLOG) * sizeof (char *)
00046  * decompression is independent of the hash table size
00047  * the difference between 15 and 14 is very small
00048  * for small blocks (and 14 is usually a bit faster).
00049  * For a low-memory/faster configuration, use HLOG == 13;
00050  * For best compression, use 15 or 16 (or more, up to 22).
00051  */
00052 #define HLOG 13
00053 
00054 typedef unsigned int LZF_HSLOT;
00055 typedef unsigned int LZF_STATE[1 << (HLOG)];
00056 
00057 #define STRICT_ALIGN !(defined(__i386) || defined (__amd64))
00058 #if !STRICT_ALIGN
00059 /* for unaligned accesses we need a 16 bit datatype. */
00060 # if USHRT_MAX == 65535
00061     typedef unsigned short u16;
00062 # elif UINT_MAX == 65535
00063     typedef unsigned int u16;
00064 # else
00065 #  undef STRICT_ALIGN
00066 #  define STRICT_ALIGN 1
00067 # endif
00068 #endif
00069 
00070 // IDX works because it is very similar to a multiplicative hash, e.g.
00071 // ((h * 57321 >> (3*8 - HLOG)) & ((1 << (HLOG)) - 1))
00072 #define IDX(h) ((( h >> (3*8 - HLOG)) - h  ) & ((1 << (HLOG)) - 1))
00073 
00075 //
00076 // compressed format
00077 //
00078 // 000LLLLL <L+1>    ; literal, L+1=1..33 octets
00079 // LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
00080 // 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
00081 //
00082 //
00083 unsigned int
00084 pcl::lzfCompress (const void *const in_data, unsigned int in_len,
00085                   void *out_data, unsigned int out_len)
00086 {
00087   LZF_STATE htab;
00088   const unsigned char *ip = static_cast<const unsigned char *> (in_data);
00089         unsigned char *op = static_cast<unsigned char *> (out_data);
00090   const unsigned char *in_end  = ip + in_len;
00091         unsigned char *out_end = op + out_len;
00092   const unsigned char *ref;
00093 
00094   // off requires a type wide enough to hold a general pointer difference.
00095   // ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
00096   // works for differences within a single object). We also assume that no
00097   // no bit pattern traps. Since the only platform that is both non-POSIX
00098   // and fails to support both assumptions is windows 64 bit, we make a
00099   // special workaround for it.
00100 #if defined (WIN32) && defined (_M_X64)
00101   // workaround for missing POSIX compliance
00102   unsigned _int64 off; 
00103 #else
00104   unsigned long off;
00105 #endif
00106   unsigned int hval;
00107   int lit;
00108 
00109   if (!in_len || !out_len)
00110   {
00111     PCL_WARN ("[pcl::lzf_compress] Input or output has 0 size!\n");
00112     return (0);
00113   }
00114 
00115   // Initialize the htab
00116   memset (htab, 0, sizeof (htab));
00117 
00118   // Start run
00119   lit = 0; op++;
00120 
00121   hval = (ip[0] << 8) | ip[1];
00122   while (ip < in_end - 2)
00123   {
00124     unsigned int *hslot;
00125 
00126     hval = (hval << 8) | ip[2];
00127     hslot = htab + IDX (hval);
00128     ref = *hslot + (static_cast<const unsigned char*> (in_data)); 
00129     *hslot = static_cast<unsigned int> (ip - (static_cast<const unsigned char*> (in_data)));
00130 
00131     if (
00132         // The next test will actually take care of this, but this is faster if htab is initialized
00133         ref < ip 
00134         && (off = ip - ref - 1) < (1 << 13)
00135         && ref > static_cast<const unsigned char *> (in_data)
00136         && ref[2] == ip[2]
00137 #if STRICT_ALIGN
00138         && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])
00139 #else
00140         && *reinterpret_cast<const u16 *> (ref) == *reinterpret_cast<const u16 *> (ip)
00141 #endif
00142       )
00143     {
00144       // Match found at *ref++
00145       unsigned int len = 2;
00146       ptrdiff_t maxlen = in_end - ip - len;
00147       maxlen = maxlen > ((1 << 8) + (1 << 3)) ? ((1 << 8) + (1 << 3)) : maxlen;
00148 
00149       // First a faster conservative test
00150       if (op + 3 + 1 >= out_end)
00151       {
00152         // Second the exact but rare test
00153         if (op - !lit + 3 + 1 >= out_end)
00154         {
00155           PCL_WARN ("[pcl::lzf_compress] Attempting to write data outside the output buffer!\n");
00156           return (0);
00157         }
00158       }
00159 
00160       // Stop run
00161       op [- lit - 1] = static_cast<unsigned char>(lit - 1);
00162       // Undo run if length is zero
00163       op -= !lit;
00164 
00165       while (true)
00166       {
00167         if (maxlen > 16)
00168         {
00169           len++; if (ref [len] != ip [len]) break;
00170           len++; if (ref [len] != ip [len]) break;
00171           len++; if (ref [len] != ip [len]) break;
00172           len++; if (ref [len] != ip [len]) break;
00173 
00174           len++; if (ref [len] != ip [len]) break;
00175           len++; if (ref [len] != ip [len]) break;
00176           len++; if (ref [len] != ip [len]) break;
00177           len++; if (ref [len] != ip [len]) break;
00178 
00179           len++; if (ref [len] != ip [len]) break;
00180           len++; if (ref [len] != ip [len]) break;
00181           len++; if (ref [len] != ip [len]) break;
00182           len++; if (ref [len] != ip [len]) break;
00183 
00184           len++; if (ref [len] != ip [len]) break;
00185           len++; if (ref [len] != ip [len]) break;
00186           len++; if (ref [len] != ip [len]) break;
00187           len++; if (ref [len] != ip [len]) break;
00188         }
00189 
00190         do
00191           len++;
00192         while (len < static_cast<unsigned int> (maxlen) && ref[len] == ip[len]);
00193 
00194         break;
00195       }
00196 
00197       // Len is now #octets - 1
00198       len -= 2;
00199       ip++;
00200 
00201       if (len < 7)
00202       {
00203         *op++ = static_cast<unsigned char> ((off >> 8) + (len << 5));
00204       }
00205       else
00206       {
00207         *op++ = static_cast<unsigned char> ((off >> 8) + (  7 << 5));
00208         *op++ = static_cast<unsigned char> (len - 7);
00209       }
00210 
00211       *op++ = static_cast<unsigned char> (off);
00212 
00213       // Start run
00214       lit = 0; op++;
00215 
00216       ip += len + 1;
00217 
00218       if (ip >= in_end - 2)
00219         break;
00220 
00221       --ip;
00222       hval = (ip[0] << 8) | ip[1];
00223 
00224       hval = (hval << 8) | ip[2];
00225       htab[IDX (hval)] = static_cast<unsigned int> (ip - (static_cast<const unsigned char *> (in_data)));
00226       ip++;
00227     }
00228     else
00229     {
00230       // One more literal byte we must copy
00231       if (op >= out_end)
00232       {
00233         PCL_WARN ("[pcl::lzf_compress] Attempting to copy data outside the output buffer!\n");
00234         return (0);
00235       }
00236 
00237       lit++; *op++ = *ip++;
00238 
00239       if (lit == (1 <<  5))
00240       {
00241         // Stop run
00242         op [- lit - 1] = static_cast<unsigned char> (lit - 1);
00243         // Start run
00244         lit = 0; op++;
00245       }
00246     }
00247   }
00248 
00249   // At most 3 bytes can be missing here 
00250   if (op + 3 > out_end)
00251     return (0);
00252 
00253   while (ip < in_end)
00254   {
00255     lit++; *op++ = *ip++;
00256 
00257     if (lit == (1 <<  5))
00258     {
00259       // Stop run
00260       op [- lit - 1] = static_cast<unsigned char> (lit - 1);
00261       // Start run
00262       lit = 0; op++;
00263     }
00264   }
00265 
00266   // End run
00267   op [- lit - 1] = static_cast<unsigned char> (lit - 1);
00268   // Undo run if length is zero
00269   op -= !lit;
00270 
00271   return (static_cast<unsigned int> (op - static_cast<unsigned char *> (out_data)));
00272 }
00273 
00275 unsigned int 
00276 pcl::lzfDecompress (const void *const in_data,  unsigned int in_len,
00277                     void             *out_data, unsigned int out_len)
00278 {
00279   unsigned char const *ip = static_cast<const unsigned char *> (in_data);
00280   unsigned char       *op = static_cast<unsigned char *> (out_data);
00281   unsigned char const *const in_end  = ip + in_len;
00282   unsigned char       *const out_end = op + out_len;
00283 
00284   do
00285   {
00286     unsigned int ctrl = *ip++;
00287 
00288     // Literal run
00289     if (ctrl < (1 << 5))
00290     {
00291       ctrl++;
00292 
00293       if (op + ctrl > out_end)
00294       {
00295         errno = E2BIG;
00296         return (0);
00297       }
00298 
00299       // Check for overflow
00300       if (ip + ctrl > in_end)
00301       {
00302         errno = EINVAL;
00303         return (0);
00304       }
00305       switch (ctrl)
00306       {
00307         case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
00308         case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;
00309         case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;
00310         case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++;
00311         case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;
00312         case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case  9: *op++ = *ip++;
00313         case  8: *op++ = *ip++; case  7: *op++ = *ip++; case  6: *op++ = *ip++; case  5: *op++ = *ip++;
00314         case  4: *op++ = *ip++; case  3: *op++ = *ip++; case  2: *op++ = *ip++; case  1: *op++ = *ip++;
00315       }
00316     }
00317     // Back reference
00318     else
00319     {
00320       unsigned int len = ctrl >> 5;
00321 
00322       unsigned char *ref = op - ((ctrl & 0x1f) << 8) - 1;
00323 
00324       // Check for overflow
00325       if (ip >= in_end)
00326       {
00327         errno = EINVAL;
00328         return (0);
00329       }
00330       if (len == 7)
00331       {
00332         len += *ip++;
00333         // Check for overflow
00334         if (ip >= in_end)
00335         {
00336           errno = EINVAL;
00337           return (0);
00338         }
00339       }
00340       ref -= *ip++;
00341 
00342       if (op + len + 2 > out_end)
00343       {
00344         errno = E2BIG;
00345         return (0);
00346       }
00347 
00348       if (ref < static_cast<unsigned char *> (out_data))
00349       {
00350         errno = EINVAL;
00351         return (0);
00352       }
00353 
00354       switch (len)
00355       {
00356         default:
00357         {
00358           len += 2;
00359 
00360           if (op >= ref + len)
00361           {
00362             // Disjunct areas
00363             memcpy (op, ref, len);
00364             op += len;
00365           }
00366           else
00367           {
00368             // Overlapping, use byte by byte copying
00369             do
00370               *op++ = *ref++;
00371             while (--len);
00372           }
00373 
00374           break;
00375         }
00376         case 9: *op++ = *ref++;
00377         case 8: *op++ = *ref++;
00378         case 7: *op++ = *ref++;
00379         case 6: *op++ = *ref++;
00380         case 5: *op++ = *ref++;
00381         case 4: *op++ = *ref++;
00382         case 3: *op++ = *ref++;
00383         case 2: *op++ = *ref++;
00384         case 1: *op++ = *ref++;
00385         case 0: *op++ = *ref++; // two octets more
00386                 *op++ = *ref++;
00387       }
00388     }
00389   }
00390   while (ip < in_end);
00391 
00392   return (static_cast<unsigned int> (op - static_cast<unsigned char*> (out_data)));
00393 }
00394 


pcl
Author(s): Open Perception
autogenerated on Wed Aug 26 2015 15:25:20