geometric_tools_engine: GteUIntegerALU32.h Source File

Go to the documentation of this file.
 // David Eberly, Geometric Tools, Redmond WA 98052
 // Copyright (c) 1998-2017
 // Distributed under the Boost Software License, Version 1.0.
 // http://www.boost.org/LICENSE_1_0.txt
 // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
 // File Version: 3.0.0 (2016/06/19)
 
 #pragma once
 
 #include <Mathematics/GteBitHacks.h>
 #include <algorithm>
 
 // Support for unsigned integer arithmetic in BSNumber and BSRational.  The
 // Curiously Recurring Template Paradigm is used to allow the UInteger
 // types to share code without introducing virtual functions.
 
 namespace gte
 {
 
 template <typename UInteger>
 class UIntegerALU32
 {
 public:
     // Comparisons.  These are not generic.  They rely on their being caled
     // when the two BSNumber arguments to BSNumber::operatorX() are of the
     // form 1.u*2^p and 1.v*2^p.  The comparisons apply to 1.u and 1.v as
     // unsigned integers with their leading 1-bits aligned.
     bool operator==(UInteger const& number) const;
     bool operator!=(UInteger const& number) const;
     bool operator< (UInteger const& number) const;
     bool operator<=(UInteger const& number) const;
     bool operator> (UInteger const& number) const;
     bool operator>=(UInteger const& number) const;
 
     // Arithmetic operations.  These are performed in-place; that is, the
     // result is stored in 'this' object.  The goal is to reduce the number of
     // object copies, much like the goal is for std::move.  The Sub function
     // requires the inputs to satisfy n0 > n1.
     void Add(UInteger const& n0, UInteger const& n1);
     void Sub(UInteger const& n0, UInteger const& n1);
     void Mul(UInteger const& n0, UInteger const& n1);
 
     // The shift is performed in-place; that is, the result is stored in
     // 'this' object.
     void ShiftLeft(UInteger const& number, int32_t shift);
 
     // The 'number' is even and positive.  It is shifted right to become an
     // odd number and the return value is the amount shifted.  The operation
     // is performed in-place; that is, the result is stored in 'this' object.
     int32_t ShiftRightToOdd(UInteger const& number);
 
     // Get a block of numRequested bits starting with the leading 1-bit of the
     // nonzero number.  The returned number has the prefix stored in the
     // high-order bits.  Additional bits are copied and used by the caller for
     // rounding.  This function supports conversions from 'float' and 'double'.
     // The input 'numRequested' is smaller than 64.
     uint64_t GetPrefix(int32_t numRequested) const;
 };
 
 
 template <typename UInteger>
 bool UIntegerALU32<UInteger>::operator==(UInteger const& number) const
 {
     UInteger const& self = *(UInteger const*)this;
     int32_t numBits = self.GetNumBits();
     if (numBits != number.GetNumBits())
     {
         return false;
     }
 
     if (numBits > 0)
     {
         auto const& bits = self.GetBits();
         auto const& nBits = number.GetBits();
         int32_t const last = self.GetSize() - 1;
         for (int32_t i = last; i >= 0; --i)
         {
             if (bits[i] != nBits[i])
             {
                 return false;
             }
         }
     }
     return true;
 }
 
 template <typename UInteger>
 bool UIntegerALU32<UInteger>::operator!=(UInteger const& number) const
 {
     return !operator==(number);
 }
 
 template <typename UInteger>
 bool UIntegerALU32<UInteger>::operator< (UInteger const& number) const
 {
     UInteger const& self = *(UInteger const*)this;
     int32_t nNumBits = number.GetNumBits();
     auto const& nBits = number.GetBits();
 
     int32_t numBits = self.GetNumBits();
     if (numBits > 0 && nNumBits > 0)
     {
         // The numbers must be compared as if they are left-aligned with
         // each other.  We got here because we had self = 1.u * 2^p and
         // number = 1.v * 2^p.  Although they have the same exponent, it is
         // possible that 'self < number' but 'numBits(1u) > numBits(1v)'.
         // Compare the bits one 32-bit block at a time.
         auto const& bits = self.GetBits();
         int bitIndex0 = numBits - 1;
         int bitIndex1 = nNumBits - 1;
         int block0 = bitIndex0 / 32;
         int block1 = bitIndex1 / 32;
         int numBlockBits0 = 1 + (bitIndex0 % 32);
         int numBlockBits1 = 1 + (bitIndex1 % 32);
         uint64_t n0shift = bits[block0];
         uint64_t n1shift = nBits[block1];
         while (block0 >= 0 && block1 >= 0)
         {
             // Shift the bits in the leading blocks to the high-order bit.
             uint32_t value0 =
                 GTE_GET_LO_U64(n0shift << (32 - numBlockBits0));
             uint32_t value1 =
                 GTE_GET_LO_U64(n1shift << (32 - numBlockBits1));
 
             // Shift bits in the next block (if any) to fill the current
             // block.
             if (--block0 >= 0)
             {
                 n0shift = bits[block0];
                 value0 |= GTE_GET_LO_U64(n0shift >> numBlockBits0);
             }
             if (--block1 >= 0)
             {
                 n1shift = nBits[block1];
                 value1 |= GTE_GET_LO_U64(n1shift >> numBlockBits1);
             }
             if (value0 < value1)
             {
                 return true;
             }
             if (value0 > value1)
             {
                 return false;
             }
         }
         return block0 < block1;
     }
     else
     {
         // One or both numbers are negative.  The only time 'less than' is
         // 'true' is when 'number' is positive.
         return (nNumBits > 0);
     }
 }
 
 template <typename UInteger>
 bool UIntegerALU32<UInteger>::operator<=(UInteger const& number) const
 {
     return operator<(number) || operator==(number);
 }
 
 template <typename UInteger>
 bool UIntegerALU32<UInteger>::operator> (UInteger const& number) const
 {
     return !operator<=(number);
 }
 
 template <typename UInteger>
 bool UIntegerALU32<UInteger>::operator>=(UInteger const& number) const
 {
     return !operator<(number);
 }
 
 template <typename UInteger>
 void UIntegerALU32<UInteger>::Add(UInteger const& n0, UInteger const& n1)
 {
     UInteger& self = *(UInteger*)this;
     int32_t n0NumBits = n0.GetNumBits();
     int32_t n1NumBits = n1.GetNumBits();
 
     // Add the numbers considered as positive integers.  Set the last block to
     // zero in case no carry-out occurs.
     int numBits = std::max(n0NumBits, n1NumBits) + 1;
     self.SetNumBits(numBits);
     self.SetBack(0);
 
     // Get the input array sizes.
     int32_t numElements0 = n0.GetSize();
     int32_t numElements1 = n1.GetSize();
 
     // Order the inputs so that the first has the most blocks.
     auto const& u0 =
         (numElements0 >= numElements1 ? n0.GetBits() : n1.GetBits());
     auto const& u1 =
         (numElements0 >= numElements1 ? n1.GetBits() : n0.GetBits());
     auto numElements = std::minmax(numElements0, numElements1);
 
     // Add the u1-blocks to u0-blocks.
     auto& bits = self.GetBits();
     uint64_t carry = 0, sum;
     int32_t i;
     for (i = 0; i < numElements.first; ++i)
     {
         sum = u0[i] + (u1[i] + carry);
         bits[i] = GTE_GET_LO_U64(sum);
         carry = (sum >> 32);
     }
 
     // We have no more u1-blocks. Propagate the carry-out, if there is one, or
     // copy the remaining blocks if there is not.
     if (carry > 0)
     {
         for (; i < numElements.second; ++i)
         {
             sum = u0[i] + carry;
             bits[i] = GTE_GET_LO_U64(sum);
             carry = (sum >> 32);
         }
         if (carry > 0)
         {
             bits[i] = GTE_GET_LO_U64(carry);
         }
     }
     else
     {
         for (; i < numElements.second; ++i)
         {
             bits[i] = u0[i];
         }
     }
 
     // Reduce the number of bits if there was not a carry-out.
     uint32_t firstBitIndex = (numBits - 1) % 32;
     uint32_t mask = (1 << firstBitIndex);
     if ((mask & self.GetBack()) == 0)
     {
         self.SetNumBits(--numBits);
     }
 }
 
 template <typename UInteger>
 void UIntegerALU32<UInteger>::Sub(UInteger const& n0, UInteger const& n1)
 {
     UInteger& self = *(UInteger*)this;
     int32_t n0NumBits = n0.GetNumBits();
     auto const& n0Bits = n0.GetBits();
     auto const& n1Bits = n1.GetBits();
 
     // Subtract the numbers considered as positive integers.  We know that
     // n0 > n1, so create a number n2 that has the same number of bits as
     // n0 and use two's-complement to generate -n2, and then add n0 and -n2.
     // The result is nonnegative, so we do not need to apply two's complement
     // to a negative result to extract the sign and absolute value.
 
     // Get the input array sizes.  We know numElements0 >= numElements1.
     int32_t numElements0 = n0.GetSize();
     int32_t numElements1 = n1.GetSize();
 
     // Create the two's-complement number n2.  We know n2.GetNumElements() is
     // the same as numElements0.
     UInteger n2(n0NumBits);
     auto& n2Bits = n2.GetBits();
     int32_t i;
     for (i = 0; i < numElements1; ++i)
     {
         n2Bits[i] = ~n1Bits[i];
     }
     for (; i < numElements0; ++i)
     {
         n2Bits[i] = ~0u;
     }
 
     // Now add 1 to the bit-negated result to obtain -n1.
     uint64_t carry = 1, sum;
     for (i = 0; i < numElements0; ++i)
     {
         sum = n2Bits[i] + carry;
         n2Bits[i] = GTE_GET_LO_U64(sum);
         carry = (sum >> 32);
     }
 
     // Add the numbers as positive integers.  Set the last block to zero in
     // case no carry-out occurs.
     self.SetNumBits(n0NumBits + 1);
     self.SetBack(0);
 
     // Add the n0-blocks to n2-blocks.
     auto& bits = self.GetBits();
     for (i = 0, carry = 0; i < numElements0; ++i)
     {
         sum = n2Bits[i] + (n0Bits[i] + carry);
         bits[i] = GTE_GET_LO_U64(sum);
         carry = (sum >> 32);
     }
 
     // Strip off the bits introduced by two's-complement.
     int32_t block;
     for (block = numElements0 - 1; block >= 0; --block)
     {
         if (bits[block] > 0)
         {
             break;
         }
     }
 
     self.SetNumBits(32 * block + GetLeadingBit(bits[block]) + 1);
 }
 
 template <typename UInteger>
 void UIntegerALU32<UInteger>::Mul(UInteger const& n0, UInteger const& n1)
 {
     UInteger& self = *(UInteger*)this;
     int32_t n0NumBits = n0.GetNumBits();
     int32_t n1NumBits = n1.GetNumBits();
     auto const& n0Bits = n0.GetBits();
     auto const& n1Bits = n1.GetBits();
 
     // The number of bits is at most this, possibly one bit smaller.
     int numBits = n0NumBits + n1NumBits;
     self.SetNumBits(numBits);
     auto& bits = self.GetBits();
 
     // Product of a single-block number with a multiple-block number.
     UInteger product(numBits);
     auto& pBits = product.GetBits();
 
     // Get the array sizes.
     int32_t const numElements0 = n0.GetSize();
     int32_t const numElements1 = n1.GetSize();
     int32_t const numElements = self.GetSize();
 
     // Compute the product v = u0*u1.
     int32_t i0, i1, i2;
     uint64_t term, sum;
 
     // The case i0 == 0 is handled separately to initialize the accumulator
     // with u0[0]*v.  This avoids having to fill the bytes of 'bits' with
     // zeros outside the double loop, something that can be a performance
     // issue when 'numBits' is large.
     uint64_t block0 = n0Bits[0];
     uint64_t carry = 0;
     for (i1 = 0; i1 < numElements1; ++i1)
     {
         term = block0 * n1Bits[i1] + carry;
         bits[i1] = GTE_GET_LO_U64(term);
         carry = (term >> 32);
     }
     if (i1 < numElements)
     {
         bits[i1] = GTE_GET_LO_U64(carry);
     }
 
     for (i0 = 1; i0 < numElements0; ++i0)
     {
         // Compute the product p = u0[i0]*u1.
         block0 = n0Bits[i0];
         carry = 0;
         for (i1 = 0, i2 = i0; i1 < numElements1; ++i1, ++i2)
         {
             term = block0 * n1Bits[i1] + carry;
             pBits[i2] = GTE_GET_LO_U64(term);
             carry = (term >> 32);
         }
         if (i2 < numElements)
         {
             pBits[i2] = GTE_GET_LO_U64(carry);
         }
 
         // Add p to the accumulator v.
         carry = 0;
         for (i1 = 0, i2 = i0; i1 < numElements1; ++i1, ++i2)
         {
             sum = pBits[i2] + (bits[i2] + carry);
             bits[i2] = GTE_GET_LO_U64(sum);
             carry = (sum >> 32);
         }
         if (i2 < numElements)
         {
             sum = pBits[i2] + carry;
             bits[i2] = GTE_GET_LO_U64(sum);
         }
     }
 
     // Reduce the number of bits if there was not a carry-out.
     uint32_t firstBitIndex = (numBits - 1) % 32;
     uint32_t mask = (1 << firstBitIndex);
     if ((mask & self.GetBack()) == 0)
     {
         self.SetNumBits(--numBits);
     }
 }
 
 template <typename UInteger>
 void UIntegerALU32<UInteger>::ShiftLeft(UInteger const& number, int32_t shift)
 {
     UInteger& self = *(UInteger*)this;
     int32_t nNumBits = number.GetNumBits();
     auto const& nBits = number.GetBits();
 
     // Shift the 'number' considered as an odd positive integer.
     self.SetNumBits(nNumBits + shift);
 
     // Set the low-order bits to zero.
     auto& bits = self.GetBits();
     int32_t const shiftBlock = shift / 32;
     for (int32_t i = 0; i < shiftBlock; ++i)
     {
         bits[i] = 0;
     }
 
     // Get the location of the low-order 1-bit within the result.
     int32_t const numInElements = number.GetSize();
     int32_t const lshift = shift % 32;
     int32_t i, j;
     if (lshift > 0)
     {
         // The trailing 1-bits for source and target are at different
         // relative indices.  Each shifted source block straddles a boundary
         // between two target blocks, so we must extract the subblocks and
         // copy accordingly.
         int32_t const rshift = 32 - lshift;
         uint32_t prev = 0, curr;
         for (i = shiftBlock, j = 0; j < numInElements; ++i, ++j)
         {
             curr = nBits[j];
             bits[i] = (curr << lshift) | (prev >> rshift);
             prev = curr;
         }
         if (i < self.GetSize())
         {
             // The leading 1-bit of the source is at a relative index such
             // that when you add the shift amount, that bit occurs in a new
             // block.
             bits[i] = (prev >> rshift);
         }
     }
     else
     {
         // The trailing 1-bits for source and target are at the same relative
         // index.  The shift reduces to a block copy.
         for (i = shiftBlock, j = 0; j < numInElements; ++i, ++j)
         {
             bits[i] = nBits[j];
         }
     }
 }
 
 template <typename UInteger>
 int32_t UIntegerALU32<UInteger>::ShiftRightToOdd(UInteger const& number)
 {
     UInteger& self = *(UInteger*)this;
     auto const& nBits = number.GetBits();
 
     // Get the leading 1-bit.
     int32_t const numElements = number.GetSize();
     int32_t const numM1 = numElements - 1;
     int32_t firstBitIndex = 32 * numM1 + GetLeadingBit(nBits[numM1]);
 
     // Get the trailing 1-bit.
     int32_t lastBitIndex = -1;
     for (int32_t block = 0; block < numElements; ++block)
     {
         uint32_t value = nBits[block];
         if (value > 0)
         {
             lastBitIndex = 32 * block + GetTrailingBit(value);
             break;
         }
     }
 
     // The right-shifted result.
     self.SetNumBits(firstBitIndex - lastBitIndex + 1);
     auto& bits = self.GetBits();
     int32_t const numBlocks = self.GetSize();
 
     // Get the location of the low-order 1-bit within the result.
     int32_t const shiftBlock = lastBitIndex / 32;
     int32_t rshift = lastBitIndex % 32;
     if (rshift > 0)
     {
         int32_t const lshift = 32 - rshift;
         int32_t i, j = shiftBlock;
         uint32_t curr = nBits[j++];
         for (i = 0; j < numElements; ++i, ++j)
         {
             uint32_t next = nBits[j];
             bits[i] = (curr >> rshift) | (next << lshift);
             curr = next;
         }
         if (i < numBlocks)
         {
             bits[i] = (curr >> rshift);
         }
     }
     else
     {
         for (int32_t i = 0, j = shiftBlock; i < numBlocks; ++i, ++j)
         {
             bits[i] = nBits[j];
         }
     }
 
     return rshift + 32 * shiftBlock;
 }
 
 template <typename UInteger>
 uint64_t UIntegerALU32<UInteger>::GetPrefix(int32_t numRequested) const
 {
     UInteger const& self = *(UInteger const*)this;
     auto const& bits = self.GetBits();
 
     // Copy to 'prefix' the leading 32-bit block that is nonzero.
     int32_t bitIndex = self.GetNumBits() - 1;
     int32_t blockIndex = bitIndex / 32;
     uint64_t prefix = bits[blockIndex];
 
     // Get the number of bits in the block starting with the leading 1-bit.
     int32_t firstBitIndex = bitIndex % 32;
     int32_t numBlockBits = firstBitIndex + 1;
 
     // Shift the leading 1-bit to bit-63 of prefix.  We have consumed
     // numBlockBits, which might not be the entire budget.
     int32_t targetIndex = 63;
     prefix <<= targetIndex - firstBitIndex;
     numRequested -= numBlockBits;
     targetIndex -= numBlockBits;
 
     if (numRequested > 0 && --blockIndex >= 0)
     {
         // More bits are available.  Copy and shift the entire 32-bit next
         // block and OR it into the 'prefix'.  For 'float', we will have
         // consumed the entire budget.  For 'double', we might have to get
         // bits from a third block.
         uint64_t nextBlock = bits[blockIndex];
         nextBlock <<= targetIndex - 31;  // Shift amount is positive.
         prefix |= nextBlock;
         numRequested -= 32;
         targetIndex -= 32;
 
         if (numRequested > 0 && --blockIndex >= 0)
         {
             // We know that targetIndex > 0; only 'double' allows us to get
             // here, so numRequested is at most 53.  We also know that
             // targetIndex < 32 because we started with 63 and subtracted
             // at least 32 from it.  Thus, the shift amount is positive.
             nextBlock = bits[blockIndex];
             nextBlock >>= 31 - targetIndex;
             prefix |= nextBlock;
         }
     }
 
     return prefix;
 }
 
 
 }