abseil_cpp: charconv_bigint.h Source File

Go to the documentation of this file.
00001 // Copyright 2018 The Abseil Authors.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //      https://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 
00015 #ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
00016 #define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
00017 
00018 #include <algorithm>
00019 #include <cstdint>
00020 #include <iostream>
00021 #include <string>
00022 
00023 #include "absl/strings/ascii.h"
00024 #include "absl/strings/internal/charconv_parse.h"
00025 #include "absl/strings/string_view.h"
00026 
00027 namespace absl {
00028 namespace strings_internal {
00029 
00030 // The largest power that 5 that can be raised to, and still fit in a uint32_t.
00031 constexpr int kMaxSmallPowerOfFive = 13;
00032 // The largest power that 10 that can be raised to, and still fit in a uint32_t.
00033 constexpr int kMaxSmallPowerOfTen = 9;
00034 
00035 extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1];
00036 extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
00037 
00038 // Large, fixed-width unsigned integer.
00039 //
00040 // Exact rounding for decimal-to-binary floating point conversion requires very
00041 // large integer math, but a design goal of absl::from_chars is to avoid
00042 // allocating memory.  The integer precision needed for decimal-to-binary
00043 // conversions is large but bounded, so a huge fixed-width integer class
00044 // suffices.
00045 //
00046 // This is an intentionally limited big integer class.  Only needed operations
00047 // are implemented.  All storage lives in an array data member, and all
00048 // arithmetic is done in-place, to avoid requiring separate storage for operand
00049 // and result.
00050 //
00051 // This is an internal class.  Some methods live in the .cc file, and are
00052 // instantiated only for the values of max_words we need.
00053 template <int max_words>
00054 class BigUnsigned {
00055  public:
00056   static_assert(max_words == 4 || max_words == 84,
00057                 "unsupported max_words value");
00058 
00059   BigUnsigned() : size_(0), words_{} {}
00060   explicit constexpr BigUnsigned(uint64_t v)
00061       : size_((v >> 32) ? 2 : v ? 1 : 0),
00062         words_{static_cast<uint32_t>(v & 0xffffffffu),
00063                static_cast<uint32_t>(v >> 32)} {}
00064 
00065   // Constructs a BigUnsigned from the given string_view containing a decimal
00066   // value.  If the input std::string is not a decimal integer, constructs a 0
00067   // instead.
00068   explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
00069     // Check for valid input, returning a 0 otherwise.  This is reasonable
00070     // behavior only because this constructor is for unit tests.
00071     if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
00072         sv.empty()) {
00073       return;
00074     }
00075     int exponent_adjust =
00076         ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
00077     if (exponent_adjust > 0) {
00078       MultiplyByTenToTheNth(exponent_adjust);
00079     }
00080   }
00081 
00082   // Loads the mantissa value of a previously-parsed float.
00083   //
00084   // Returns the associated decimal exponent.  The value of the parsed float is
00085   // exactly *this * 10**exponent.
00086   int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
00087 
00088   // Returns the number of decimal digits of precision this type provides.  All
00089   // numbers with this many decimal digits or fewer are representable by this
00090   // type.
00091   //
00092   // Analagous to std::numeric_limits<BigUnsigned>::digits10.
00093   static constexpr int Digits10() {
00094     // 9975007/1035508 is very slightly less than log10(2**32).
00095     return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
00096   }
00097 
00098   // Shifts left by the given number of bits.
00099   void ShiftLeft(int count) {
00100     if (count > 0) {
00101       const int word_shift = count / 32;
00102       if (word_shift >= max_words) {
00103         SetToZero();
00104         return;
00105       }
00106       size_ = (std::min)(size_ + word_shift, max_words);
00107       count %= 32;
00108       if (count == 0) {
00109         std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
00110       } else {
00111         for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
00112           words_[i] = (words_[i - word_shift] << count) |
00113                       (words_[i - word_shift - 1] >> (32 - count));
00114         }
00115         words_[word_shift] = words_[0] << count;
00116         // Grow size_ if necessary.
00117         if (size_ < max_words && words_[size_]) {
00118           ++size_;
00119         }
00120       }
00121       std::fill(words_, words_ + word_shift, 0u);
00122     }
00123   }
00124 
00125 
00126   // Multiplies by v in-place.
00127   void MultiplyBy(uint32_t v) {
00128     if (size_ == 0 || v == 1) {
00129       return;
00130     }
00131     if (v == 0) {
00132       SetToZero();
00133       return;
00134     }
00135     const uint64_t factor = v;
00136     uint64_t window = 0;
00137     for (int i = 0; i < size_; ++i) {
00138       window += factor * words_[i];
00139       words_[i] = window & 0xffffffff;
00140       window >>= 32;
00141     }
00142     // If carry bits remain and there's space for them, grow size_.
00143     if (window && size_ < max_words) {
00144       words_[size_] = window & 0xffffffff;
00145       ++size_;
00146     }
00147   }
00148 
00149   void MultiplyBy(uint64_t v) {
00150     uint32_t words[2];
00151     words[0] = static_cast<uint32_t>(v);
00152     words[1] = static_cast<uint32_t>(v >> 32);
00153     if (words[1] == 0) {
00154       MultiplyBy(words[0]);
00155     } else {
00156       MultiplyBy(2, words);
00157     }
00158   }
00159 
00160   // Multiplies in place by 5 to the power of n.  n must be non-negative.
00161   void MultiplyByFiveToTheNth(int n) {
00162     while (n >= kMaxSmallPowerOfFive) {
00163       MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
00164       n -= kMaxSmallPowerOfFive;
00165     }
00166     if (n > 0) {
00167       MultiplyBy(kFiveToNth[n]);
00168     }
00169   }
00170 
00171   // Multiplies in place by 10 to the power of n.  n must be non-negative.
00172   void MultiplyByTenToTheNth(int n) {
00173     if (n > kMaxSmallPowerOfTen) {
00174       // For large n, raise to a power of 5, then shift left by the same amount.
00175       // (10**n == 5**n * 2**n.)  This requires fewer multiplications overall.
00176       MultiplyByFiveToTheNth(n);
00177       ShiftLeft(n);
00178     } else if (n > 0) {
00179       // We can do this more quickly for very small N by using a single
00180       // multiplication.
00181       MultiplyBy(kTenToNth[n]);
00182     }
00183   }
00184 
00185   // Returns the value of 5**n, for non-negative n.  This implementation uses
00186   // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
00187   // MultiplyByFiveToTheNth().
00188   static BigUnsigned FiveToTheNth(int n);
00189 
00190   // Multiplies by another BigUnsigned, in-place.
00191   template <int M>
00192   void MultiplyBy(const BigUnsigned<M>& other) {
00193     MultiplyBy(other.size(), other.words());
00194   }
00195 
00196   void SetToZero() {
00197     std::fill(words_, words_ + size_, 0u);
00198     size_ = 0;
00199   }
00200 
00201   // Returns the value of the nth word of this BigUnsigned.  This is
00202   // range-checked, and returns 0 on out-of-bounds accesses.
00203   uint32_t GetWord(int index) const {
00204     if (index < 0 || index >= size_) {
00205       return 0;
00206     }
00207     return words_[index];
00208   }
00209 
00210   // Returns this integer as a decimal std::string.  This is not used in the decimal-
00211   // to-binary conversion; it is intended to aid in testing.
00212   std::string ToString() const;
00213 
00214   int size() const { return size_; }
00215   const uint32_t* words() const { return words_; }
00216 
00217  private:
00218   // Reads the number between [begin, end), possibly containing a decimal point,
00219   // into this BigUnsigned.
00220   //
00221   // Callers are required to ensure [begin, end) contains a valid number, with
00222   // one or more decimal digits and at most one decimal point.  This routine
00223   // will behave unpredictably if these preconditions are not met.
00224   //
00225   // Only the first `significant_digits` digits are read.  Digits beyond this
00226   // limit are "sticky": If the final significant digit is 0 or 5, and if any
00227   // dropped digit is nonzero, then that final significant digit is adjusted up
00228   // to 1 or 6.  This adjustment allows for precise rounding.
00229   //
00230   // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
00231   // account for the decimal point and for dropped significant digits.  After
00232   // this function returns,
00233   //   actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
00234   int ReadDigits(const char* begin, const char* end, int significant_digits);
00235 
00236   // Performs a step of big integer multiplication.  This computes the full
00237   // (64-bit-wide) values that should be added at the given index (step), and
00238   // adds to that location in-place.
00239   //
00240   // Because our math all occurs in place, we must multiply starting from the
00241   // highest word working downward.  (This is a bit more expensive due to the
00242   // extra carries involved.)
00243   //
00244   // This must be called in steps, for each word to be calculated, starting from
00245   // the high end and working down to 0.  The first value of `step` should be
00246   //   `std::min(original_size + other.size_ - 2, max_words - 1)`.
00247   // The reason for this expression is that multiplying the i'th word from one
00248   // multiplicand and the j'th word of another multiplicand creates a
00249   // two-word-wide value to be stored at the (i+j)'th element.  The highest
00250   // word indices we will access are `original_size - 1` from this object, and
00251   // `other.size_ - 1` from our operand.  Therefore,
00252   // `original_size + other.size_ - 2` is the first step we should calculate,
00253   // but limited on an upper bound by max_words.
00254 
00255   // Working from high-to-low ensures that we do not overwrite the portions of
00256   // the initial value of *this which are still needed for later steps.
00257   //
00258   // Once called with step == 0, *this contains the result of the
00259   // multiplication.
00260   //
00261   // `original_size` is the size_ of *this before the first call to
00262   // MultiplyStep().  `other_words` and `other_size` are the contents of our
00263   // operand.  `step` is the step to perform, as described above.
00264   void MultiplyStep(int original_size, const uint32_t* other_words,
00265                     int other_size, int step);
00266 
00267   void MultiplyBy(int other_size, const uint32_t* other_words) {
00268     const int original_size = size_;
00269     const int first_step =
00270         (std::min)(original_size + other_size - 2, max_words - 1);
00271     for (int step = first_step; step >= 0; --step) {
00272       MultiplyStep(original_size, other_words, other_size, step);
00273     }
00274   }
00275 
00276   // Adds a 32-bit value to the index'th word, with carry.
00277   void AddWithCarry(int index, uint32_t value) {
00278     if (value) {
00279       while (index < max_words && value > 0) {
00280         words_[index] += value;
00281         // carry if we overflowed in this word:
00282         if (value > words_[index]) {
00283           value = 1;
00284           ++index;
00285         } else {
00286           value = 0;
00287         }
00288       }
00289       size_ = (std::min)(max_words, (std::max)(index + 1, size_));
00290     }
00291   }
00292 
00293   void AddWithCarry(int index, uint64_t value) {
00294     if (value && index < max_words) {
00295       uint32_t high = value >> 32;
00296       uint32_t low = value & 0xffffffff;
00297       words_[index] += low;
00298       if (words_[index] < low) {
00299         ++high;
00300         if (high == 0) {
00301           // Carry from the low word caused our high word to overflow.
00302           // Short circuit here to do the right thing.
00303           AddWithCarry(index + 2, static_cast<uint32_t>(1));
00304           return;
00305         }
00306       }
00307       if (high > 0) {
00308         AddWithCarry(index + 1, high);
00309       } else {
00310         // Normally 32-bit AddWithCarry() sets size_, but since we don't call
00311         // it when `high` is 0, do it ourselves here.
00312         size_ = (std::min)(max_words, (std::max)(index + 1, size_));
00313       }
00314     }
00315   }
00316 
00317   // Divide this in place by a constant divisor.  Returns the remainder of the
00318   // division.
00319   template <uint32_t divisor>
00320   uint32_t DivMod() {
00321     uint64_t accumulator = 0;
00322     for (int i = size_ - 1; i >= 0; --i) {
00323       accumulator <<= 32;
00324       accumulator += words_[i];
00325       // accumulator / divisor will never overflow an int32_t in this loop
00326       words_[i] = static_cast<uint32_t>(accumulator / divisor);
00327       accumulator = accumulator % divisor;
00328     }
00329     while (size_ > 0 && words_[size_ - 1] == 0) {
00330       --size_;
00331     }
00332     return static_cast<uint32_t>(accumulator);
00333   }
00334 
00335   // The number of elements in words_ that may carry significant values.
00336   // All elements beyond this point are 0.
00337   //
00338   // When size_ is 0, this BigUnsigned stores the value 0.
00339   // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is
00340   // nonzero.  This can occur due to overflow truncation.
00341   // In particular, x.size_ != y.size_ does *not* imply x != y.
00342   int size_;
00343   uint32_t words_[max_words];
00344 };
00345 
00346 // Compares two big integer instances.
00347 //
00348 // Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
00349 template <int N, int M>
00350 int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
00351   int limit = (std::max)(lhs.size(), rhs.size());
00352   for (int i = limit - 1; i >= 0; --i) {
00353     const uint32_t lhs_word = lhs.GetWord(i);
00354     const uint32_t rhs_word = rhs.GetWord(i);
00355     if (lhs_word < rhs_word) {
00356       return -1;
00357     } else if (lhs_word > rhs_word) {
00358       return 1;
00359     }
00360   }
00361   return 0;
00362 }
00363 
00364 template <int N, int M>
00365 bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
00366   int limit = (std::max)(lhs.size(), rhs.size());
00367   for (int i = 0; i < limit; ++i) {
00368     if (lhs.GetWord(i) != rhs.GetWord(i)) {
00369       return false;
00370     }
00371   }
00372   return true;
00373 }
00374 
00375 template <int N, int M>
00376 bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
00377   return !(lhs == rhs);
00378 }
00379 
00380 template <int N, int M>
00381 bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
00382   return Compare(lhs, rhs) == -1;
00383 }
00384 
00385 template <int N, int M>
00386 bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
00387   return rhs < lhs;
00388 }
00389 template <int N, int M>
00390 bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
00391   return !(rhs < lhs);
00392 }
00393 template <int N, int M>
00394 bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
00395   return !(lhs < rhs);
00396 }
00397 
00398 // Output operator for BigUnsigned, for testing purposes only.
00399 template <int N>
00400 std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
00401   return os << num.ToString();
00402 }
00403 
00404 // Explicit instantiation declarations for the sizes of BigUnsigned that we
00405 // are using.
00406 //
00407 // For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
00408 // still bigger than an int128, and 84 is a large value we will want to use
00409 // in the from_chars implementation.
00410 //
00411 // Comments justifying the use of 84 belong in the from_chars implementation,
00412 // and will be added in a follow-up CL.
00413 extern template class BigUnsigned<4>;
00414 extern template class BigUnsigned<84>;
00415 
00416 }  // namespace strings_internal
00417 }  // namespace absl
00418 
00419 #endif  // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_