numbers.cc
Go to the documentation of this file.
00001 // Copyright 2017 The Abseil Authors.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //      https://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 
00015 // This file contains string processing functions related to
00016 // numeric values.
00017 
00018 #include "absl/strings/numbers.h"
00019 
00020 #include <algorithm>
00021 #include <cassert>
00022 #include <cfloat>          // for DBL_DIG and FLT_DIG
00023 #include <cmath>           // for HUGE_VAL
00024 #include <cstdint>
00025 #include <cstdio>
00026 #include <cstdlib>
00027 #include <cstring>
00028 #include <iterator>
00029 #include <limits>
00030 #include <memory>
00031 #include <utility>
00032 
00033 #include "absl/base/internal/bits.h"
00034 #include "absl/base/internal/raw_logging.h"
00035 #include "absl/strings/ascii.h"
00036 #include "absl/strings/charconv.h"
00037 #include "absl/strings/internal/memutil.h"
00038 #include "absl/strings/match.h"
00039 #include "absl/strings/str_cat.h"
00040 
00041 namespace absl {
00042 
00043 bool SimpleAtof(absl::string_view str, float* out) {
00044   *out = 0.0;
00045   str = StripAsciiWhitespace(str);
00046   if (!str.empty() && str[0] == '+') {
00047     str.remove_prefix(1);
00048   }
00049   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
00050   if (result.ec == std::errc::invalid_argument) {
00051     return false;
00052   }
00053   if (result.ptr != str.data() + str.size()) {
00054     // not all non-whitespace characters consumed
00055     return false;
00056   }
00057   // from_chars() with DR 3081's current wording will return max() on
00058   // overflow.  SimpleAtof returns infinity instead.
00059   if (result.ec == std::errc::result_out_of_range) {
00060     if (*out > 1.0) {
00061       *out = std::numeric_limits<float>::infinity();
00062     } else if (*out < -1.0) {
00063       *out = -std::numeric_limits<float>::infinity();
00064     }
00065   }
00066   return true;
00067 }
00068 
00069 bool SimpleAtod(absl::string_view str, double* out) {
00070   *out = 0.0;
00071   str = StripAsciiWhitespace(str);
00072   if (!str.empty() && str[0] == '+') {
00073     str.remove_prefix(1);
00074   }
00075   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
00076   if (result.ec == std::errc::invalid_argument) {
00077     return false;
00078   }
00079   if (result.ptr != str.data() + str.size()) {
00080     // not all non-whitespace characters consumed
00081     return false;
00082   }
00083   // from_chars() with DR 3081's current wording will return max() on
00084   // overflow.  SimpleAtod returns infinity instead.
00085   if (result.ec == std::errc::result_out_of_range) {
00086     if (*out > 1.0) {
00087       *out = std::numeric_limits<double>::infinity();
00088     } else if (*out < -1.0) {
00089       *out = -std::numeric_limits<double>::infinity();
00090     }
00091   }
00092   return true;
00093 }
00094 
00095 namespace {
00096 
00097 // Writes a two-character representation of 'i' to 'buf'. 'i' must be in the
00098 // range 0 <= i < 100, and buf must have space for two characters. Example:
00099 //   char buf[2];
00100 //   PutTwoDigits(42, buf);
00101 //   // buf[0] == '4'
00102 //   // buf[1] == '2'
00103 inline void PutTwoDigits(size_t i, char* buf) {
00104   static const char two_ASCII_digits[100][2] = {
00105     {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
00106     {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
00107     {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
00108     {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
00109     {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
00110     {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
00111     {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
00112     {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
00113     {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
00114     {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
00115     {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
00116     {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
00117     {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
00118     {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
00119     {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
00120     {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
00121     {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
00122     {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
00123     {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
00124     {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}
00125   };
00126   assert(i < 100);
00127   memcpy(buf, two_ASCII_digits[i], 2);
00128 }
00129 
00130 }  // namespace
00131 
00132 bool SimpleAtob(absl::string_view str, bool* out) {
00133   ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
00134   if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
00135       EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
00136       EqualsIgnoreCase(str, "1")) {
00137     *out = true;
00138     return true;
00139   }
00140   if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
00141       EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
00142       EqualsIgnoreCase(str, "0")) {
00143     *out = false;
00144     return true;
00145   }
00146   return false;
00147 }
00148 
00149 // ----------------------------------------------------------------------
00150 // FastIntToBuffer() overloads
00151 //
00152 // Like the Fast*ToBuffer() functions above, these are intended for speed.
00153 // Unlike the Fast*ToBuffer() functions, however, these functions write
00154 // their output to the beginning of the buffer.  The caller is responsible
00155 // for ensuring that the buffer has enough space to hold the output.
00156 //
00157 // Returns a pointer to the end of the string (i.e. the null character
00158 // terminating the string).
00159 // ----------------------------------------------------------------------
00160 
00161 namespace {
00162 
00163 // Used to optimize printing a decimal number's final digit.
00164 const char one_ASCII_final_digits[10][2] {
00165   {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
00166   {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
00167 };
00168 
00169 }  // namespace
00170 
00171 char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
00172   uint32_t digits;
00173   // The idea of this implementation is to trim the number of divides to as few
00174   // as possible, and also reducing memory stores and branches, by going in
00175   // steps of two digits at a time rather than one whenever possible.
00176   // The huge-number case is first, in the hopes that the compiler will output
00177   // that case in one branch-free block of code, and only output conditional
00178   // branches into it from below.
00179   if (i >= 1000000000) {     // >= 1,000,000,000
00180     digits = i / 100000000;  //      100,000,000
00181     i -= digits * 100000000;
00182     PutTwoDigits(digits, buffer);
00183     buffer += 2;
00184   lt100_000_000:
00185     digits = i / 1000000;  // 1,000,000
00186     i -= digits * 1000000;
00187     PutTwoDigits(digits, buffer);
00188     buffer += 2;
00189   lt1_000_000:
00190     digits = i / 10000;  // 10,000
00191     i -= digits * 10000;
00192     PutTwoDigits(digits, buffer);
00193     buffer += 2;
00194   lt10_000:
00195     digits = i / 100;
00196     i -= digits * 100;
00197     PutTwoDigits(digits, buffer);
00198     buffer += 2;
00199  lt100:
00200     digits = i;
00201     PutTwoDigits(digits, buffer);
00202     buffer += 2;
00203     *buffer = 0;
00204     return buffer;
00205   }
00206 
00207   if (i < 100) {
00208     digits = i;
00209     if (i >= 10) goto lt100;
00210     memcpy(buffer, one_ASCII_final_digits[i], 2);
00211     return buffer + 1;
00212   }
00213   if (i < 10000) {  //    10,000
00214     if (i >= 1000) goto lt10_000;
00215     digits = i / 100;
00216     i -= digits * 100;
00217     *buffer++ = '0' + digits;
00218     goto lt100;
00219   }
00220   if (i < 1000000) {  //    1,000,000
00221     if (i >= 100000) goto lt1_000_000;
00222     digits = i / 10000;  //    10,000
00223     i -= digits * 10000;
00224     *buffer++ = '0' + digits;
00225     goto lt10_000;
00226   }
00227   if (i < 100000000) {  //    100,000,000
00228     if (i >= 10000000) goto lt100_000_000;
00229     digits = i / 1000000;  //   1,000,000
00230     i -= digits * 1000000;
00231     *buffer++ = '0' + digits;
00232     goto lt1_000_000;
00233   }
00234   // we already know that i < 1,000,000,000
00235   digits = i / 100000000;  //   100,000,000
00236   i -= digits * 100000000;
00237   *buffer++ = '0' + digits;
00238   goto lt100_000_000;
00239 }
00240 
00241 char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
00242   uint32_t u = i;
00243   if (i < 0) {
00244     *buffer++ = '-';
00245     // We need to do the negation in modular (i.e., "unsigned")
00246     // arithmetic; MSVC++ apprently warns for plain "-u", so
00247     // we write the equivalent expression "0 - u" instead.
00248     u = 0 - u;
00249   }
00250   return numbers_internal::FastIntToBuffer(u, buffer);
00251 }
00252 
00253 char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
00254   uint32_t u32 = static_cast<uint32_t>(i);
00255   if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
00256 
00257   // Here we know i has at least 10 decimal digits.
00258   uint64_t top_1to11 = i / 1000000000;
00259   u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
00260   uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
00261 
00262   if (top_1to11_32 == top_1to11) {
00263     buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
00264   } else {
00265     // top_1to11 has more than 32 bits too; print it in two steps.
00266     uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
00267     uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
00268     buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
00269     PutTwoDigits(mid_2, buffer);
00270     buffer += 2;
00271   }
00272 
00273   // We have only 9 digits now, again the maximum uint32_t can handle fully.
00274   uint32_t digits = u32 / 10000000;  // 10,000,000
00275   u32 -= digits * 10000000;
00276   PutTwoDigits(digits, buffer);
00277   buffer += 2;
00278   digits = u32 / 100000;  // 100,000
00279   u32 -= digits * 100000;
00280   PutTwoDigits(digits, buffer);
00281   buffer += 2;
00282   digits = u32 / 1000;  // 1,000
00283   u32 -= digits * 1000;
00284   PutTwoDigits(digits, buffer);
00285   buffer += 2;
00286   digits = u32 / 10;
00287   u32 -= digits * 10;
00288   PutTwoDigits(digits, buffer);
00289   buffer += 2;
00290   memcpy(buffer, one_ASCII_final_digits[u32], 2);
00291   return buffer + 1;
00292 }
00293 
00294 char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
00295   uint64_t u = i;
00296   if (i < 0) {
00297     *buffer++ = '-';
00298     u = 0 - u;
00299   }
00300   return numbers_internal::FastIntToBuffer(u, buffer);
00301 }
00302 
00303 // Given a 128-bit number expressed as a pair of uint64_t, high half first,
00304 // return that number multiplied by the given 32-bit value.  If the result is
00305 // too large to fit in a 128-bit number, divide it by 2 until it fits.
00306 static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
00307                                            uint32_t mul) {
00308   uint64_t bits0_31 = num.second & 0xFFFFFFFF;
00309   uint64_t bits32_63 = num.second >> 32;
00310   uint64_t bits64_95 = num.first & 0xFFFFFFFF;
00311   uint64_t bits96_127 = num.first >> 32;
00312 
00313   // The picture so far: each of these 64-bit values has only the lower 32 bits
00314   // filled in.
00315   // bits96_127:          [ 00000000 xxxxxxxx ]
00316   // bits64_95:                    [ 00000000 xxxxxxxx ]
00317   // bits32_63:                             [ 00000000 xxxxxxxx ]
00318   // bits0_31:                                       [ 00000000 xxxxxxxx ]
00319 
00320   bits0_31 *= mul;
00321   bits32_63 *= mul;
00322   bits64_95 *= mul;
00323   bits96_127 *= mul;
00324 
00325   // Now the top halves may also have value, though all 64 of their bits will
00326   // never be set at the same time, since they are a result of a 32x32 bit
00327   // multiply.  This makes the carry calculation slightly easier.
00328   // bits96_127:          [ mmmmmmmm | mmmmmmmm ]
00329   // bits64_95:                    [ | mmmmmmmm mmmmmmmm | ]
00330   // bits32_63:                      |        [ mmmmmmmm | mmmmmmmm ]
00331   // bits0_31:                       |                 [ | mmmmmmmm mmmmmmmm ]
00332   // eventually:        [ bits128_up | ...bits64_127.... | ..bits0_63... ]
00333 
00334   uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
00335   uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
00336                         (bits0_63 < bits0_31);
00337   uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
00338   if (bits128_up == 0) return {bits64_127, bits0_63};
00339 
00340   int shift = 64 - base_internal::CountLeadingZeros64(bits128_up);
00341   uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
00342   uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
00343   return {hi, lo};
00344 }
00345 
00346 // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
00347 // where the first bit is always a one.  So PowFive(1, 0) starts 0b100000,
00348 // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
00349 static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
00350   std::pair<uint64_t, uint64_t> result = {num, 0};
00351   while (expfive >= 13) {
00352     // 5^13 is the highest power of five that will fit in a 32-bit integer.
00353     result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
00354     expfive -= 13;
00355   }
00356   constexpr int powers_of_five[13] = {
00357       1,
00358       5,
00359       5 * 5,
00360       5 * 5 * 5,
00361       5 * 5 * 5 * 5,
00362       5 * 5 * 5 * 5 * 5,
00363       5 * 5 * 5 * 5 * 5 * 5,
00364       5 * 5 * 5 * 5 * 5 * 5 * 5,
00365       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
00366       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
00367       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
00368       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
00369       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
00370   result = Mul32(result, powers_of_five[expfive & 15]);
00371   int shift = base_internal::CountLeadingZeros64(result.first);
00372   if (shift != 0) {
00373     result.first = (result.first << shift) + (result.second >> (64 - shift));
00374     result.second = (result.second << shift);
00375   }
00376   return result;
00377 }
00378 
00379 struct ExpDigits {
00380   int32_t exponent;
00381   char digits[6];
00382 };
00383 
00384 // SplitToSix converts value, a positive double-precision floating-point number,
00385 // into a base-10 exponent and 6 ASCII digits, where the first digit is never
00386 // zero.  For example, SplitToSix(1) returns an exponent of zero and a digits
00387 // array of {'1', '0', '0', '0', '0', '0'}.  If value is exactly halfway between
00388 // two possible representations, e.g. value = 100000.5, then "round to even" is
00389 // performed.
00390 static ExpDigits SplitToSix(const double value) {
00391   ExpDigits exp_dig;
00392   int exp = 5;
00393   double d = value;
00394   // First step: calculate a close approximation of the output, where the
00395   // value d will be between 100,000 and 999,999, representing the digits
00396   // in the output ASCII array, and exp is the base-10 exponent.  It would be
00397   // faster to use a table here, and to look up the base-2 exponent of value,
00398   // however value is an IEEE-754 64-bit number, so the table would have 2,000
00399   // entries, which is not cache-friendly.
00400   if (d >= 999999.5) {
00401     if (d >= 1e+261) exp += 256, d *= 1e-256;
00402     if (d >= 1e+133) exp += 128, d *= 1e-128;
00403     if (d >= 1e+69) exp += 64, d *= 1e-64;
00404     if (d >= 1e+37) exp += 32, d *= 1e-32;
00405     if (d >= 1e+21) exp += 16, d *= 1e-16;
00406     if (d >= 1e+13) exp += 8, d *= 1e-8;
00407     if (d >= 1e+9) exp += 4, d *= 1e-4;
00408     if (d >= 1e+7) exp += 2, d *= 1e-2;
00409     if (d >= 1e+6) exp += 1, d *= 1e-1;
00410   } else {
00411     if (d < 1e-250) exp -= 256, d *= 1e256;
00412     if (d < 1e-122) exp -= 128, d *= 1e128;
00413     if (d < 1e-58) exp -= 64, d *= 1e64;
00414     if (d < 1e-26) exp -= 32, d *= 1e32;
00415     if (d < 1e-10) exp -= 16, d *= 1e16;
00416     if (d < 1e-2) exp -= 8, d *= 1e8;
00417     if (d < 1e+2) exp -= 4, d *= 1e4;
00418     if (d < 1e+4) exp -= 2, d *= 1e2;
00419     if (d < 1e+5) exp -= 1, d *= 1e1;
00420   }
00421   // At this point, d is in the range [99999.5..999999.5) and exp is in the
00422   // range [-324..308]. Since we need to round d up, we want to add a half
00423   // and truncate.
00424   // However, the technique above may have lost some precision, due to its
00425   // repeated multiplication by constants that each may be off by half a bit
00426   // of precision.  This only matters if we're close to the edge though.
00427   // Since we'd like to know if the fractional part of d is close to a half,
00428   // we multiply it by 65536 and see if the fractional part is close to 32768.
00429   // (The number doesn't have to be a power of two,but powers of two are faster)
00430   uint64_t d64k = d * 65536;
00431   int dddddd;  // A 6-digit decimal integer.
00432   if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
00433     // OK, it's fairly likely that precision was lost above, which is
00434     // not a surprise given only 52 mantissa bits are available.  Therefore
00435     // redo the calculation using 128-bit numbers.  (64 bits are not enough).
00436 
00437     // Start out with digits rounded down; maybe add one below.
00438     dddddd = static_cast<int>(d64k / 65536);
00439 
00440     // mantissa is a 64-bit integer representing M.mmm... * 2^63.  The actual
00441     // value we're representing, of course, is M.mmm... * 2^exp2.
00442     int exp2;
00443     double m = std::frexp(value, &exp2);
00444     uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
00445     // std::frexp returns an m value in the range [0.5, 1.0), however we
00446     // can't multiply it by 2^64 and convert to an integer because some FPUs
00447     // throw an exception when converting an number higher than 2^63 into an
00448     // integer - even an unsigned 64-bit integer!  Fortunately it doesn't matter
00449     // since m only has 52 significant bits anyway.
00450     mantissa <<= 1;
00451     exp2 -= 64;  // not needed, but nice for debugging
00452 
00453     // OK, we are here to compare:
00454     //     (dddddd + 0.5) * 10^(exp-5)  vs.  mantissa * 2^exp2
00455     // so we can round up dddddd if appropriate.  Those values span the full
00456     // range of 600 orders of magnitude of IEE 64-bit floating-point.
00457     // Fortunately, we already know they are very close, so we don't need to
00458     // track the base-2 exponent of both sides.  This greatly simplifies the
00459     // the math since the 2^exp2 calculation is unnecessary and the power-of-10
00460     // calculation can become a power-of-5 instead.
00461 
00462     std::pair<uint64_t, uint64_t> edge, val;
00463     if (exp >= 6) {
00464       // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
00465       // Since we're tossing powers of two, 2 * dddddd + 1 is the
00466       // same as dddddd + 0.5
00467       edge = PowFive(2 * dddddd + 1, exp - 5);
00468 
00469       val.first = mantissa;
00470       val.second = 0;
00471     } else {
00472       // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
00473       // above because (exp - 5) is negative.  So we compare (dddddd + 0.5) to
00474       // mantissa * 5 ^ (5 - exp)
00475       edge = PowFive(2 * dddddd + 1, 0);
00476 
00477       val = PowFive(mantissa, 5 - exp);
00478     }
00479     // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
00480     //        val.second, edge.first, edge.second);
00481     if (val > edge) {
00482       dddddd++;
00483     } else if (val == edge) {
00484       dddddd += (dddddd & 1);
00485     }
00486   } else {
00487     // Here, we are not close to the edge.
00488     dddddd = static_cast<int>((d64k + 32768) / 65536);
00489   }
00490   if (dddddd == 1000000) {
00491     dddddd = 100000;
00492     exp += 1;
00493   }
00494   exp_dig.exponent = exp;
00495 
00496   int two_digits = dddddd / 10000;
00497   dddddd -= two_digits * 10000;
00498   PutTwoDigits(two_digits, &exp_dig.digits[0]);
00499 
00500   two_digits = dddddd / 100;
00501   dddddd -= two_digits * 100;
00502   PutTwoDigits(two_digits, &exp_dig.digits[2]);
00503 
00504   PutTwoDigits(dddddd, &exp_dig.digits[4]);
00505   return exp_dig;
00506 }
00507 
00508 // Helper function for fast formatting of floating-point.
00509 // The result is the same as "%g", a.k.a. "%.6g".
00510 size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
00511   static_assert(std::numeric_limits<float>::is_iec559,
00512                 "IEEE-754/IEC-559 support only");
00513 
00514   char* out = buffer;  // we write data to out, incrementing as we go, but
00515                        // FloatToBuffer always returns the address of the buffer
00516                        // passed in.
00517 
00518   if (std::isnan(d)) {
00519     strcpy(out, "nan");  // NOLINT(runtime/printf)
00520     return 3;
00521   }
00522   if (d == 0) {  // +0 and -0 are handled here
00523     if (std::signbit(d)) *out++ = '-';
00524     *out++ = '0';
00525     *out = 0;
00526     return out - buffer;
00527   }
00528   if (d < 0) {
00529     *out++ = '-';
00530     d = -d;
00531   }
00532   if (std::isinf(d)) {
00533     strcpy(out, "inf");  // NOLINT(runtime/printf)
00534     return out + 3 - buffer;
00535   }
00536 
00537   auto exp_dig = SplitToSix(d);
00538   int exp = exp_dig.exponent;
00539   const char* digits = exp_dig.digits;
00540   out[0] = '0';
00541   out[1] = '.';
00542   switch (exp) {
00543     case 5:
00544       memcpy(out, &digits[0], 6), out += 6;
00545       *out = 0;
00546       return out - buffer;
00547     case 4:
00548       memcpy(out, &digits[0], 5), out += 5;
00549       if (digits[5] != '0') {
00550         *out++ = '.';
00551         *out++ = digits[5];
00552       }
00553       *out = 0;
00554       return out - buffer;
00555     case 3:
00556       memcpy(out, &digits[0], 4), out += 4;
00557       if ((digits[5] | digits[4]) != '0') {
00558         *out++ = '.';
00559         *out++ = digits[4];
00560         if (digits[5] != '0') *out++ = digits[5];
00561       }
00562       *out = 0;
00563       return out - buffer;
00564     case 2:
00565       memcpy(out, &digits[0], 3), out += 3;
00566       *out++ = '.';
00567       memcpy(out, &digits[3], 3);
00568       out += 3;
00569       while (out[-1] == '0') --out;
00570       if (out[-1] == '.') --out;
00571       *out = 0;
00572       return out - buffer;
00573     case 1:
00574       memcpy(out, &digits[0], 2), out += 2;
00575       *out++ = '.';
00576       memcpy(out, &digits[2], 4);
00577       out += 4;
00578       while (out[-1] == '0') --out;
00579       if (out[-1] == '.') --out;
00580       *out = 0;
00581       return out - buffer;
00582     case 0:
00583       memcpy(out, &digits[0], 1), out += 1;
00584       *out++ = '.';
00585       memcpy(out, &digits[1], 5);
00586       out += 5;
00587       while (out[-1] == '0') --out;
00588       if (out[-1] == '.') --out;
00589       *out = 0;
00590       return out - buffer;
00591     case -4:
00592       out[2] = '0';
00593       ++out;
00594       ABSL_FALLTHROUGH_INTENDED;
00595     case -3:
00596       out[2] = '0';
00597       ++out;
00598       ABSL_FALLTHROUGH_INTENDED;
00599     case -2:
00600       out[2] = '0';
00601       ++out;
00602       ABSL_FALLTHROUGH_INTENDED;
00603     case -1:
00604       out += 2;
00605       memcpy(out, &digits[0], 6);
00606       out += 6;
00607       while (out[-1] == '0') --out;
00608       *out = 0;
00609       return out - buffer;
00610   }
00611   assert(exp < -4 || exp >= 6);
00612   out[0] = digits[0];
00613   assert(out[1] == '.');
00614   out += 2;
00615   memcpy(out, &digits[1], 5), out += 5;
00616   while (out[-1] == '0') --out;
00617   if (out[-1] == '.') --out;
00618   *out++ = 'e';
00619   if (exp > 0) {
00620     *out++ = '+';
00621   } else {
00622     *out++ = '-';
00623     exp = -exp;
00624   }
00625   if (exp > 99) {
00626     int dig1 = exp / 100;
00627     exp -= dig1 * 100;
00628     *out++ = '0' + dig1;
00629   }
00630   PutTwoDigits(exp, out);
00631   out += 2;
00632   *out = 0;
00633   return out - buffer;
00634 }
00635 
00636 namespace {
00637 // Represents integer values of digits.
00638 // Uses 36 to indicate an invalid character since we support
00639 // bases up to 36.
00640 static const int8_t kAsciiToInt[256] = {
00641     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,  // 16 36s.
00642     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
00643     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,
00644     6,  7,  8,  9,  36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
00645     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
00646     36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
00647     24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
00648     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
00649     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
00650     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
00651     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
00652     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
00653     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
00654     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
00655 
00656 // Parse the sign and optional hex or oct prefix in text.
00657 inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/,
00658                                      int* base_ptr /*inout*/,
00659                                      bool* negative_ptr /*output*/) {
00660   if (text->data() == nullptr) {
00661     return false;
00662   }
00663 
00664   const char* start = text->data();
00665   const char* end = start + text->size();
00666   int base = *base_ptr;
00667 
00668   // Consume whitespace.
00669   while (start < end && absl::ascii_isspace(start[0])) {
00670     ++start;
00671   }
00672   while (start < end && absl::ascii_isspace(end[-1])) {
00673     --end;
00674   }
00675   if (start >= end) {
00676     return false;
00677   }
00678 
00679   // Consume sign.
00680   *negative_ptr = (start[0] == '-');
00681   if (*negative_ptr || start[0] == '+') {
00682     ++start;
00683     if (start >= end) {
00684       return false;
00685     }
00686   }
00687 
00688   // Consume base-dependent prefix.
00689   //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
00690   //  base 16: "0x" -> base 16
00691   // Also validate the base.
00692   if (base == 0) {
00693     if (end - start >= 2 && start[0] == '0' &&
00694         (start[1] == 'x' || start[1] == 'X')) {
00695       base = 16;
00696       start += 2;
00697       if (start >= end) {
00698         // "0x" with no digits after is invalid.
00699         return false;
00700       }
00701     } else if (end - start >= 1 && start[0] == '0') {
00702       base = 8;
00703       start += 1;
00704     } else {
00705       base = 10;
00706     }
00707   } else if (base == 16) {
00708     if (end - start >= 2 && start[0] == '0' &&
00709         (start[1] == 'x' || start[1] == 'X')) {
00710       start += 2;
00711       if (start >= end) {
00712         // "0x" with no digits after is invalid.
00713         return false;
00714       }
00715     }
00716   } else if (base >= 2 && base <= 36) {
00717     // okay
00718   } else {
00719     return false;
00720   }
00721   *text = absl::string_view(start, end - start);
00722   *base_ptr = base;
00723   return true;
00724 }
00725 
00726 // Consume digits.
00727 //
00728 // The classic loop:
00729 //
00730 //   for each digit
00731 //     value = value * base + digit
00732 //   value *= sign
00733 //
00734 // The classic loop needs overflow checking.  It also fails on the most
00735 // negative integer, -2147483648 in 32-bit two's complement representation.
00736 //
00737 // My improved loop:
00738 //
00739 //  if (!negative)
00740 //    for each digit
00741 //      value = value * base
00742 //      value = value + digit
00743 //  else
00744 //    for each digit
00745 //      value = value * base
00746 //      value = value - digit
00747 //
00748 // Overflow checking becomes simple.
00749 
00750 // Lookup tables per IntType:
00751 // vmax/base and vmin/base are precomputed because division costs at least 8ns.
00752 // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
00753 // struct of arrays) would probably be better in terms of d-cache for the most
00754 // commonly used bases.
00755 template <typename IntType>
00756 struct LookupTables {
00757   static const IntType kVmaxOverBase[];
00758   static const IntType kVminOverBase[];
00759 };
00760 
00761 // An array initializer macro for X/base where base in [0, 36].
00762 // However, note that lookups for base in [0, 1] should never happen because
00763 // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
00764 #define X_OVER_BASE_INITIALIZER(X)                                        \
00765   {                                                                       \
00766     0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
00767         X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18,   \
00768         X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26,   \
00769         X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34,   \
00770         X / 35, X / 36,                                                   \
00771   }
00772 
00773 template <typename IntType>
00774 const IntType LookupTables<IntType>::kVmaxOverBase[] =
00775     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
00776 
00777 template <typename IntType>
00778 const IntType LookupTables<IntType>::kVminOverBase[] =
00779     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
00780 
00781 #undef X_OVER_BASE_INITIALIZER
00782 
00783 template <typename IntType>
00784 inline bool safe_parse_positive_int(absl::string_view text, int base,
00785                                     IntType* value_p) {
00786   IntType value = 0;
00787   const IntType vmax = std::numeric_limits<IntType>::max();
00788   assert(vmax > 0);
00789   assert(base >= 0);
00790   assert(vmax >= static_cast<IntType>(base));
00791   const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
00792   const char* start = text.data();
00793   const char* end = start + text.size();
00794   // loop over digits
00795   for (; start < end; ++start) {
00796     unsigned char c = static_cast<unsigned char>(start[0]);
00797     int digit = kAsciiToInt[c];
00798     if (digit >= base) {
00799       *value_p = value;
00800       return false;
00801     }
00802     if (value > vmax_over_base) {
00803       *value_p = vmax;
00804       return false;
00805     }
00806     value *= base;
00807     if (value > vmax - digit) {
00808       *value_p = vmax;
00809       return false;
00810     }
00811     value += digit;
00812   }
00813   *value_p = value;
00814   return true;
00815 }
00816 
00817 template <typename IntType>
00818 inline bool safe_parse_negative_int(absl::string_view text, int base,
00819                                     IntType* value_p) {
00820   IntType value = 0;
00821   const IntType vmin = std::numeric_limits<IntType>::min();
00822   assert(vmin < 0);
00823   assert(vmin <= 0 - base);
00824   IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
00825   // 2003 c++ standard [expr.mul]
00826   // "... the sign of the remainder is implementation-defined."
00827   // Although (vmin/base)*base + vmin%base is always vmin.
00828   // 2011 c++ standard tightens the spec but we cannot rely on it.
00829   // TODO(junyer): Handle this in the lookup table generation.
00830   if (vmin % base > 0) {
00831     vmin_over_base += 1;
00832   }
00833   const char* start = text.data();
00834   const char* end = start + text.size();
00835   // loop over digits
00836   for (; start < end; ++start) {
00837     unsigned char c = static_cast<unsigned char>(start[0]);
00838     int digit = kAsciiToInt[c];
00839     if (digit >= base) {
00840       *value_p = value;
00841       return false;
00842     }
00843     if (value < vmin_over_base) {
00844       *value_p = vmin;
00845       return false;
00846     }
00847     value *= base;
00848     if (value < vmin + digit) {
00849       *value_p = vmin;
00850       return false;
00851     }
00852     value -= digit;
00853   }
00854   *value_p = value;
00855   return true;
00856 }
00857 
00858 // Input format based on POSIX.1-2008 strtol
00859 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
00860 template <typename IntType>
00861 inline bool safe_int_internal(absl::string_view text, IntType* value_p,
00862                               int base) {
00863   *value_p = 0;
00864   bool negative;
00865   if (!safe_parse_sign_and_base(&text, &base, &negative)) {
00866     return false;
00867   }
00868   if (!negative) {
00869     return safe_parse_positive_int(text, base, value_p);
00870   } else {
00871     return safe_parse_negative_int(text, base, value_p);
00872   }
00873 }
00874 
00875 template <typename IntType>
00876 inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
00877                                int base) {
00878   *value_p = 0;
00879   bool negative;
00880   if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
00881     return false;
00882   }
00883   return safe_parse_positive_int(text, base, value_p);
00884 }
00885 }  // anonymous namespace
00886 
00887 namespace numbers_internal {
00888 bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
00889   return safe_int_internal<int32_t>(text, value, base);
00890 }
00891 
00892 bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
00893   return safe_int_internal<int64_t>(text, value, base);
00894 }
00895 
00896 bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
00897   return safe_uint_internal<uint32_t>(text, value, base);
00898 }
00899 
00900 bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
00901   return safe_uint_internal<uint64_t>(text, value, base);
00902 }
00903 }  // namespace numbers_internal
00904 
00905 }  // namespace absl


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:42:15