numbers.cc
Go to the documentation of this file.
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // This file contains string processing functions related to
16 // numeric values.
17 
18 #include "absl/strings/numbers.h"
19 
20 #include <algorithm>
21 #include <cassert>
22 #include <cfloat> // for DBL_DIG and FLT_DIG
23 #include <cmath> // for HUGE_VAL
24 #include <cstdint>
25 #include <cstdio>
26 #include <cstdlib>
27 #include <cstring>
28 #include <iterator>
29 #include <limits>
30 #include <memory>
31 #include <utility>
32 
35 #include "absl/strings/ascii.h"
36 #include "absl/strings/charconv.h"
38 #include "absl/strings/match.h"
39 #include "absl/strings/str_cat.h"
40 
41 namespace absl {
42 
43 bool SimpleAtof(absl::string_view str, float* out) {
44  *out = 0.0;
45  str = StripAsciiWhitespace(str);
46  if (!str.empty() && str[0] == '+') {
47  str.remove_prefix(1);
48  }
49  auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
50  if (result.ec == std::errc::invalid_argument) {
51  return false;
52  }
53  if (result.ptr != str.data() + str.size()) {
54  // not all non-whitespace characters consumed
55  return false;
56  }
57  // from_chars() with DR 3081's current wording will return max() on
58  // overflow. SimpleAtof returns infinity instead.
59  if (result.ec == std::errc::result_out_of_range) {
60  if (*out > 1.0) {
61  *out = std::numeric_limits<float>::infinity();
62  } else if (*out < -1.0) {
63  *out = -std::numeric_limits<float>::infinity();
64  }
65  }
66  return true;
67 }
68 
69 bool SimpleAtod(absl::string_view str, double* out) {
70  *out = 0.0;
71  str = StripAsciiWhitespace(str);
72  if (!str.empty() && str[0] == '+') {
73  str.remove_prefix(1);
74  }
75  auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
76  if (result.ec == std::errc::invalid_argument) {
77  return false;
78  }
79  if (result.ptr != str.data() + str.size()) {
80  // not all non-whitespace characters consumed
81  return false;
82  }
83  // from_chars() with DR 3081's current wording will return max() on
84  // overflow. SimpleAtod returns infinity instead.
85  if (result.ec == std::errc::result_out_of_range) {
86  if (*out > 1.0) {
87  *out = std::numeric_limits<double>::infinity();
88  } else if (*out < -1.0) {
89  *out = -std::numeric_limits<double>::infinity();
90  }
91  }
92  return true;
93 }
94 
95 namespace {
96 
97 // Writes a two-character representation of 'i' to 'buf'. 'i' must be in the
98 // range 0 <= i < 100, and buf must have space for two characters. Example:
99 // char buf[2];
100 // PutTwoDigits(42, buf);
101 // // buf[0] == '4'
102 // // buf[1] == '2'
103 inline void PutTwoDigits(size_t i, char* buf) {
104  static const char two_ASCII_digits[100][2] = {
105  {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
106  {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
107  {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
108  {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
109  {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
110  {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
111  {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
112  {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
113  {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
114  {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
115  {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
116  {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
117  {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
118  {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
119  {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
120  {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
121  {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
122  {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
123  {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
124  {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}
125  };
126  assert(i < 100);
127  memcpy(buf, two_ASCII_digits[i], 2);
128 }
129 
130 } // namespace
131 
132 bool SimpleAtob(absl::string_view str, bool* out) {
133  ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
134  if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
135  EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
136  EqualsIgnoreCase(str, "1")) {
137  *out = true;
138  return true;
139  }
140  if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
141  EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
142  EqualsIgnoreCase(str, "0")) {
143  *out = false;
144  return true;
145  }
146  return false;
147 }
148 
149 // ----------------------------------------------------------------------
150 // FastIntToBuffer() overloads
151 //
152 // Like the Fast*ToBuffer() functions above, these are intended for speed.
153 // Unlike the Fast*ToBuffer() functions, however, these functions write
154 // their output to the beginning of the buffer. The caller is responsible
155 // for ensuring that the buffer has enough space to hold the output.
156 //
157 // Returns a pointer to the end of the string (i.e. the null character
158 // terminating the string).
159 // ----------------------------------------------------------------------
160 
161 namespace {
162 
163 // Used to optimize printing a decimal number's final digit.
164 const char one_ASCII_final_digits[10][2] {
165  {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
166  {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
167 };
168 
169 } // namespace
170 
171 char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
172  uint32_t digits;
173  // The idea of this implementation is to trim the number of divides to as few
174  // as possible, and also reducing memory stores and branches, by going in
175  // steps of two digits at a time rather than one whenever possible.
176  // The huge-number case is first, in the hopes that the compiler will output
177  // that case in one branch-free block of code, and only output conditional
178  // branches into it from below.
179  if (i >= 1000000000) { // >= 1,000,000,000
180  digits = i / 100000000; // 100,000,000
181  i -= digits * 100000000;
182  PutTwoDigits(digits, buffer);
183  buffer += 2;
184  lt100_000_000:
185  digits = i / 1000000; // 1,000,000
186  i -= digits * 1000000;
187  PutTwoDigits(digits, buffer);
188  buffer += 2;
189  lt1_000_000:
190  digits = i / 10000; // 10,000
191  i -= digits * 10000;
192  PutTwoDigits(digits, buffer);
193  buffer += 2;
194  lt10_000:
195  digits = i / 100;
196  i -= digits * 100;
197  PutTwoDigits(digits, buffer);
198  buffer += 2;
199  lt100:
200  digits = i;
201  PutTwoDigits(digits, buffer);
202  buffer += 2;
203  *buffer = 0;
204  return buffer;
205  }
206 
207  if (i < 100) {
208  digits = i;
209  if (i >= 10) goto lt100;
210  memcpy(buffer, one_ASCII_final_digits[i], 2);
211  return buffer + 1;
212  }
213  if (i < 10000) { // 10,000
214  if (i >= 1000) goto lt10_000;
215  digits = i / 100;
216  i -= digits * 100;
217  *buffer++ = '0' + digits;
218  goto lt100;
219  }
220  if (i < 1000000) { // 1,000,000
221  if (i >= 100000) goto lt1_000_000;
222  digits = i / 10000; // 10,000
223  i -= digits * 10000;
224  *buffer++ = '0' + digits;
225  goto lt10_000;
226  }
227  if (i < 100000000) { // 100,000,000
228  if (i >= 10000000) goto lt100_000_000;
229  digits = i / 1000000; // 1,000,000
230  i -= digits * 1000000;
231  *buffer++ = '0' + digits;
232  goto lt1_000_000;
233  }
234  // we already know that i < 1,000,000,000
235  digits = i / 100000000; // 100,000,000
236  i -= digits * 100000000;
237  *buffer++ = '0' + digits;
238  goto lt100_000_000;
239 }
240 
241 char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
242  uint32_t u = i;
243  if (i < 0) {
244  *buffer++ = '-';
245  // We need to do the negation in modular (i.e., "unsigned")
246  // arithmetic; MSVC++ apprently warns for plain "-u", so
247  // we write the equivalent expression "0 - u" instead.
248  u = 0 - u;
249  }
250  return numbers_internal::FastIntToBuffer(u, buffer);
251 }
252 
253 char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
254  uint32_t u32 = static_cast<uint32_t>(i);
255  if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
256 
257  // Here we know i has at least 10 decimal digits.
258  uint64_t top_1to11 = i / 1000000000;
259  u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
260  uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
261 
262  if (top_1to11_32 == top_1to11) {
263  buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
264  } else {
265  // top_1to11 has more than 32 bits too; print it in two steps.
266  uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
267  uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
268  buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
269  PutTwoDigits(mid_2, buffer);
270  buffer += 2;
271  }
272 
273  // We have only 9 digits now, again the maximum uint32_t can handle fully.
274  uint32_t digits = u32 / 10000000; // 10,000,000
275  u32 -= digits * 10000000;
276  PutTwoDigits(digits, buffer);
277  buffer += 2;
278  digits = u32 / 100000; // 100,000
279  u32 -= digits * 100000;
280  PutTwoDigits(digits, buffer);
281  buffer += 2;
282  digits = u32 / 1000; // 1,000
283  u32 -= digits * 1000;
284  PutTwoDigits(digits, buffer);
285  buffer += 2;
286  digits = u32 / 10;
287  u32 -= digits * 10;
288  PutTwoDigits(digits, buffer);
289  buffer += 2;
290  memcpy(buffer, one_ASCII_final_digits[u32], 2);
291  return buffer + 1;
292 }
293 
294 char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
295  uint64_t u = i;
296  if (i < 0) {
297  *buffer++ = '-';
298  u = 0 - u;
299  }
300  return numbers_internal::FastIntToBuffer(u, buffer);
301 }
302 
303 // Given a 128-bit number expressed as a pair of uint64_t, high half first,
304 // return that number multiplied by the given 32-bit value. If the result is
305 // too large to fit in a 128-bit number, divide it by 2 until it fits.
306 static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
307  uint32_t mul) {
308  uint64_t bits0_31 = num.second & 0xFFFFFFFF;
309  uint64_t bits32_63 = num.second >> 32;
310  uint64_t bits64_95 = num.first & 0xFFFFFFFF;
311  uint64_t bits96_127 = num.first >> 32;
312 
313  // The picture so far: each of these 64-bit values has only the lower 32 bits
314  // filled in.
315  // bits96_127: [ 00000000 xxxxxxxx ]
316  // bits64_95: [ 00000000 xxxxxxxx ]
317  // bits32_63: [ 00000000 xxxxxxxx ]
318  // bits0_31: [ 00000000 xxxxxxxx ]
319 
320  bits0_31 *= mul;
321  bits32_63 *= mul;
322  bits64_95 *= mul;
323  bits96_127 *= mul;
324 
325  // Now the top halves may also have value, though all 64 of their bits will
326  // never be set at the same time, since they are a result of a 32x32 bit
327  // multiply. This makes the carry calculation slightly easier.
328  // bits96_127: [ mmmmmmmm | mmmmmmmm ]
329  // bits64_95: [ | mmmmmmmm mmmmmmmm | ]
330  // bits32_63: | [ mmmmmmmm | mmmmmmmm ]
331  // bits0_31: | [ | mmmmmmmm mmmmmmmm ]
332  // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
333 
334  uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
335  uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
336  (bits0_63 < bits0_31);
337  uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
338  if (bits128_up == 0) return {bits64_127, bits0_63};
339 
340  int shift = 64 - base_internal::CountLeadingZeros64(bits128_up);
341  uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
342  uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
343  return {hi, lo};
344 }
345 
346 // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
347 // where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
348 // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
349 static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
350  std::pair<uint64_t, uint64_t> result = {num, 0};
351  while (expfive >= 13) {
352  // 5^13 is the highest power of five that will fit in a 32-bit integer.
353  result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
354  expfive -= 13;
355  }
356  constexpr int powers_of_five[13] = {
357  1,
358  5,
359  5 * 5,
360  5 * 5 * 5,
361  5 * 5 * 5 * 5,
362  5 * 5 * 5 * 5 * 5,
363  5 * 5 * 5 * 5 * 5 * 5,
364  5 * 5 * 5 * 5 * 5 * 5 * 5,
365  5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
366  5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
367  5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
368  5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
369  5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
370  result = Mul32(result, powers_of_five[expfive & 15]);
371  int shift = base_internal::CountLeadingZeros64(result.first);
372  if (shift != 0) {
373  result.first = (result.first << shift) + (result.second >> (64 - shift));
374  result.second = (result.second << shift);
375  }
376  return result;
377 }
378 
379 struct ExpDigits {
380  int32_t exponent;
381  char digits[6];
382 };
383 
384 // SplitToSix converts value, a positive double-precision floating-point number,
385 // into a base-10 exponent and 6 ASCII digits, where the first digit is never
386 // zero. For example, SplitToSix(1) returns an exponent of zero and a digits
387 // array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
388 // two possible representations, e.g. value = 100000.5, then "round to even" is
389 // performed.
390 static ExpDigits SplitToSix(const double value) {
391  ExpDigits exp_dig;
392  int exp = 5;
393  double d = value;
394  // First step: calculate a close approximation of the output, where the
395  // value d will be between 100,000 and 999,999, representing the digits
396  // in the output ASCII array, and exp is the base-10 exponent. It would be
397  // faster to use a table here, and to look up the base-2 exponent of value,
398  // however value is an IEEE-754 64-bit number, so the table would have 2,000
399  // entries, which is not cache-friendly.
400  if (d >= 999999.5) {
401  if (d >= 1e+261) exp += 256, d *= 1e-256;
402  if (d >= 1e+133) exp += 128, d *= 1e-128;
403  if (d >= 1e+69) exp += 64, d *= 1e-64;
404  if (d >= 1e+37) exp += 32, d *= 1e-32;
405  if (d >= 1e+21) exp += 16, d *= 1e-16;
406  if (d >= 1e+13) exp += 8, d *= 1e-8;
407  if (d >= 1e+9) exp += 4, d *= 1e-4;
408  if (d >= 1e+7) exp += 2, d *= 1e-2;
409  if (d >= 1e+6) exp += 1, d *= 1e-1;
410  } else {
411  if (d < 1e-250) exp -= 256, d *= 1e256;
412  if (d < 1e-122) exp -= 128, d *= 1e128;
413  if (d < 1e-58) exp -= 64, d *= 1e64;
414  if (d < 1e-26) exp -= 32, d *= 1e32;
415  if (d < 1e-10) exp -= 16, d *= 1e16;
416  if (d < 1e-2) exp -= 8, d *= 1e8;
417  if (d < 1e+2) exp -= 4, d *= 1e4;
418  if (d < 1e+4) exp -= 2, d *= 1e2;
419  if (d < 1e+5) exp -= 1, d *= 1e1;
420  }
421  // At this point, d is in the range [99999.5..999999.5) and exp is in the
422  // range [-324..308]. Since we need to round d up, we want to add a half
423  // and truncate.
424  // However, the technique above may have lost some precision, due to its
425  // repeated multiplication by constants that each may be off by half a bit
426  // of precision. This only matters if we're close to the edge though.
427  // Since we'd like to know if the fractional part of d is close to a half,
428  // we multiply it by 65536 and see if the fractional part is close to 32768.
429  // (The number doesn't have to be a power of two,but powers of two are faster)
430  uint64_t d64k = d * 65536;
431  int dddddd; // A 6-digit decimal integer.
432  if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
433  // OK, it's fairly likely that precision was lost above, which is
434  // not a surprise given only 52 mantissa bits are available. Therefore
435  // redo the calculation using 128-bit numbers. (64 bits are not enough).
436 
437  // Start out with digits rounded down; maybe add one below.
438  dddddd = static_cast<int>(d64k / 65536);
439 
440  // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
441  // value we're representing, of course, is M.mmm... * 2^exp2.
442  int exp2;
443  double m = std::frexp(value, &exp2);
444  uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
445  // std::frexp returns an m value in the range [0.5, 1.0), however we
446  // can't multiply it by 2^64 and convert to an integer because some FPUs
447  // throw an exception when converting an number higher than 2^63 into an
448  // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
449  // since m only has 52 significant bits anyway.
450  mantissa <<= 1;
451  exp2 -= 64; // not needed, but nice for debugging
452 
453  // OK, we are here to compare:
454  // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
455  // so we can round up dddddd if appropriate. Those values span the full
456  // range of 600 orders of magnitude of IEE 64-bit floating-point.
457  // Fortunately, we already know they are very close, so we don't need to
458  // track the base-2 exponent of both sides. This greatly simplifies the
459  // the math since the 2^exp2 calculation is unnecessary and the power-of-10
460  // calculation can become a power-of-5 instead.
461 
462  std::pair<uint64_t, uint64_t> edge, val;
463  if (exp >= 6) {
464  // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
465  // Since we're tossing powers of two, 2 * dddddd + 1 is the
466  // same as dddddd + 0.5
467  edge = PowFive(2 * dddddd + 1, exp - 5);
468 
469  val.first = mantissa;
470  val.second = 0;
471  } else {
472  // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
473  // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
474  // mantissa * 5 ^ (5 - exp)
475  edge = PowFive(2 * dddddd + 1, 0);
476 
477  val = PowFive(mantissa, 5 - exp);
478  }
479  // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
480  // val.second, edge.first, edge.second);
481  if (val > edge) {
482  dddddd++;
483  } else if (val == edge) {
484  dddddd += (dddddd & 1);
485  }
486  } else {
487  // Here, we are not close to the edge.
488  dddddd = static_cast<int>((d64k + 32768) / 65536);
489  }
490  if (dddddd == 1000000) {
491  dddddd = 100000;
492  exp += 1;
493  }
494  exp_dig.exponent = exp;
495 
496  int two_digits = dddddd / 10000;
497  dddddd -= two_digits * 10000;
498  PutTwoDigits(two_digits, &exp_dig.digits[0]);
499 
500  two_digits = dddddd / 100;
501  dddddd -= two_digits * 100;
502  PutTwoDigits(two_digits, &exp_dig.digits[2]);
503 
504  PutTwoDigits(dddddd, &exp_dig.digits[4]);
505  return exp_dig;
506 }
507 
508 // Helper function for fast formatting of floating-point.
509 // The result is the same as "%g", a.k.a. "%.6g".
510 size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
511  static_assert(std::numeric_limits<float>::is_iec559,
512  "IEEE-754/IEC-559 support only");
513 
514  char* out = buffer; // we write data to out, incrementing as we go, but
515  // FloatToBuffer always returns the address of the buffer
516  // passed in.
517 
518  if (std::isnan(d)) {
519  strcpy(out, "nan"); // NOLINT(runtime/printf)
520  return 3;
521  }
522  if (d == 0) { // +0 and -0 are handled here
523  if (std::signbit(d)) *out++ = '-';
524  *out++ = '0';
525  *out = 0;
526  return out - buffer;
527  }
528  if (d < 0) {
529  *out++ = '-';
530  d = -d;
531  }
532  if (std::isinf(d)) {
533  strcpy(out, "inf"); // NOLINT(runtime/printf)
534  return out + 3 - buffer;
535  }
536 
537  auto exp_dig = SplitToSix(d);
538  int exp = exp_dig.exponent;
539  const char* digits = exp_dig.digits;
540  out[0] = '0';
541  out[1] = '.';
542  switch (exp) {
543  case 5:
544  memcpy(out, &digits[0], 6), out += 6;
545  *out = 0;
546  return out - buffer;
547  case 4:
548  memcpy(out, &digits[0], 5), out += 5;
549  if (digits[5] != '0') {
550  *out++ = '.';
551  *out++ = digits[5];
552  }
553  *out = 0;
554  return out - buffer;
555  case 3:
556  memcpy(out, &digits[0], 4), out += 4;
557  if ((digits[5] | digits[4]) != '0') {
558  *out++ = '.';
559  *out++ = digits[4];
560  if (digits[5] != '0') *out++ = digits[5];
561  }
562  *out = 0;
563  return out - buffer;
564  case 2:
565  memcpy(out, &digits[0], 3), out += 3;
566  *out++ = '.';
567  memcpy(out, &digits[3], 3);
568  out += 3;
569  while (out[-1] == '0') --out;
570  if (out[-1] == '.') --out;
571  *out = 0;
572  return out - buffer;
573  case 1:
574  memcpy(out, &digits[0], 2), out += 2;
575  *out++ = '.';
576  memcpy(out, &digits[2], 4);
577  out += 4;
578  while (out[-1] == '0') --out;
579  if (out[-1] == '.') --out;
580  *out = 0;
581  return out - buffer;
582  case 0:
583  memcpy(out, &digits[0], 1), out += 1;
584  *out++ = '.';
585  memcpy(out, &digits[1], 5);
586  out += 5;
587  while (out[-1] == '0') --out;
588  if (out[-1] == '.') --out;
589  *out = 0;
590  return out - buffer;
591  case -4:
592  out[2] = '0';
593  ++out;
595  case -3:
596  out[2] = '0';
597  ++out;
599  case -2:
600  out[2] = '0';
601  ++out;
603  case -1:
604  out += 2;
605  memcpy(out, &digits[0], 6);
606  out += 6;
607  while (out[-1] == '0') --out;
608  *out = 0;
609  return out - buffer;
610  }
611  assert(exp < -4 || exp >= 6);
612  out[0] = digits[0];
613  assert(out[1] == '.');
614  out += 2;
615  memcpy(out, &digits[1], 5), out += 5;
616  while (out[-1] == '0') --out;
617  if (out[-1] == '.') --out;
618  *out++ = 'e';
619  if (exp > 0) {
620  *out++ = '+';
621  } else {
622  *out++ = '-';
623  exp = -exp;
624  }
625  if (exp > 99) {
626  int dig1 = exp / 100;
627  exp -= dig1 * 100;
628  *out++ = '0' + dig1;
629  }
630  PutTwoDigits(exp, out);
631  out += 2;
632  *out = 0;
633  return out - buffer;
634 }
635 
636 namespace {
637 // Represents integer values of digits.
638 // Uses 36 to indicate an invalid character since we support
639 // bases up to 36.
640 static const int8_t kAsciiToInt[256] = {
641  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
642  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
643  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
644  6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
645  18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
646  36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
647  24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
648  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
649  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
650  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
651  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
652  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
653  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
654  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
655 
656 // Parse the sign and optional hex or oct prefix in text.
657 inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/,
658  int* base_ptr /*inout*/,
659  bool* negative_ptr /*output*/) {
660  if (text->data() == nullptr) {
661  return false;
662  }
663 
664  const char* start = text->data();
665  const char* end = start + text->size();
666  int base = *base_ptr;
667 
668  // Consume whitespace.
669  while (start < end && absl::ascii_isspace(start[0])) {
670  ++start;
671  }
672  while (start < end && absl::ascii_isspace(end[-1])) {
673  --end;
674  }
675  if (start >= end) {
676  return false;
677  }
678 
679  // Consume sign.
680  *negative_ptr = (start[0] == '-');
681  if (*negative_ptr || start[0] == '+') {
682  ++start;
683  if (start >= end) {
684  return false;
685  }
686  }
687 
688  // Consume base-dependent prefix.
689  // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
690  // base 16: "0x" -> base 16
691  // Also validate the base.
692  if (base == 0) {
693  if (end - start >= 2 && start[0] == '0' &&
694  (start[1] == 'x' || start[1] == 'X')) {
695  base = 16;
696  start += 2;
697  if (start >= end) {
698  // "0x" with no digits after is invalid.
699  return false;
700  }
701  } else if (end - start >= 1 && start[0] == '0') {
702  base = 8;
703  start += 1;
704  } else {
705  base = 10;
706  }
707  } else if (base == 16) {
708  if (end - start >= 2 && start[0] == '0' &&
709  (start[1] == 'x' || start[1] == 'X')) {
710  start += 2;
711  if (start >= end) {
712  // "0x" with no digits after is invalid.
713  return false;
714  }
715  }
716  } else if (base >= 2 && base <= 36) {
717  // okay
718  } else {
719  return false;
720  }
721  *text = absl::string_view(start, end - start);
722  *base_ptr = base;
723  return true;
724 }
725 
726 // Consume digits.
727 //
728 // The classic loop:
729 //
730 // for each digit
731 // value = value * base + digit
732 // value *= sign
733 //
734 // The classic loop needs overflow checking. It also fails on the most
735 // negative integer, -2147483648 in 32-bit two's complement representation.
736 //
737 // My improved loop:
738 //
739 // if (!negative)
740 // for each digit
741 // value = value * base
742 // value = value + digit
743 // else
744 // for each digit
745 // value = value * base
746 // value = value - digit
747 //
748 // Overflow checking becomes simple.
749 
750 // Lookup tables per IntType:
751 // vmax/base and vmin/base are precomputed because division costs at least 8ns.
752 // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
753 // struct of arrays) would probably be better in terms of d-cache for the most
754 // commonly used bases.
755 template <typename IntType>
756 struct LookupTables {
757  static const IntType kVmaxOverBase[];
758  static const IntType kVminOverBase[];
759 };
760 
761 // An array initializer macro for X/base where base in [0, 36].
762 // However, note that lookups for base in [0, 1] should never happen because
763 // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
764 #define X_OVER_BASE_INITIALIZER(X) \
765  { \
766  0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
767  X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \
768  X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \
769  X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \
770  X / 35, X / 36, \
771  }
772 
773 template <typename IntType>
774 const IntType LookupTables<IntType>::kVmaxOverBase[] =
775  X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
776 
777 template <typename IntType>
778 const IntType LookupTables<IntType>::kVminOverBase[] =
779  X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
780 
781 #undef X_OVER_BASE_INITIALIZER
782 
783 template <typename IntType>
784 inline bool safe_parse_positive_int(absl::string_view text, int base,
785  IntType* value_p) {
786  IntType value = 0;
787  const IntType vmax = std::numeric_limits<IntType>::max();
788  assert(vmax > 0);
789  assert(base >= 0);
790  assert(vmax >= static_cast<IntType>(base));
791  const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
792  const char* start = text.data();
793  const char* end = start + text.size();
794  // loop over digits
795  for (; start < end; ++start) {
796  unsigned char c = static_cast<unsigned char>(start[0]);
797  int digit = kAsciiToInt[c];
798  if (digit >= base) {
799  *value_p = value;
800  return false;
801  }
802  if (value > vmax_over_base) {
803  *value_p = vmax;
804  return false;
805  }
806  value *= base;
807  if (value > vmax - digit) {
808  *value_p = vmax;
809  return false;
810  }
811  value += digit;
812  }
813  *value_p = value;
814  return true;
815 }
816 
817 template <typename IntType>
818 inline bool safe_parse_negative_int(absl::string_view text, int base,
819  IntType* value_p) {
820  IntType value = 0;
821  const IntType vmin = std::numeric_limits<IntType>::min();
822  assert(vmin < 0);
823  assert(vmin <= 0 - base);
824  IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
825  // 2003 c++ standard [expr.mul]
826  // "... the sign of the remainder is implementation-defined."
827  // Although (vmin/base)*base + vmin%base is always vmin.
828  // 2011 c++ standard tightens the spec but we cannot rely on it.
829  // TODO(junyer): Handle this in the lookup table generation.
830  if (vmin % base > 0) {
831  vmin_over_base += 1;
832  }
833  const char* start = text.data();
834  const char* end = start + text.size();
835  // loop over digits
836  for (; start < end; ++start) {
837  unsigned char c = static_cast<unsigned char>(start[0]);
838  int digit = kAsciiToInt[c];
839  if (digit >= base) {
840  *value_p = value;
841  return false;
842  }
843  if (value < vmin_over_base) {
844  *value_p = vmin;
845  return false;
846  }
847  value *= base;
848  if (value < vmin + digit) {
849  *value_p = vmin;
850  return false;
851  }
852  value -= digit;
853  }
854  *value_p = value;
855  return true;
856 }
857 
858 // Input format based on POSIX.1-2008 strtol
859 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
860 template <typename IntType>
861 inline bool safe_int_internal(absl::string_view text, IntType* value_p,
862  int base) {
863  *value_p = 0;
864  bool negative;
865  if (!safe_parse_sign_and_base(&text, &base, &negative)) {
866  return false;
867  }
868  if (!negative) {
869  return safe_parse_positive_int(text, base, value_p);
870  } else {
871  return safe_parse_negative_int(text, base, value_p);
872  }
873 }
874 
875 template <typename IntType>
876 inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
877  int base) {
878  *value_p = 0;
879  bool negative;
880  if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
881  return false;
882  }
883  return safe_parse_positive_int(text, base, value_p);
884 }
885 } // anonymous namespace
886 
887 namespace numbers_internal {
888 bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
889  return safe_int_internal<int32_t>(text, value, base);
890 }
891 
892 bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
893  return safe_int_internal<int64_t>(text, value, base);
894 }
895 
896 bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
897  return safe_uint_internal<uint32_t>(text, value, base);
898 }
899 
900 bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
901  return safe_uint_internal<uint64_t>(text, value, base);
902 }
903 } // namespace numbers_internal
904 
905 } // namespace absl
bool safe_strtou64_base(absl::string_view text, uint64_t *value, int base)
Definition: numbers.cc:900
char * FastIntToBuffer(int32_t, char *)
Definition: numbers.cc:241
void remove_prefix(size_type n)
Definition: string_view.h:310
static const IntType kVminOverBase[]
Definition: numbers.cc:758
static const IntType kVmaxOverBase[]
Definition: numbers.cc:757
size_t SixDigitsToBuffer(double d, char *buffer)
Definition: numbers.cc:510
bool ascii_isspace(unsigned char c)
Definition: ascii.h:93
uint64_t mantissa
Definition: charconv.cc:238
ABSL_BASE_INTERNAL_FORCEINLINE int CountLeadingZeros64(uint64_t n)
Definition: bits.h:60
char * end
char buf[N]
static std::pair< uint64_t, uint64_t > PowFive(uint64_t num, int expfive)
Definition: numbers.cc:349
Definition: algorithm.h:29
bool safe_strtou32_base(absl::string_view text, uint32_t *value, int base)
Definition: numbers.cc:896
constexpr size_type size() const noexcept
Definition: string_view.h:260
bool SimpleAtod(absl::string_view str, double *out)
Definition: numbers.cc:69
bool SimpleAtof(absl::string_view str, float *out)
Definition: numbers.cc:43
bool safe_strto32_base(absl::string_view text, int32_t *value, int base)
Definition: numbers.cc:888
#define ABSL_RAW_CHECK(condition, message)
Definition: raw_logging.h:57
#define ABSL_FALLTHROUGH_INTENDED
Definition: macros.h:129
size_t value
static ExpDigits SplitToSix(const double value)
Definition: numbers.cc:390
ABSL_MUST_USE_RESULT absl::string_view StripAsciiWhitespace(absl::string_view str)
Definition: ascii.h:223
constexpr bool empty() const noexcept
Definition: string_view.h:277
bool safe_strto64_base(absl::string_view text, int64_t *value, int base)
Definition: numbers.cc:892
static std::pair< uint64_t, uint64_t > Mul32(std::pair< uint64_t, uint64_t > num, uint32_t mul)
Definition: numbers.cc:306
from_chars_result from_chars(const char *first, const char *last, double &value, chars_format fmt)
Definition: charconv.cc:679
constexpr const_pointer data() const noexcept
Definition: string_view.h:302
int32_t exponent
Definition: numbers.cc:380
#define X_OVER_BASE_INITIALIZER(X)
Definition: numbers.cc:764
char * out
Definition: mutex.h:1013
bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2)
Definition: match.cc:21
bool SimpleAtob(absl::string_view str, bool *out)
Definition: numbers.cc:132
char digits[6]
Definition: numbers.cc:381


abseil_cpp
Author(s):
autogenerated on Mon Feb 28 2022 21:31:19