dtoa_milo.h
Go to the documentation of this file.
1 #pragma once
2 #include <assert.h>
3 #include <math.h>
4 
5 #if defined(_MSC_VER)
6 #include "msinttypes/stdint.h"
7 #include <intrin.h>
8 #else
9 #include <stdint.h>
10 #endif
11 
12 #define UINT64_C2(h, l) ((static_cast<uint64_t>(h) << 32) | static_cast<uint64_t>(l))
13 
14 struct DiyFp {
15  DiyFp() {}
16 
17  DiyFp(uint64_t f, int e) : f(f), e(e) {}
18 
19  DiyFp(double d) {
20  union {
21  double d;
22  uint64_t u64;
23  } u = { d };
24 
25  int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
26  uint64_t significand = (u.u64 & kDpSignificandMask);
27  if (biased_e != 0) {
28  f = significand + kDpHiddenBit;
29  e = biased_e - kDpExponentBias;
30  }
31  else {
32  f = significand;
33  e = kDpMinExponent + 1;
34  }
35  }
36 
37  DiyFp operator-(const DiyFp& rhs) const {
38  assert(e == rhs.e);
39  assert(f >= rhs.f);
40  return DiyFp(f - rhs.f, e);
41  }
42 
43  DiyFp operator*(const DiyFp& rhs) const {
44 #if defined(_MSC_VER) && defined(_M_AMD64)
45  uint64_t h;
46  uint64_t l = _umul128(f, rhs.f, &h);
47  if (l & (uint64_t(1) << 63)) // rounding
48  h++;
49  return DiyFp(h, e + rhs.e + 64);
50 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
51  unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
52  uint64_t h = p >> 64;
53  uint64_t l = static_cast<uint64_t>(p);
54  if (l & (uint64_t(1) << 63)) // rounding
55  h++;
56  return DiyFp(h, e + rhs.e + 64);
57 #else
58  const uint64_t M32 = 0xFFFFFFFF;
59  const uint64_t a = f >> 32;
60  const uint64_t b = f & M32;
61  const uint64_t c = rhs.f >> 32;
62  const uint64_t d = rhs.f & M32;
63  const uint64_t ac = a * c;
64  const uint64_t bc = b * c;
65  const uint64_t ad = a * d;
66  const uint64_t bd = b * d;
67  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
68  tmp += 1U << 31;
69  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
70 #endif
71  }
72 
73  DiyFp Normalize() const {
74 #if defined(_MSC_VER) && defined(_M_AMD64)
75  unsigned long index;
76  _BitScanReverse64(&index, f);
77  return DiyFp(f << (63 - index), e - (63 - index));
78 #elif defined(__GNUC__)
79  int s = __builtin_clzll(f);
80  return DiyFp(f << s, e - s);
81 #else
82  DiyFp res = *this;
83  while (!(res.f & kDpHiddenBit)) {
84  res.f <<= 1;
85  res.e--;
86  }
88  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
89  return res;
90 #endif
91  }
92 
94 #if defined(_MSC_VER) && defined(_M_AMD64)
95  unsigned long index;
96  _BitScanReverse64(&index, f);
97  return DiyFp (f << (63 - index), e - (63 - index));
98 #else
99  DiyFp res = *this;
100  while (!(res.f & (kDpHiddenBit << 1))) {
101  res.f <<= 1;
102  res.e--;
103  }
104  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
105  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
106  return res;
107 #endif
108  }
109 
110  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
111  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
112  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
113  mi.f <<= mi.e - pl.e;
114  mi.e = pl.e;
115  *plus = pl;
116  *minus = mi;
117  }
118 
119  static const int kDiySignificandSize = 64;
120  static const int kDpSignificandSize = 52;
121  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
122  static const int kDpMinExponent = -kDpExponentBias;
123  static const uint64_t kDpExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
124  static const uint64_t kDpSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
125  static const uint64_t kDpHiddenBit = UINT64_C2(0x00100000, 0x00000000);
126 
127  uint64_t f;
128  int e;
129 };
130 
131 inline DiyFp GetCachedPower(int e, int* K) {
132  // 10^-348, 10^-340, ..., 10^340
133  static const uint64_t kCachedPowers_F[] = {
134  UINT64_C2(0xfa8fd5a0, 0x081c0288), UINT64_C2(0xbaaee17f, 0xa23ebf76),
135  UINT64_C2(0x8b16fb20, 0x3055ac76), UINT64_C2(0xcf42894a, 0x5dce35ea),
136  UINT64_C2(0x9a6bb0aa, 0x55653b2d), UINT64_C2(0xe61acf03, 0x3d1a45df),
137  UINT64_C2(0xab70fe17, 0xc79ac6ca), UINT64_C2(0xff77b1fc, 0xbebcdc4f),
138  UINT64_C2(0xbe5691ef, 0x416bd60c), UINT64_C2(0x8dd01fad, 0x907ffc3c),
139  UINT64_C2(0xd3515c28, 0x31559a83), UINT64_C2(0x9d71ac8f, 0xada6c9b5),
140  UINT64_C2(0xea9c2277, 0x23ee8bcb), UINT64_C2(0xaecc4991, 0x4078536d),
141  UINT64_C2(0x823c1279, 0x5db6ce57), UINT64_C2(0xc2109436, 0x4dfb5637),
142  UINT64_C2(0x9096ea6f, 0x3848984f), UINT64_C2(0xd77485cb, 0x25823ac7),
143  UINT64_C2(0xa086cfcd, 0x97bf97f4), UINT64_C2(0xef340a98, 0x172aace5),
144  UINT64_C2(0xb23867fb, 0x2a35b28e), UINT64_C2(0x84c8d4df, 0xd2c63f3b),
145  UINT64_C2(0xc5dd4427, 0x1ad3cdba), UINT64_C2(0x936b9fce, 0xbb25c996),
146  UINT64_C2(0xdbac6c24, 0x7d62a584), UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
147  UINT64_C2(0xf3e2f893, 0xdec3f126), UINT64_C2(0xb5b5ada8, 0xaaff80b8),
148  UINT64_C2(0x87625f05, 0x6c7c4a8b), UINT64_C2(0xc9bcff60, 0x34c13053),
149  UINT64_C2(0x964e858c, 0x91ba2655), UINT64_C2(0xdff97724, 0x70297ebd),
150  UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), UINT64_C2(0xf8a95fcf, 0x88747d94),
151  UINT64_C2(0xb9447093, 0x8fa89bcf), UINT64_C2(0x8a08f0f8, 0xbf0f156b),
152  UINT64_C2(0xcdb02555, 0x653131b6), UINT64_C2(0x993fe2c6, 0xd07b7fac),
153  UINT64_C2(0xe45c10c4, 0x2a2b3b06), UINT64_C2(0xaa242499, 0x697392d3),
154  UINT64_C2(0xfd87b5f2, 0x8300ca0e), UINT64_C2(0xbce50864, 0x92111aeb),
155  UINT64_C2(0x8cbccc09, 0x6f5088cc), UINT64_C2(0xd1b71758, 0xe219652c),
156  UINT64_C2(0x9c400000, 0x00000000), UINT64_C2(0xe8d4a510, 0x00000000),
157  UINT64_C2(0xad78ebc5, 0xac620000), UINT64_C2(0x813f3978, 0xf8940984),
158  UINT64_C2(0xc097ce7b, 0xc90715b3), UINT64_C2(0x8f7e32ce, 0x7bea5c70),
159  UINT64_C2(0xd5d238a4, 0xabe98068), UINT64_C2(0x9f4f2726, 0x179a2245),
160  UINT64_C2(0xed63a231, 0xd4c4fb27), UINT64_C2(0xb0de6538, 0x8cc8ada8),
161  UINT64_C2(0x83c7088e, 0x1aab65db), UINT64_C2(0xc45d1df9, 0x42711d9a),
162  UINT64_C2(0x924d692c, 0xa61be758), UINT64_C2(0xda01ee64, 0x1a708dea),
163  UINT64_C2(0xa26da399, 0x9aef774a), UINT64_C2(0xf209787b, 0xb47d6b85),
164  UINT64_C2(0xb454e4a1, 0x79dd1877), UINT64_C2(0x865b8692, 0x5b9bc5c2),
165  UINT64_C2(0xc83553c5, 0xc8965d3d), UINT64_C2(0x952ab45c, 0xfa97a0b3),
166  UINT64_C2(0xde469fbd, 0x99a05fe3), UINT64_C2(0xa59bc234, 0xdb398c25),
167  UINT64_C2(0xf6c69a72, 0xa3989f5c), UINT64_C2(0xb7dcbf53, 0x54e9bece),
168  UINT64_C2(0x88fcf317, 0xf22241e2), UINT64_C2(0xcc20ce9b, 0xd35c78a5),
169  UINT64_C2(0x98165af3, 0x7b2153df), UINT64_C2(0xe2a0b5dc, 0x971f303a),
170  UINT64_C2(0xa8d9d153, 0x5ce3b396), UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
171  UINT64_C2(0xbb764c4c, 0xa7a44410), UINT64_C2(0x8bab8eef, 0xb6409c1a),
172  UINT64_C2(0xd01fef10, 0xa657842c), UINT64_C2(0x9b10a4e5, 0xe9913129),
173  UINT64_C2(0xe7109bfb, 0xa19c0c9d), UINT64_C2(0xac2820d9, 0x623bf429),
174  UINT64_C2(0x80444b5e, 0x7aa7cf85), UINT64_C2(0xbf21e440, 0x03acdd2d),
175  UINT64_C2(0x8e679c2f, 0x5e44ff8f), UINT64_C2(0xd433179d, 0x9c8cb841),
176  UINT64_C2(0x9e19db92, 0xb4e31ba9), UINT64_C2(0xeb96bf6e, 0xbadf77d9),
177  UINT64_C2(0xaf87023b, 0x9bf0ee6b)
178  };
179  static const int16_t kCachedPowers_E[] = {
180  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
181  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
182  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
183  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
184  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
185  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
186  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
187  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
188  907, 933, 960, 986, 1013, 1039, 1066
189  };
190 
191  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
192  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
193  int k = static_cast<int>(dk);
194  if (k != dk)
195  k++;
196 
197  unsigned index = static_cast<unsigned>((k >> 3) + 1);
198  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
199 
200  assert(index < sizeof(kCachedPowers_F) / sizeof(kCachedPowers_F[0]));
201  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
202 }
203 
204 inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
205  while (rest < wp_w && delta - rest >= ten_kappa &&
206  (rest + ten_kappa < wp_w ||
207  wp_w - rest > rest + ten_kappa - wp_w)) {
208  buffer[len - 1]--;
209  rest += ten_kappa;
210  }
211 }
212 
213 inline unsigned CountDecimalDigit32(uint32_t n) {
214  // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
215  if (n < 10) return 1;
216  if (n < 100) return 2;
217  if (n < 1000) return 3;
218  if (n < 10000) return 4;
219  if (n < 100000) return 5;
220  if (n < 1000000) return 6;
221  if (n < 10000000) return 7;
222  if (n < 100000000) return 8;
223  if (n < 1000000000) return 9;
224  return 10;
225 }
226 
227 inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
228  static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
229  const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
230  const DiyFp wp_w = Mp - W;
231  uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
232  uint64_t p2 = Mp.f & (one.f - 1);
233  int kappa = static_cast<int>(CountDecimalDigit32(p1));
234  *len = 0;
235 
236  while (kappa > 0) {
237  uint32_t d;
238  switch (kappa) {
239  case 10: d = p1 / 1000000000; p1 %= 1000000000; break;
240  case 9: d = p1 / 100000000; p1 %= 100000000; break;
241  case 8: d = p1 / 10000000; p1 %= 10000000; break;
242  case 7: d = p1 / 1000000; p1 %= 1000000; break;
243  case 6: d = p1 / 100000; p1 %= 100000; break;
244  case 5: d = p1 / 10000; p1 %= 10000; break;
245  case 4: d = p1 / 1000; p1 %= 1000; break;
246  case 3: d = p1 / 100; p1 %= 100; break;
247  case 2: d = p1 / 10; p1 %= 10; break;
248  case 1: d = p1; p1 = 0; break;
249  default:
250 #if defined(_MSC_VER)
251  __assume(0);
252 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
253  __builtin_unreachable();
254 #else
255  d = 0;
256 #endif
257  }
258  if (d || *len)
259  buffer[(*len)++] = '0' + static_cast<char>(d);
260  kappa--;
261  uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
262  if (tmp <= delta) {
263  *K += kappa;
264  GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
265  return;
266  }
267  }
268 
269  // kappa = 0
270  for (;;) {
271  p2 *= 10;
272  delta *= 10;
273  char d = static_cast<char>(p2 >> -one.e);
274  if (d || *len)
275  buffer[(*len)++] = '0' + d;
276  p2 &= one.f - 1;
277  kappa--;
278  if (p2 < delta) {
279  *K += kappa;
280  GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]);
281  return;
282  }
283  }
284 }
285 
286 inline void Grisu2(double value, char* buffer, int* length, int* K) {
287  const DiyFp v(value);
288  DiyFp w_m, w_p;
289  v.NormalizedBoundaries(&w_m, &w_p);
290 
291  const DiyFp c_mk = GetCachedPower(w_p.e, K);
292  const DiyFp W = v.Normalize() * c_mk;
293  DiyFp Wp = w_p * c_mk;
294  DiyFp Wm = w_m * c_mk;
295  Wm.f++;
296  Wp.f--;
297  DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
298 }
299 
300 inline const char* GetDigitsLut() {
301  static const char cDigitsLut[200] = {
302  '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
303  '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
304  '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
305  '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
306  '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
307  '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
308  '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
309  '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
310  '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
311  '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
312  };
313  return cDigitsLut;
314 }
315 
316 inline void WriteExponent(int K, char* buffer) {
317  if (K < 0) {
318  *buffer++ = '-';
319  K = -K;
320  }
321 
322  if (K >= 100) {
323  *buffer++ = '0' + static_cast<char>(K / 100);
324  K %= 100;
325  const char* d = GetDigitsLut() + K * 2;
326  *buffer++ = d[0];
327  *buffer++ = d[1];
328  }
329  else if (K >= 10) {
330  const char* d = GetDigitsLut() + K * 2;
331  *buffer++ = d[0];
332  *buffer++ = d[1];
333  }
334  else
335  *buffer++ = '0' + static_cast<char>(K);
336 
337  *buffer = '\0';
338 }
339 
340 inline void Prettify(char* buffer, int length, int k) {
341  const int kk = length + k; // 10^(kk-1) <= v < 10^kk
342 
343  if (length <= kk && kk <= 21) {
344  // 1234e7 -> 12340000000
345  for (int i = length; i < kk; i++)
346  buffer[i] = '0';
347  buffer[kk] = '.';
348  buffer[kk + 1] = '0';
349  buffer[kk + 2] = '\0';
350  }
351  else if (0 < kk && kk <= 21) {
352  // 1234e-2 -> 12.34
353  memmove(&buffer[kk + 1], &buffer[kk], length - kk);
354  buffer[kk] = '.';
355  buffer[length + 1] = '\0';
356  }
357  else if (-6 < kk && kk <= 0) {
358  // 1234e-6 -> 0.001234
359  const int offset = 2 - kk;
360  memmove(&buffer[offset], &buffer[0], length);
361  buffer[0] = '0';
362  buffer[1] = '.';
363  for (int i = 2; i < offset; i++)
364  buffer[i] = '0';
365  buffer[length + offset] = '\0';
366  }
367  else if (length == 1) {
368  // 1e30
369  buffer[1] = 'e';
370  WriteExponent(kk - 1, &buffer[2]);
371  }
372  else {
373  // 1234e30 -> 1.234e33
374  memmove(&buffer[2], &buffer[1], length - 1);
375  buffer[1] = '.';
376  buffer[length + 1] = 'e';
377  WriteExponent(kk - 1, &buffer[0 + length + 2]);
378  }
379 }
380 
381 inline int dtoa_milo(double value, char* buffer) {
382  // Not handling NaN and inf
383  assert(!isnan(value));
384  assert(!isinf(value));
385 
386  if (value == 0) {
387  buffer[0] = '0';
388  buffer[1] = '.';
389  buffer[2] = '0';
390  buffer[3] = '\0';
391  return 3;
392  }
393  else {
394  if (value < 0) {
395  *buffer++ = '-';
396  value = -value;
397  }
398  int length, K;
399  Grisu2(value, buffer, &length, &K);
400  Prettify(buffer, length, K);
401  return length;
402  }
403 }
d
static const int kDpExponentBias
Definition: dtoa_milo.h:121
uint64_t f
Definition: dtoa_milo.h:127
int dtoa_milo(double value, char *buffer)
Definition: dtoa_milo.h:381
static const int kDiySignificandSize
Definition: dtoa_milo.h:119
DiyFp(double d)
Definition: dtoa_milo.h:19
XmlRpcServer s
GeneratorWrapper< T > value(T &&value)
Definition: catch.hpp:3589
const char * GetDigitsLut()
Definition: dtoa_milo.h:300
void GrisuRound(char *buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w)
Definition: dtoa_milo.h:204
DiyFp()
Definition: dtoa_milo.h:15
DiyFp operator*(const DiyFp &rhs) const
Definition: dtoa_milo.h:43
DiyFp operator-(const DiyFp &rhs) const
Definition: dtoa_milo.h:37
DiyFp Normalize() const
Definition: dtoa_milo.h:73
void Prettify(char *buffer, int length, int k)
Definition: dtoa_milo.h:340
static const uint64_t kDpSignificandMask
Definition: dtoa_milo.h:124
int e
Definition: dtoa_milo.h:128
void Grisu2(double value, char *buffer, int *length, int *K)
Definition: dtoa_milo.h:286
DiyFp NormalizeBoundary() const
Definition: dtoa_milo.h:93
static const uint64_t kDpHiddenBit
Definition: dtoa_milo.h:125
void DigitGen(const DiyFp &W, const DiyFp &Mp, uint64_t delta, char *buffer, int *len, int *K)
Definition: dtoa_milo.h:227
unsigned CountDecimalDigit32(uint32_t n)
Definition: dtoa_milo.h:213
static const uint64_t kDpExponentMask
Definition: dtoa_milo.h:123
DiyFp GetCachedPower(int e, int *K)
Definition: dtoa_milo.h:131
static const int kDpSignificandSize
Definition: dtoa_milo.h:120
static const int kDpMinExponent
Definition: dtoa_milo.h:122
TFSIMD_FORCE_INLINE tfScalar length(const Quaternion &q)
void WriteExponent(int K, char *buffer)
Definition: dtoa_milo.h:316
DiyFp(uint64_t f, int e)
Definition: dtoa_milo.h:17
#define UINT64_C2(h, l)
Definition: dtoa_milo.h:12
void NormalizedBoundaries(DiyFp *minus, DiyFp *plus) const
Definition: dtoa_milo.h:110


inertial_sense_ros
Author(s):
autogenerated on Sun Feb 28 2021 03:17:57