parser.cc
Go to the documentation of this file.
2 
3 #include <assert.h>
4 #include <string.h>
5 #include <wchar.h>
6 #include <cctype>
7 #include <cstdint>
8 
9 #include <algorithm>
10 #include <initializer_list>
11 #include <limits>
12 #include <ostream>
13 #include <string>
14 #include <unordered_set>
15 
16 namespace absl {
17 namespace str_format_internal {
18 
20 using LM = LengthMod::Id;
// Character-indexed tag table: 32 rows of 8 entries, one entry per character
// code 0x00-0xff. The trailing comment on each row gives the code range (hex)
// or the ASCII characters that the row covers. Entries written `CC::x` mark a
// conversion character, entries written `LM::x` mark a length modifier, and
// `{}` entries are value-initialized.
// NOTE(review): the declaration line that opens this initializer is not
// present in this listing (the embedded numbering skips 21). The residue at
// the end of the listing names it `ABSL_CONST_INIT const ConvTag kTags[256]`;
// the `CC` alias used below is likewise not visible here. Confirm both against
// the original file.
22  {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
23  {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
24  {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
25  {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
26  {}, {}, {}, {}, {}, {}, {}, {}, // 20-27
27  {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f
28  {}, {}, {}, {}, {}, {}, {}, {}, // 30-37
29  {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f
30  {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG
31  {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
32  {}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW
33  CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
34  {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
35  LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
36  CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
37  CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}~ DEL
38  {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
39  {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
40  {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
41  {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
42  {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
43  {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
44  {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
45  {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
46  {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
47  {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
48  {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
49  {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
50  {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
51  {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
52  {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
53  {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
54 };
55 
56 namespace {
57 
58 bool CheckFastPathSetting(const UnboundConversion& conv) {
59  bool should_be_basic = !conv.flags.left && //
60  !conv.flags.show_pos && //
61  !conv.flags.sign_col && //
62  !conv.flags.alt && //
63  !conv.flags.zero && //
64  (conv.width.value() == -1) &&
65  (conv.precision.value() == -1);
66  if (should_be_basic != conv.flags.basic) {
67  fprintf(stderr,
68  "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
69  "width=%d precision=%d\n",
70  conv.flags.basic, conv.flags.left, conv.flags.show_pos,
71  conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
72  conv.width.value(), conv.precision.value());
73  }
74  return should_be_basic == conv.flags.basic;
75 }
76 
77 template <bool is_positional>
78 const char *ConsumeConversion(const char *pos, const char *const end,
79  UnboundConversion *conv, int *next_arg) {
80  const char* const original_pos = pos;
81  char c;
82  // Read the next char into `c` and update `pos`. Returns false if there are
83  // no more chars to read.
84 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
85  do { \
86  if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
87  c = *pos++; \
88  } while (0)
89 
90  const auto parse_digits = [&] {
91  int digits = c - '0';
92  // We do not want to overflow `digits` so we consume at most digits10
93  // digits. If there are more digits the parsing will fail later on when the
94  // digit doesn't match the expected characters.
95  int num_digits = std::numeric_limits<int>::digits10;
96  for (;;) {
97  if (ABSL_PREDICT_FALSE(pos == end)) break;
98  c = *pos++;
99  if (!std::isdigit(c)) break;
100  --num_digits;
101  if (ABSL_PREDICT_FALSE(!num_digits)) break;
102  digits = 10 * digits + c - '0';
103  }
104  return digits;
105  };
106 
107  if (is_positional) {
109  if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
110  conv->arg_position = parse_digits();
111  assert(conv->arg_position > 0);
112  if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
113  }
114 
116 
117  // We should start with the basic flag on.
118  assert(conv->flags.basic);
119 
120  // Any non alpha character makes this conversion not basic.
121  // This includes flags (-+ #0), width (1-9, *) or precision (.).
122  // All conversion characters and length modifiers are alpha characters.
123  if (c < 'A') {
124  conv->flags.basic = false;
125 
126  for (; c <= '0';) {
127  // FIXME: We might be able to speed this up reusing the lookup table from
128  // above. It might require changing Flags to be a plain integer where we
129  // can |= a value.
130  switch (c) {
131  case '-':
132  conv->flags.left = true;
133  break;
134  case '+':
135  conv->flags.show_pos = true;
136  break;
137  case ' ':
138  conv->flags.sign_col = true;
139  break;
140  case '#':
141  conv->flags.alt = true;
142  break;
143  case '0':
144  conv->flags.zero = true;
145  break;
146  default:
147  goto flags_done;
148  }
150  }
151 flags_done:
152 
153  if (c <= '9') {
154  if (c >= '0') {
155  int maybe_width = parse_digits();
156  if (!is_positional && c == '$') {
157  if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
158  // Positional conversion.
159  *next_arg = -1;
160  conv->flags = Flags();
161  conv->flags.basic = true;
162  return ConsumeConversion<true>(original_pos, end, conv, next_arg);
163  }
164  conv->width.set_value(maybe_width);
165  } else if (c == '*') {
167  if (is_positional) {
168  if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
169  conv->width.set_from_arg(parse_digits());
170  if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
172  } else {
173  conv->width.set_from_arg(++*next_arg);
174  }
175  }
176  }
177 
178  if (c == '.') {
180  if (std::isdigit(c)) {
181  conv->precision.set_value(parse_digits());
182  } else if (c == '*') {
184  if (is_positional) {
185  if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
186  conv->precision.set_from_arg(parse_digits());
187  if (c != '$') return nullptr;
189  } else {
190  conv->precision.set_from_arg(++*next_arg);
191  }
192  } else {
193  conv->precision.set_value(0);
194  }
195  }
196  }
197 
198  auto tag = GetTagForChar(c);
199 
200  if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
201  if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
202 
203  // It is a length modifier.
205  LengthMod length_mod = tag.as_length();
207  if (c == 'h' && length_mod.id() == LengthMod::h) {
210  } else if (c == 'l' && length_mod.id() == LengthMod::l) {
213  } else {
214  conv->length_mod = length_mod;
215  }
216  tag = GetTagForChar(c);
217  if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
218  }
219 
220  assert(CheckFastPathSetting(*conv));
221  (void)(&CheckFastPathSetting);
222 
223  conv->conv = tag.as_conv();
224  if (!is_positional) conv->arg_position = ++*next_arg;
225  return pos;
226 }
227 
228 } // namespace
229 
230 const char *ConsumeUnboundConversion(const char *p, const char *end,
231  UnboundConversion *conv, int *next_arg) {
232  if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
233  return ConsumeConversion<false>(p, end, conv, next_arg);
234 }
235 
// Binds this consumer to `parsedformat` and starts the write cursor
// (`data_pos`) at the pointer returned by `parsedformat->data_.get()`.
237  explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
238  : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
239 
240  bool Append(string_view s) {
241  if (s.empty()) return true;
242 
243  size_t text_end = AppendText(s);
244 
245  if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
246  // Let's extend the existing text run.
247  parsed->items_.back().text_end = text_end;
248  } else {
249  // Let's make a new text run.
250  parsed->items_.push_back({false, text_end, {}});
251  }
252  return true;
253  }
254 
255  bool ConvertOne(const UnboundConversion &conv, string_view s) {
256  size_t text_end = AppendText(s);
257  parsed->items_.push_back({true, text_end, conv});
258  return true;
259  }
260 
261  size_t AppendText(string_view s) {
262  memcpy(data_pos, s.data(), s.size());
263  data_pos += s.size();
264  return static_cast<size_t>(data_pos - parsed->data_.get());
265  }
266 
268  char* data_pos;
269 };
270 
// NOTE(review): the first line of this constructor's signature is not present
// in this listing (the embedded numbering skips 271). The residue at the end
// of the listing gives it as
// `ParsedFormatBase(string_view format, bool allow_ignored, ...)` -- confirm
// against the original file.
272  std::initializer_list<Conv> convs)
// `data_` stays null for an empty format; otherwise it is a char buffer of
// exactly format.size() bytes.
273  : data_(format.empty() ? nullptr : new char[format.size()]) {
// An error is recorded when parsing fails or when the parsed conversions do
// not match `convs`. Because of the `||`, MatchesConversions() only runs when
// ParseFormatString() returned true.
274  has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
275  !MatchesConversions(allow_ignored, convs);
276 }
277 
// Checks the parsed conversion items against the expected conversion sets in
// `convs`. Returns false as soon as any item refers to an argument position
// beyond `convs` or uses a character that Contains() rejects for that
// position. Otherwise returns true when every entry of `convs` was referenced
// at least once, or when `allow_ignored` is set.
// NOTE(review): the first line of this definition is not present in this
// listing (the embedded numbering skips 278). The residue at the end of the
// listing names it `bool MatchesConversions(...) const` -- confirm against the
// original file.
279  bool allow_ignored, std::initializer_list<Conv> convs) const {
// Distinct argument positions referenced so far.
280  std::unordered_set<int> used;
// Validates one reference: `pos` is a 1-based index into `convs`, `c` is the
// character to look up in that entry. A negative `pos` becomes a very large
// value after the cast and is rejected by the size comparison.
// NOTE(review): `pos == 0` would pass the size comparison and read
// `convs.begin()[-1]`; presumably the parser never yields 0 -- confirm.
281  auto add_if_valid_conv = [&](int pos, char c) {
282  if (static_cast<size_t>(pos) > convs.size() ||
283  !Contains(convs.begin()[pos - 1], c))
284  return false;
285  used.insert(pos);
286  return true;
287  };
288  for (const ConversionItem &item : items_) {
// Text runs carry no argument references.
289  if (!item.is_conversion) continue;
290  auto &conv = item.conv;
// A precision or width taken from an argument is validated with '*'.
291  if (conv.precision.is_from_arg() &&
292  !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
293  return false;
294  if (conv.width.is_from_arg() &&
295  !add_if_valid_conv(conv.width.get_from_arg(), '*'))
296  return false;
// The converted argument itself is validated with its conversion character.
297  if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false;
298  }
// Every expected argument must have been referenced unless the caller allows
// ignored arguments.
299  return used.size() == convs.size() || allow_ignored;
300 }
301 
302 } // namespace str_format_internal
303 } // namespace absl
std::unique_ptr< char[]> data_
Definition: parser.h:240
#define ABSL_CONST_INIT
Definition: attributes.h:605
static LengthMod FromId(Id id)
Definition: extension.h:154
#define ABSL_PREDICT_FALSE(x)
Definition: optimization.h:177
ParsedFormatBase(string_view format, bool allow_ignored, std::initializer_list< Conv > convs)
Definition: parser.cc:271
#define X(c)
char * end
Definition: algorithm.h:29
constexpr T & get(variant< Types... > &v)
Definition: variant.h:297
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()
ParsedFormatConsumer(ParsedFormatBase *parsedformat)
Definition: parser.cc:237
ConvTag GetTagForChar(char c)
Definition: parser.h:102
std::string format(const std::string &, const time_point< seconds > &, const femtoseconds &, const time_zone &)
bool ParseFormatString(string_view src, Consumer consumer)
Definition: parser.h:117
uintptr_t size
UnboundConversion o
Definition: parser_test.cc:86
constexpr bool Contains(Conv set, char c)
Definition: extension.h:381
bool ConvertOne(const UnboundConversion &conv, string_view s)
Definition: parser.cc:255
std::vector< ConversionItem > items_
Definition: parser.h:241
const char * ConsumeUnboundConversion(const char *p, const char *end, UnboundConversion *conv, int *next_arg)
Definition: parser.cc:230
ABSL_CONST_INIT const ConvTag kTags[256]
Definition: parser.cc:21
bool MatchesConversions(bool allow_ignored, std::initializer_list< Conv > convs) const
Definition: parser.cc:278
#define C(x)
Definition: city_test.cc:47


abseil_cpp
Author(s):
autogenerated on Tue Jun 18 2019 19:44:37