parser.h
Go to the documentation of this file.
1 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
2 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
3 
4 #include <limits.h>
5 #include <stddef.h>
6 #include <stdlib.h>
7 
8 #include <cassert>
9 #include <initializer_list>
10 #include <iosfwd>
11 #include <iterator>
12 #include <memory>
13 #include <vector>
14 
17 
18 namespace absl {
19 namespace str_format_internal {
20 
21 // The analyzed properties of a single specified conversion.
24  : flags() /* This is required to zero all the fields of flags. */ {
25  flags.basic = true;
26  }
27 
28  class InputValue {  // One int that holds either a literal value or a '*' argument index.
29  public:
30  void set_value(int value) {
31  assert(value >= 0);
32  value_ = value;
33  }
34  int value() const { return value_; }
35 
36  // Marks the value as "from arg", a.k.a. the '*' format.
37  // Requires `value >= 1`.
38  // When set, is_from_arg() returns true and get_from_arg() returns the
39  // original value.
40  // `value()`'s return value is unspecified in this state.
41  void set_from_arg(int value) {
42  assert(value > 0);
43  value_ = -value - 1;  // Always <= -2, so it cannot collide with -1 or a literal value.
44  }
45  bool is_from_arg() const { return value_ < -1; }
46  int get_from_arg() const {
47  assert(is_from_arg());
48  return -value_ - 1;  // Inverse of the encoding applied in set_from_arg().
49  }
50 
51  private:
52  int value_ = -1;  // -1: initial state (no setter called); >= 0: literal; < -1: encoded arg index.
53  };
54 
55  // No need to initialize. It will always be set in the parser.
57 
60 
64 };
65 
66 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
67 // Examples of valid specs: "s", "d", "-12.6f".
68 // If valid, it returns the first character following the conversion spec,
69 // and the spec part is broken down and returned in 'conv'.
70 // If invalid, returns nullptr.
71 const char* ConsumeUnboundConversion(const char* p, const char* end,
72  UnboundConversion* conv, int* next_arg);
73 
74 // Helper tag class for the table below.
75 // It allows fast `char -> ConversionChar/LengthMod` checking and conversions.
76 class ConvTag {
77  public:
78  constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT
79  // We invert the length modifiers to make them negative so that we can easily
80  // test for them.
81  constexpr ConvTag(LengthMod::Id id) : tag_(~id) {} // NOLINT
82  // Everything else is -128, which is negative to make is_conv() simpler.
83  constexpr ConvTag() : tag_(-128) {}
84 
85  bool is_conv() const { return tag_ >= 0; }  // Conversion ids are stored unchanged, hence non-negative.
86  bool is_length() const { return tag_ < 0 && tag_ != -128; }  // Negative, but not the "neither" sentinel.
88  assert(is_conv());
89  return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_));
90  }
91  LengthMod as_length() const {
92  assert(is_length());
93  return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_));  // ~ undoes the inversion done in the constructor.
94  }
95 
96  private:
97  // >= 0: conversion-char id; -128: neither; any other negative: ~(length-modifier id).
98  // NOTE(review): assumes every id fits in int8_t and that ~id never equals -128 -- confirm
99  // against the Id enums, which are not visible here.
97  std::int8_t tag_;
98 };
99 
100 extern const ConvTag kTags[256];
101 // Keep a single table for all the conversion chars and length modifiers.
102 inline ConvTag GetTagForChar(char c) {
103  return kTags[static_cast<unsigned char>(c)];  // unsigned char keeps the index in [0, 255] even where plain char is signed.
104 }
105 
106 // Parse the format string provided in 'src' and pass the identified items into
107 // 'consumer'.
108 // Text runs will be passed by calling
109 // Consumer::Append(string_view);
110 // ConversionItems will be passed by calling
111 // Consumer::ConvertOne(UnboundConversion, string_view);
112 // In the case of ConvertOne, the string_view that is passed is the
113 // portion of the format string corresponding to the conversion, not including
114 // the leading %. On success, it returns true. On failure, it stops and returns
115 // false. Failure means either that the consumer returned false or that the
115 // format is malformed (a trailing lone '%', or an invalid conversion spec).
116 template <typename Consumer>
117 bool ParseFormatString(string_view src, Consumer consumer) {
118  int next_arg = 0;
119  const char* p = src.data();
120  const char* const end = p + src.size();
121  while (p != end) {
122  const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
123  if (!percent) {
124  // We found the last substring.
125  return consumer.Append(string_view(p, end - p));
126  }
127  // We found a percent, so push the text run then process the percent.
127  // The run is empty when the percent is the first remaining character.
128  if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
129  return false;
130  }
131  if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;  // A '%' as the last character is an error.
132 
133  auto tag = GetTagForChar(percent[1]);
134  if (tag.is_conv()) {
135  if (ABSL_PREDICT_FALSE(next_arg < 0)) {
136  // This indicates an error in the format string.
137  // The only way to get `next_arg < 0` here is to have a positional
138  // argument first which sets next_arg to -1 and then a non-positional
139  // argument.
140  return false;
141  }
142  p = percent + 2;
143 
144  // Keep this case separate from the one below.
145  // ConvertOne is more efficient when the compiler can see that the `basic`
146  // flag is set.
148  conv.conv = tag.as_conv();
149  conv.arg_position = ++next_arg;  // Sequential arguments are numbered starting at 1.
150  if (ABSL_PREDICT_FALSE(
151  !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
152  return false;
153  }
154  } else if (percent[1] != '%') {
156  p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
157  if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
158  if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
159  conv, string_view(percent + 1, p - (percent + 1))))) {
160  return false;
161  }
162  } else {
163  if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;  // "%%" emits one literal percent sign.
164  p = percent + 2;
165  continue;
166  }
167  }
168  return true;  // Reached when src is empty or the last item consumed was a conversion or "%%".
169 }
170 
171 // Always returns true, or fails to compile in a constexpr context if s does not
172 // point to a constexpr char array.
173 constexpr bool EnsureConstexpr(string_view s) {
174  return s.empty() || s[0] == s[0];  // Deliberate tautology: the value is always true; the expression only has to read s[0].
175 }
176 
178  public:
179  explicit ParsedFormatBase(string_view format, bool allow_ignored,
180  std::initializer_list<Conv> convs);
181 
182  ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
183 
184  ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
185 
187  if (this == &other) return *this;
188  has_error_ = other.has_error_;
189  items_ = other.items_;
190  size_t text_size = items_.empty() ? 0 : items_.back().text_end;  // The last item's end offset is the total length of the text buffer.
191  data_.reset(new char[text_size]);  // Deep copy: this object gets its own buffer.
192  // NOTE(review): a moved-from `other` has a null data_ and empty items_, so this
192  // becomes memcpy(dst, nullptr, 0). Passing a null pointer to memcpy is formally
192  // undefined even when the size is 0 -- confirm whether a guard is wanted.
192  memcpy(data_.get(), other.data_.get(), text_size);
193  return *this;
194  }
195 
197  if (this == &other) return *this;
198  has_error_ = other.has_error_;
199  data_ = std::move(other.data_);  // Takes over the text buffer; other.data_ becomes null.
200  items_ = std::move(other.items_);
201  // Reset the vector to make sure the invariants hold.
201  // With no items left, `other` describes zero bytes of text, which matches
201  // its now-null data_.
202  other.items_.clear();
203  return *this;
204  }
205 
206  template <typename Consumer>
207  bool ProcessFormat(Consumer consumer) const {  // Replays the stored items into `consumer`, in order.
208  const char* const base = data_.get();
209  string_view text(base, 0);  // Empty view at the start, so the first item begins at offset 0.
210  for (const auto& item : items_) {
211  const char* const end = text.data() + text.size();  // End of the previous item's text.
212  text = string_view(end, (base + item.text_end) - end);  // This item's text: from the previous end up to item.text_end.
213  if (item.is_conversion) {
214  if (!consumer.ConvertOne(item.conv, text)) return false;
215  } else {
216  if (!consumer.Append(text)) return false;
217  }
218  }
219  return !has_error_;  // All items are still passed to the consumer first; an error recorded on this object then makes the result false.
220  }
221 
222  bool has_error() const { return has_error_; }
223 
224  private:
225  // Returns whether the conversions match and if !allow_ignored it verifies
226  // that all conversions are used by the format.
227  bool MatchesConversions(bool allow_ignored,
228  std::initializer_list<Conv> convs) const;
229 
230  struct ParsedFormatConsumer;
231 
232  struct ConversionItem {
234  // Points to the past-the-end location of this element in the data_ array.
235  size_t text_end;
237  };
238 
240  std::unique_ptr<char[]> data_;
241  std::vector<ConversionItem> items_;
242 };
243 
244 
245 // A value type representing a preparsed format. These can be created, copied
246 // around, and reused to speed up formatting loops.
247 // The user must specify through the template arguments the conversion
248 // characters used in the format. This will be checked at compile time.
249 //
250 // This class uses Conv enum values to specify each argument.
251 // This allows for more flexibility as you can specify multiple possible
252 // conversion characters for each argument.
253 // ParsedFormat<char...> is a simplified alias for when the user only
254 // needs to specify a single conversion character for each argument.
255 //
256 // Example:
257 // // Extended format supports multiple characters per argument:
258 // using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
259 // MyFormat GetFormat(bool use_hex) {
260 // if (use_hex) return MyFormat("foo %x bar");
261 // return MyFormat("foo %d bar");
262 // }
263 // // 'format' can be used with any value that supports 'd' and 'x',
264 // // like `int`.
265 // auto format = GetFormat(use_hex);
266 // value = StringF(format, i);
267 //
268 // This class also supports runtime format checking with the ::New() and
269 // ::NewAllowIgnored() factory functions.
270 // This is the only API that allows the user to pass a runtime-specified format
271 // string. These factory functions will return nullptr if the format does not
272 // match the conversions requested by the user.
273 template <str_format_internal::Conv... C>
275  public:
277 #if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
278  __attribute__((
279  enable_if(str_format_internal::EnsureConstexpr(format),
280  "Format std::string is not constexpr."),
281  enable_if(str_format_internal::ValidFormatImpl<C...>(format),
282  "Format specified does not match the template arguments.")))
283 #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
284  : ExtendedParsedFormat(format, false) {
285  }
286 
287  // ExtendedParsedFormat factory function.
288  // The user still has to specify the conversion characters, but they will not
289  // be checked at compile time. Instead, it will be checked at runtime.
290  // This delays the checking to runtime, but allows the user to pass
291  // dynamically sourced formats.
292  // It returns nullptr if the format does not match the conversion characters.
293  // The user is responsible for checking the return value before using it.
294  //
295  // The 'New' variant will check that all the specified arguments are being
296  // consumed by the format and return nullptr if any argument is being ignored.
297  // The 'NewAllowIgnored' variant will not verify this and will allow formats
298  // that ignore arguments.
299  static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
300  return New(format, false);
301  }
302  static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
303  string_view format) {
304  return New(format, true);
305  }
306 
307  private:
308  static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
309  bool allow_ignored) {  // Shared implementation behind the two public factory functions.
310  std::unique_ptr<ExtendedParsedFormat> conv(
311  new ExtendedParsedFormat(format, allow_ignored));  // This constructor is declared in the private section of the class.
312  if (conv->has_error()) return nullptr;  // A format that failed parsing or checking is never handed to the caller.
313  return conv;
314  }
315 
316  ExtendedParsedFormat(string_view s, bool allow_ignored)
317  : ParsedFormatBase(s, allow_ignored, {C...}) {}
318 };
319 } // namespace str_format_internal
320 } // namespace absl
321 
322 #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
constexpr ConvTag(ConversionChar::Id id)
Definition: parser.h:78
std::unique_ptr< char[]> data_
Definition: parser.h:240
ParsedFormatBase & operator=(const ParsedFormatBase &other)
Definition: parser.h:186
static std::unique_ptr< ExtendedParsedFormat > New(string_view format)
Definition: parser.h:299
static LengthMod FromId(Id id)
Definition: extension.h:154
#define ABSL_PREDICT_FALSE(x)
Definition: optimization.h:177
ConversionChar as_conv() const
Definition: parser.h:87
ParsedFormatBase & operator=(ParsedFormatBase &&other)
Definition: parser.h:196
char * end
constexpr ConvTag(LengthMod::Id id)
Definition: parser.h:81
Definition: algorithm.h:29
constexpr size_type size() const noexcept
Definition: string_view.h:260
bool ProcessFormat(Consumer consumer) const
Definition: parser.h:207
const char * data_
Definition: test_util.cc:98
ParsedFormatBase(ParsedFormatBase &&other)
Definition: parser.h:184
LengthMod as_length() const
Definition: parser.h:91
static std::unique_ptr< ExtendedParsedFormat > New(string_view format, bool allow_ignored)
Definition: parser.h:308
ConvTag GetTagForChar(char c)
Definition: parser.h:102
std::string format(const std::string &, const time_point< seconds > &, const femtoseconds &, const time_zone &)
absl::string_view tag_
Definition: usage.cc:90
bool ParseFormatString(string_view src, Consumer consumer)
Definition: parser.h:117
static std::unique_ptr< ExtendedParsedFormat > NewAllowIgnored(string_view format)
Definition: parser.h:302
constexpr bool empty() const noexcept
Definition: string_view.h:277
constexpr bool EnsureConstexpr(string_view s)
Definition: parser.h:173
constexpr const_pointer data() const noexcept
Definition: string_view.h:302
static ConversionChar FromId(Id id)
Definition: extension.h:236
std::vector< ConversionItem > items_
Definition: parser.h:241
ExtendedParsedFormat(string_view s, bool allow_ignored)
Definition: parser.h:316
constexpr absl::remove_reference_t< T > && move(T &&t) noexcept
Definition: utility.h:219
const char * ConsumeUnboundConversion(const char *p, const char *end, UnboundConversion *conv, int *next_arg)
Definition: parser.cc:230
ABSL_CONST_INIT const ConvTag kTags[256]
Definition: parser.cc:21
ParsedFormatBase(const ParsedFormatBase &other)
Definition: parser.h:182
#define C(x)
Definition: city_test.cc:47


abseil_cpp
Author(s):
autogenerated on Mon Feb 28 2022 21:31:19