parser.h
Go to the documentation of this file.
00001 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
00002 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
00003 
00004 #include <limits.h>
00005 #include <stddef.h>
00006 #include <stdlib.h>
00007 
00008 #include <cassert>
00009 #include <initializer_list>
00010 #include <iosfwd>
00011 #include <iterator>
00012 #include <memory>
00013 #include <vector>
00014 
00015 #include "absl/strings/internal/str_format/checker.h"
00016 #include "absl/strings/internal/str_format/extension.h"
00017 
00018 namespace absl {
00019 namespace str_format_internal {
00020 
00021 // The analyzed properties of a single specified conversion.
00022 struct UnboundConversion {
00023   UnboundConversion()
00024       : flags() /* This is required to zero all the fields of flags. */ {
00025     flags.basic = true;
00026   }
00027 
00028   class InputValue {
00029    public:
00030     void set_value(int value) {
00031       assert(value >= 0);
00032       value_ = value;
00033     }
00034     int value() const { return value_; }
00035 
00036     // Marks the value as "from arg". aka the '*' format.
00037     // Requires `value >= 1`.
00038     // When set, is_from_arg() return true and get_from_arg() returns the
00039     // original value.
00040     // `value()`'s return value is unspecfied in this state.
00041     void set_from_arg(int value) {
00042       assert(value > 0);
00043       value_ = -value - 1;
00044     }
00045     bool is_from_arg() const { return value_ < -1; }
00046     int get_from_arg() const {
00047       assert(is_from_arg());
00048       return -value_ - 1;
00049     }
00050 
00051    private:
00052     int value_ = -1;
00053   };
00054 
00055   // No need to initialize. It will always be set in the parser.
00056   int arg_position;
00057 
00058   InputValue width;
00059   InputValue precision;
00060 
00061   Flags flags;
00062   LengthMod length_mod;
00063   ConversionChar conv;
00064 };
00065 
00066 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
00067 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
00068 // If valid, it returns the first character following the conversion spec,
00069 // and the spec part is broken down and returned in 'conv'.
00070 // If invalid, returns nullptr.
00071 const char* ConsumeUnboundConversion(const char* p, const char* end,
00072                                      UnboundConversion* conv, int* next_arg);
00073 
00074 // Helper tag class for the table below.
00075 // It allows fast `char -> ConversionChar/LengthMod` checking and conversions.
00076 class ConvTag {
00077  public:
00078   constexpr ConvTag(ConversionChar::Id id) : tag_(id) {}  // NOLINT
00079   // We invert the length modifiers to make them negative so that we can easily
00080   // test for them.
00081   constexpr ConvTag(LengthMod::Id id) : tag_(~id) {}  // NOLINT
00082   // Everything else is -128, which is negative to make is_conv() simpler.
00083   constexpr ConvTag() : tag_(-128) {}
00084 
00085   bool is_conv() const { return tag_ >= 0; }
00086   bool is_length() const { return tag_ < 0 && tag_ != -128; }
00087   ConversionChar as_conv() const {
00088     assert(is_conv());
00089     return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_));
00090   }
00091   LengthMod as_length() const {
00092     assert(is_length());
00093     return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_));
00094   }
00095 
00096  private:
00097   std::int8_t tag_;
00098 };
00099 
00100 extern const ConvTag kTags[256];
00101 // Keep a single table for all the conversion chars and length modifiers.
00102 inline ConvTag GetTagForChar(char c) {
00103   return kTags[static_cast<unsigned char>(c)];
00104 }
00105 
00106 // Parse the format string provided in 'src' and pass the identified items into
00107 // 'consumer'.
00108 // Text runs will be passed by calling
00109 //   Consumer::Append(string_view);
00110 // ConversionItems will be passed by calling
00111 //   Consumer::ConvertOne(UnboundConversion, string_view);
00112 // In the case of ConvertOne, the string_view that is passed is the
00113 // portion of the format string corresponding to the conversion, not including
00114 // the leading %. On success, it returns true. On failure, it stops and returns
00115 // false.
00116 template <typename Consumer>
00117 bool ParseFormatString(string_view src, Consumer consumer) {
00118   int next_arg = 0;
00119   const char* p = src.data();
00120   const char* const end = p + src.size();
00121   while (p != end) {
00122     const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
00123     if (!percent) {
00124       // We found the last substring.
00125       return consumer.Append(string_view(p, end - p));
00126     }
00127     // We found a percent, so push the text run then process the percent.
00128     if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
00129       return false;
00130     }
00131     if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
00132 
00133     auto tag = GetTagForChar(percent[1]);
00134     if (tag.is_conv()) {
00135       if (ABSL_PREDICT_FALSE(next_arg < 0)) {
00136         // This indicates an error in the format std::string.
00137         // The only way to get `next_arg < 0` here is to have a positional
00138         // argument first which sets next_arg to -1 and then a non-positional
00139         // argument.
00140         return false;
00141       }
00142       p = percent + 2;
00143 
00144       // Keep this case separate from the one below.
00145       // ConvertOne is more efficient when the compiler can see that the `basic`
00146       // flag is set.
00147       UnboundConversion conv;
00148       conv.conv = tag.as_conv();
00149       conv.arg_position = ++next_arg;
00150       if (ABSL_PREDICT_FALSE(
00151               !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
00152         return false;
00153       }
00154     } else if (percent[1] != '%') {
00155       UnboundConversion conv;
00156       p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
00157       if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
00158       if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
00159           conv, string_view(percent + 1, p - (percent + 1))))) {
00160         return false;
00161       }
00162     } else {
00163       if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
00164       p = percent + 2;
00165       continue;
00166     }
00167   }
00168   return true;
00169 }
00170 
// Evaluates to true for every input. Its purpose is to be used in a constexpr
// context, where it fails to compile unless `s` points to a constexpr char
// array: reading `s[0]` forces the contents to be a constant expression.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : (s[0] == s[0]);
}
00176 
00177 class ParsedFormatBase {
00178  public:
00179   explicit ParsedFormatBase(string_view format, bool allow_ignored,
00180                             std::initializer_list<Conv> convs);
00181 
00182   ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
00183 
00184   ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
00185 
00186   ParsedFormatBase& operator=(const ParsedFormatBase& other) {
00187     if (this == &other) return *this;
00188     has_error_ = other.has_error_;
00189     items_ = other.items_;
00190     size_t text_size = items_.empty() ? 0 : items_.back().text_end;
00191     data_.reset(new char[text_size]);
00192     memcpy(data_.get(), other.data_.get(), text_size);
00193     return *this;
00194   }
00195 
00196   ParsedFormatBase& operator=(ParsedFormatBase&& other) {
00197     if (this == &other) return *this;
00198     has_error_ = other.has_error_;
00199     data_ = std::move(other.data_);
00200     items_ = std::move(other.items_);
00201     // Reset the vector to make sure the invariants hold.
00202     other.items_.clear();
00203     return *this;
00204   }
00205 
00206   template <typename Consumer>
00207   bool ProcessFormat(Consumer consumer) const {
00208     const char* const base = data_.get();
00209     string_view text(base, 0);
00210     for (const auto& item : items_) {
00211       const char* const end = text.data() + text.size();
00212       text = string_view(end, (base + item.text_end) - end);
00213       if (item.is_conversion) {
00214         if (!consumer.ConvertOne(item.conv, text)) return false;
00215       } else {
00216         if (!consumer.Append(text)) return false;
00217       }
00218     }
00219     return !has_error_;
00220   }
00221 
00222   bool has_error() const { return has_error_; }
00223 
00224  private:
00225   // Returns whether the conversions match and if !allow_ignored it verifies
00226   // that all conversions are used by the format.
00227   bool MatchesConversions(bool allow_ignored,
00228                           std::initializer_list<Conv> convs) const;
00229 
00230   struct ParsedFormatConsumer;
00231 
00232   struct ConversionItem {
00233     bool is_conversion;
00234     // Points to the past-the-end location of this element in the data_ array.
00235     size_t text_end;
00236     UnboundConversion conv;
00237   };
00238 
00239   bool has_error_;
00240   std::unique_ptr<char[]> data_;
00241   std::vector<ConversionItem> items_;
00242 };
00243 
00244 
00245 // A value type representing a preparsed format.  These can be created, copied
00246 // around, and reused to speed up formatting loops.
00247 // The user must specify through the template arguments the conversion
00248 // characters used in the format. This will be checked at compile time.
00249 //
00250 // This class uses Conv enum values to specify each argument.
00251 // This allows for more flexibility as you can specify multiple possible
00252 // conversion characters for each argument.
00253 // ParsedFormat<char...> is a simplified alias for when the user only
00254 // needs to specify a single conversion character for each argument.
00255 //
00256 // Example:
00257 //   // Extended format supports multiple characters per argument:
00258 //   using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
00259 //   MyFormat GetFormat(bool use_hex) {
00260 //     if (use_hex) return MyFormat("foo %x bar");
00261 //     return MyFormat("foo %d bar");
00262 //   }
00263 //   // 'format' can be used with any value that supports 'd' and 'x',
00264 //   // like `int`.
00265 //   auto format = GetFormat(use_hex);
00266 //   value = StringF(format, i);
00267 //
00268 // This class also supports runtime format checking with the ::New() and
00269 // ::NewAllowIgnored() factory functions.
00270 // This is the only API that allows the user to pass a runtime specified format
00271 // string. These factory functions will return NULL if the format does not match
00272 // the conversions requested by the user.
00273 template <str_format_internal::Conv... C>
00274 class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
00275  public:
00276   explicit ExtendedParsedFormat(string_view format)
00277 #if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
00278       __attribute__((
00279           enable_if(str_format_internal::EnsureConstexpr(format),
00280                     "Format std::string is not constexpr."),
00281           enable_if(str_format_internal::ValidFormatImpl<C...>(format),
00282                     "Format specified does not match the template arguments.")))
00283 #endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
00284       : ExtendedParsedFormat(format, false) {
00285   }
00286 
00287   // ExtendedParsedFormat factory function.
00288   // The user still has to specify the conversion characters, but they will not
00289   // be checked at compile time. Instead, it will be checked at runtime.
00290   // This delays the checking to runtime, but allows the user to pass
00291   // dynamically sourced formats.
00292   // It returns NULL if the format does not match the conversion characters.
00293   // The user is responsible for checking the return value before using it.
00294   //
00295   // The 'New' variant will check that all the specified arguments are being
00296   // consumed by the format and return NULL if any argument is being ignored.
00297   // The 'NewAllowIgnored' variant will not verify this and will allow formats
00298   // that ignore arguments.
00299   static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
00300     return New(format, false);
00301   }
00302   static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
00303       string_view format) {
00304     return New(format, true);
00305   }
00306 
00307  private:
00308   static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
00309                                                    bool allow_ignored) {
00310     std::unique_ptr<ExtendedParsedFormat> conv(
00311         new ExtendedParsedFormat(format, allow_ignored));
00312     if (conv->has_error()) return nullptr;
00313     return conv;
00314   }
00315 
00316   ExtendedParsedFormat(string_view s, bool allow_ignored)
00317       : ParsedFormatBase(s, allow_ignored, {C...}) {}
00318 };
00319 }  // namespace str_format_internal
00320 }  // namespace absl
00321 
00322 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:42:15