parser.cc
Go to the documentation of this file.
00001 #include "absl/strings/internal/str_format/parser.h"
00002 
00003 #include <assert.h>
00004 #include <string.h>
00005 #include <wchar.h>
00006 #include <cctype>
00007 #include <cstdint>
00008 
00009 #include <algorithm>
00010 #include <initializer_list>
00011 #include <limits>
00012 #include <ostream>
00013 #include <string>
00014 #include <unordered_set>
00015 
00016 namespace absl {
00017 namespace str_format_internal {
00018 
00019 using CC = ConversionChar::Id;
00020 using LM = LengthMod::Id;
00021 ABSL_CONST_INIT const ConvTag kTags[256] = {
00022     {},    {},    {},    {},    {},    {},    {},    {},     // 00-07
00023     {},    {},    {},    {},    {},    {},    {},    {},     // 08-0f
00024     {},    {},    {},    {},    {},    {},    {},    {},     // 10-17
00025     {},    {},    {},    {},    {},    {},    {},    {},     // 18-1f
00026     {},    {},    {},    {},    {},    {},    {},    {},     // 20-27
00027     {},    {},    {},    {},    {},    {},    {},    {},     // 28-2f
00028     {},    {},    {},    {},    {},    {},    {},    {},     // 30-37
00029     {},    {},    {},    {},    {},    {},    {},    {},     // 38-3f
00030     {},    CC::A, {},    CC::C, {},    CC::E, CC::F, CC::G,  // @ABCDEFG
00031     {},    {},    {},    {},    LM::L, {},    {},    {},     // HIJKLMNO
00032     {},    {},    {},    CC::S, {},    {},    {},    {},     // PQRSTUVW
00033     CC::X, {},    {},    {},    {},    {},    {},    {},     // XYZ[\]^_
00034     {},    CC::a, {},    CC::c, CC::d, CC::e, CC::f, CC::g,  // `abcdefg
00035     LM::h, CC::i, LM::j, {},    LM::l, {},    CC::n, CC::o,  // hijklmno
00036     CC::p, LM::q, {},    CC::s, LM::t, CC::u, {},    {},     // pqrstuvw
00037     CC::x, {},    LM::z, {},    {},    {},    {},    {},     // xyz{|}!
00038     {},    {},    {},    {},    {},    {},    {},    {},     // 80-87
00039     {},    {},    {},    {},    {},    {},    {},    {},     // 88-8f
00040     {},    {},    {},    {},    {},    {},    {},    {},     // 90-97
00041     {},    {},    {},    {},    {},    {},    {},    {},     // 98-9f
00042     {},    {},    {},    {},    {},    {},    {},    {},     // a0-a7
00043     {},    {},    {},    {},    {},    {},    {},    {},     // a8-af
00044     {},    {},    {},    {},    {},    {},    {},    {},     // b0-b7
00045     {},    {},    {},    {},    {},    {},    {},    {},     // b8-bf
00046     {},    {},    {},    {},    {},    {},    {},    {},     // c0-c7
00047     {},    {},    {},    {},    {},    {},    {},    {},     // c8-cf
00048     {},    {},    {},    {},    {},    {},    {},    {},     // d0-d7
00049     {},    {},    {},    {},    {},    {},    {},    {},     // d8-df
00050     {},    {},    {},    {},    {},    {},    {},    {},     // e0-e7
00051     {},    {},    {},    {},    {},    {},    {},    {},     // e8-ef
00052     {},    {},    {},    {},    {},    {},    {},    {},     // f0-f7
00053     {},    {},    {},    {},    {},    {},    {},    {},     // f8-ff
00054 };
00055 
00056 namespace {
00057 
00058 bool CheckFastPathSetting(const UnboundConversion& conv) {
00059   bool should_be_basic = !conv.flags.left &&      //
00060                          !conv.flags.show_pos &&  //
00061                          !conv.flags.sign_col &&  //
00062                          !conv.flags.alt &&       //
00063                          !conv.flags.zero &&      //
00064                          (conv.width.value() == -1) &&
00065                          (conv.precision.value() == -1);
00066   if (should_be_basic != conv.flags.basic) {
00067     fprintf(stderr,
00068             "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
00069             "width=%d precision=%d\n",
00070             conv.flags.basic, conv.flags.left, conv.flags.show_pos,
00071             conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
00072             conv.width.value(), conv.precision.value());
00073   }
00074   return should_be_basic == conv.flags.basic;
00075 }
00076 
00077 template <bool is_positional>
00078 const char *ConsumeConversion(const char *pos, const char *const end,
00079                               UnboundConversion *conv, int *next_arg) {
00080   const char* const original_pos = pos;
00081   char c;
00082   // Read the next char into `c` and update `pos`. Returns false if there are
00083   // no more chars to read.
00084 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
00085   do {                                                  \
00086     if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
00087     c = *pos++;                                         \
00088   } while (0)
00089 
00090   const auto parse_digits = [&] {
00091     int digits = c - '0';
00092     // We do not want to overflow `digits` so we consume at most digits10
00093     // digits. If there are more digits the parsing will fail later on when the
00094     // digit doesn't match the expected characters.
00095     int num_digits = std::numeric_limits<int>::digits10;
00096     for (;;) {
00097       if (ABSL_PREDICT_FALSE(pos == end)) break;
00098       c = *pos++;
00099       if (!std::isdigit(c)) break;
00100       --num_digits;
00101       if (ABSL_PREDICT_FALSE(!num_digits)) break;
00102       digits = 10 * digits + c - '0';
00103     }
00104     return digits;
00105   };
00106 
00107   if (is_positional) {
00108     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00109     if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
00110     conv->arg_position = parse_digits();
00111     assert(conv->arg_position > 0);
00112     if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
00113   }
00114 
00115   ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00116 
00117   // We should start with the basic flag on.
00118   assert(conv->flags.basic);
00119 
00120   // Any non alpha character makes this conversion not basic.
00121   // This includes flags (-+ #0), width (1-9, *) or precision (.).
00122   // All conversion characters and length modifiers are alpha characters.
00123   if (c < 'A') {
00124     conv->flags.basic = false;
00125 
00126     for (; c <= '0';) {
00127       // FIXME: We might be able to speed this up reusing the lookup table from
00128       // above. It might require changing Flags to be a plain integer where we
00129       // can |= a value.
00130       switch (c) {
00131         case '-':
00132           conv->flags.left = true;
00133           break;
00134         case '+':
00135           conv->flags.show_pos = true;
00136           break;
00137         case ' ':
00138           conv->flags.sign_col = true;
00139           break;
00140         case '#':
00141           conv->flags.alt = true;
00142           break;
00143         case '0':
00144           conv->flags.zero = true;
00145           break;
00146         default:
00147           goto flags_done;
00148       }
00149       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00150     }
00151 flags_done:
00152 
00153     if (c <= '9') {
00154       if (c >= '0') {
00155         int maybe_width = parse_digits();
00156         if (!is_positional && c == '$') {
00157           if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
00158           // Positional conversion.
00159           *next_arg = -1;
00160           conv->flags = Flags();
00161           conv->flags.basic = true;
00162           return ConsumeConversion<true>(original_pos, end, conv, next_arg);
00163         }
00164         conv->width.set_value(maybe_width);
00165       } else if (c == '*') {
00166         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00167         if (is_positional) {
00168           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
00169           conv->width.set_from_arg(parse_digits());
00170           if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
00171           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00172         } else {
00173           conv->width.set_from_arg(++*next_arg);
00174         }
00175       }
00176     }
00177 
00178     if (c == '.') {
00179       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00180       if (std::isdigit(c)) {
00181         conv->precision.set_value(parse_digits());
00182       } else if (c == '*') {
00183         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00184         if (is_positional) {
00185           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
00186           conv->precision.set_from_arg(parse_digits());
00187           if (c != '$') return nullptr;
00188           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00189         } else {
00190           conv->precision.set_from_arg(++*next_arg);
00191         }
00192       } else {
00193         conv->precision.set_value(0);
00194       }
00195     }
00196   }
00197 
00198   auto tag = GetTagForChar(c);
00199 
00200   if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
00201     if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
00202 
00203     // It is a length modifier.
00204     using str_format_internal::LengthMod;
00205     LengthMod length_mod = tag.as_length();
00206     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00207     if (c == 'h' && length_mod.id() == LengthMod::h) {
00208       conv->length_mod = LengthMod::FromId(LengthMod::hh);
00209       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00210     } else if (c == 'l' && length_mod.id() == LengthMod::l) {
00211       conv->length_mod = LengthMod::FromId(LengthMod::ll);
00212       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00213     } else {
00214       conv->length_mod = length_mod;
00215     }
00216     tag = GetTagForChar(c);
00217     if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
00218   }
00219 
00220   assert(CheckFastPathSetting(*conv));
00221   (void)(&CheckFastPathSetting);
00222 
00223   conv->conv = tag.as_conv();
00224   if (!is_positional) conv->arg_position = ++*next_arg;
00225   return pos;
00226 }
00227 
00228 }  // namespace
00229 
00230 const char *ConsumeUnboundConversion(const char *p, const char *end,
00231                                      UnboundConversion *conv, int *next_arg) {
00232   if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
00233   return ConsumeConversion<false>(p, end, conv, next_arg);
00234 }
00235 
00236 struct ParsedFormatBase::ParsedFormatConsumer {
00237   explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
00238       : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
00239 
00240   bool Append(string_view s) {
00241     if (s.empty()) return true;
00242 
00243     size_t text_end = AppendText(s);
00244 
00245     if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
00246       // Let's extend the existing text run.
00247       parsed->items_.back().text_end = text_end;
00248     } else {
00249       // Let's make a new text run.
00250       parsed->items_.push_back({false, text_end, {}});
00251     }
00252     return true;
00253   }
00254 
00255   bool ConvertOne(const UnboundConversion &conv, string_view s) {
00256     size_t text_end = AppendText(s);
00257     parsed->items_.push_back({true, text_end, conv});
00258     return true;
00259   }
00260 
00261   size_t AppendText(string_view s) {
00262     memcpy(data_pos, s.data(), s.size());
00263     data_pos += s.size();
00264     return static_cast<size_t>(data_pos - parsed->data_.get());
00265   }
00266 
00267   ParsedFormatBase *parsed;
00268   char* data_pos;
00269 };
00270 
00271 ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored,
00272                                    std::initializer_list<Conv> convs)
00273     : data_(format.empty() ? nullptr : new char[format.size()]) {
00274   has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
00275                !MatchesConversions(allow_ignored, convs);
00276 }
00277 
00278 bool ParsedFormatBase::MatchesConversions(
00279     bool allow_ignored, std::initializer_list<Conv> convs) const {
00280   std::unordered_set<int> used;
00281   auto add_if_valid_conv = [&](int pos, char c) {
00282       if (static_cast<size_t>(pos) > convs.size() ||
00283           !Contains(convs.begin()[pos - 1], c))
00284         return false;
00285       used.insert(pos);
00286       return true;
00287   };
00288   for (const ConversionItem &item : items_) {
00289     if (!item.is_conversion) continue;
00290     auto &conv = item.conv;
00291     if (conv.precision.is_from_arg() &&
00292         !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
00293       return false;
00294     if (conv.width.is_from_arg() &&
00295         !add_if_valid_conv(conv.width.get_from_arg(), '*'))
00296       return false;
00297     if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false;
00298   }
00299   return used.size() == convs.size() || allow_ignored;
00300 }
00301 
00302 }  // namespace str_format_internal
00303 }  // namespace absl


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:42:15