00001 #include "absl/strings/internal/str_format/parser.h"
00002
00003 #include <assert.h>
00004 #include <string.h>
00005 #include <wchar.h>
00006 #include <cctype>
00007 #include <cstdint>
00008
00009 #include <algorithm>
00010 #include <initializer_list>
00011 #include <limits>
00012 #include <ostream>
00013 #include <string>
00014 #include <unordered_set>
00015
00016 namespace absl {
00017 namespace str_format_internal {
00018
00019 using CC = ConversionChar::Id;
00020 using LM = LengthMod::Id;
00021 ABSL_CONST_INIT const ConvTag kTags[256] = {
00022 {}, {}, {}, {}, {}, {}, {}, {},
00023 {}, {}, {}, {}, {}, {}, {}, {},
00024 {}, {}, {}, {}, {}, {}, {}, {},
00025 {}, {}, {}, {}, {}, {}, {}, {},
00026 {}, {}, {}, {}, {}, {}, {}, {},
00027 {}, {}, {}, {}, {}, {}, {}, {},
00028 {}, {}, {}, {}, {}, {}, {}, {},
00029 {}, {}, {}, {}, {}, {}, {}, {},
00030 {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G,
00031 {}, {}, {}, {}, LM::L, {}, {}, {},
00032 {}, {}, {}, CC::S, {}, {}, {}, {},
00033 CC::X, {}, {}, {}, {}, {}, {}, {},
00034 {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g,
00035 LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o,
00036 CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {},
00037 CC::x, {}, LM::z, {}, {}, {}, {}, {},
00038 {}, {}, {}, {}, {}, {}, {}, {},
00039 {}, {}, {}, {}, {}, {}, {}, {},
00040 {}, {}, {}, {}, {}, {}, {}, {},
00041 {}, {}, {}, {}, {}, {}, {}, {},
00042 {}, {}, {}, {}, {}, {}, {}, {},
00043 {}, {}, {}, {}, {}, {}, {}, {},
00044 {}, {}, {}, {}, {}, {}, {}, {},
00045 {}, {}, {}, {}, {}, {}, {}, {},
00046 {}, {}, {}, {}, {}, {}, {}, {},
00047 {}, {}, {}, {}, {}, {}, {}, {},
00048 {}, {}, {}, {}, {}, {}, {}, {},
00049 {}, {}, {}, {}, {}, {}, {}, {},
00050 {}, {}, {}, {}, {}, {}, {}, {},
00051 {}, {}, {}, {}, {}, {}, {}, {},
00052 {}, {}, {}, {}, {}, {}, {}, {},
00053 {}, {}, {}, {}, {}, {}, {}, {},
00054 };
00055
00056 namespace {
00057
00058 bool CheckFastPathSetting(const UnboundConversion& conv) {
00059 bool should_be_basic = !conv.flags.left &&
00060 !conv.flags.show_pos &&
00061 !conv.flags.sign_col &&
00062 !conv.flags.alt &&
00063 !conv.flags.zero &&
00064 (conv.width.value() == -1) &&
00065 (conv.precision.value() == -1);
00066 if (should_be_basic != conv.flags.basic) {
00067 fprintf(stderr,
00068 "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
00069 "width=%d precision=%d\n",
00070 conv.flags.basic, conv.flags.left, conv.flags.show_pos,
00071 conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
00072 conv.width.value(), conv.precision.value());
00073 }
00074 return should_be_basic == conv.flags.basic;
00075 }
00076
00077 template <bool is_positional>
00078 const char *ConsumeConversion(const char *pos, const char *const end,
00079 UnboundConversion *conv, int *next_arg) {
00080 const char* const original_pos = pos;
00081 char c;
00082
00083
00084 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
00085 do { \
00086 if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
00087 c = *pos++; \
00088 } while (0)
00089
00090 const auto parse_digits = [&] {
00091 int digits = c - '0';
00092
00093
00094
00095 int num_digits = std::numeric_limits<int>::digits10;
00096 for (;;) {
00097 if (ABSL_PREDICT_FALSE(pos == end)) break;
00098 c = *pos++;
00099 if (!std::isdigit(c)) break;
00100 --num_digits;
00101 if (ABSL_PREDICT_FALSE(!num_digits)) break;
00102 digits = 10 * digits + c - '0';
00103 }
00104 return digits;
00105 };
00106
00107 if (is_positional) {
00108 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00109 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
00110 conv->arg_position = parse_digits();
00111 assert(conv->arg_position > 0);
00112 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
00113 }
00114
00115 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00116
00117
00118 assert(conv->flags.basic);
00119
00120
00121
00122
00123 if (c < 'A') {
00124 conv->flags.basic = false;
00125
00126 for (; c <= '0';) {
00127
00128
00129
00130 switch (c) {
00131 case '-':
00132 conv->flags.left = true;
00133 break;
00134 case '+':
00135 conv->flags.show_pos = true;
00136 break;
00137 case ' ':
00138 conv->flags.sign_col = true;
00139 break;
00140 case '#':
00141 conv->flags.alt = true;
00142 break;
00143 case '0':
00144 conv->flags.zero = true;
00145 break;
00146 default:
00147 goto flags_done;
00148 }
00149 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00150 }
00151 flags_done:
00152
00153 if (c <= '9') {
00154 if (c >= '0') {
00155 int maybe_width = parse_digits();
00156 if (!is_positional && c == '$') {
00157 if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
00158
00159 *next_arg = -1;
00160 conv->flags = Flags();
00161 conv->flags.basic = true;
00162 return ConsumeConversion<true>(original_pos, end, conv, next_arg);
00163 }
00164 conv->width.set_value(maybe_width);
00165 } else if (c == '*') {
00166 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00167 if (is_positional) {
00168 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
00169 conv->width.set_from_arg(parse_digits());
00170 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
00171 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00172 } else {
00173 conv->width.set_from_arg(++*next_arg);
00174 }
00175 }
00176 }
00177
00178 if (c == '.') {
00179 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00180 if (std::isdigit(c)) {
00181 conv->precision.set_value(parse_digits());
00182 } else if (c == '*') {
00183 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00184 if (is_positional) {
00185 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
00186 conv->precision.set_from_arg(parse_digits());
00187 if (c != '$') return nullptr;
00188 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00189 } else {
00190 conv->precision.set_from_arg(++*next_arg);
00191 }
00192 } else {
00193 conv->precision.set_value(0);
00194 }
00195 }
00196 }
00197
00198 auto tag = GetTagForChar(c);
00199
00200 if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
00201 if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
00202
00203
00204 using str_format_internal::LengthMod;
00205 LengthMod length_mod = tag.as_length();
00206 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00207 if (c == 'h' && length_mod.id() == LengthMod::h) {
00208 conv->length_mod = LengthMod::FromId(LengthMod::hh);
00209 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00210 } else if (c == 'l' && length_mod.id() == LengthMod::l) {
00211 conv->length_mod = LengthMod::FromId(LengthMod::ll);
00212 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
00213 } else {
00214 conv->length_mod = length_mod;
00215 }
00216 tag = GetTagForChar(c);
00217 if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
00218 }
00219
00220 assert(CheckFastPathSetting(*conv));
00221 (void)(&CheckFastPathSetting);
00222
00223 conv->conv = tag.as_conv();
00224 if (!is_positional) conv->arg_position = ++*next_arg;
00225 return pos;
00226 }
00227
00228 }
00229
00230 const char *ConsumeUnboundConversion(const char *p, const char *end,
00231 UnboundConversion *conv, int *next_arg) {
00232 if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
00233 return ConsumeConversion<false>(p, end, conv, next_arg);
00234 }
00235
00236 struct ParsedFormatBase::ParsedFormatConsumer {
00237 explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
00238 : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
00239
00240 bool Append(string_view s) {
00241 if (s.empty()) return true;
00242
00243 size_t text_end = AppendText(s);
00244
00245 if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
00246
00247 parsed->items_.back().text_end = text_end;
00248 } else {
00249
00250 parsed->items_.push_back({false, text_end, {}});
00251 }
00252 return true;
00253 }
00254
00255 bool ConvertOne(const UnboundConversion &conv, string_view s) {
00256 size_t text_end = AppendText(s);
00257 parsed->items_.push_back({true, text_end, conv});
00258 return true;
00259 }
00260
00261 size_t AppendText(string_view s) {
00262 memcpy(data_pos, s.data(), s.size());
00263 data_pos += s.size();
00264 return static_cast<size_t>(data_pos - parsed->data_.get());
00265 }
00266
00267 ParsedFormatBase *parsed;
00268 char* data_pos;
00269 };
00270
00271 ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored,
00272 std::initializer_list<Conv> convs)
00273 : data_(format.empty() ? nullptr : new char[format.size()]) {
00274 has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
00275 !MatchesConversions(allow_ignored, convs);
00276 }
00277
00278 bool ParsedFormatBase::MatchesConversions(
00279 bool allow_ignored, std::initializer_list<Conv> convs) const {
00280 std::unordered_set<int> used;
00281 auto add_if_valid_conv = [&](int pos, char c) {
00282 if (static_cast<size_t>(pos) > convs.size() ||
00283 !Contains(convs.begin()[pos - 1], c))
00284 return false;
00285 used.insert(pos);
00286 return true;
00287 };
00288 for (const ConversionItem &item : items_) {
00289 if (!item.is_conversion) continue;
00290 auto &conv = item.conv;
00291 if (conv.precision.is_from_arg() &&
00292 !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
00293 return false;
00294 if (conv.width.is_from_arg() &&
00295 !add_if_valid_conv(conv.width.get_from_arg(), '*'))
00296 return false;
00297 if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false;
00298 }
00299 return used.size() == convs.size() || allow_ignored;
00300 }
00301
00302 }
00303 }