ascii.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2024 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_DSL_ASCII_HPP_INCLUDED
5 #define LEXY_DSL_ASCII_HPP_INCLUDED
6 
8 #include <lexy/dsl/base.hpp>
10 
11 // SWAR tricks inspired by https://garbagecollected.org/2017/01/31/four-column-ascii/.
12 
13 namespace lexyd::ascii
14 {
15 //=== control ===//
16 struct _control : char_class_base<_control>
17 {
19  {
20  return "ASCII.control";
21  }
22 
24  {
26  result.insert(0x00, 0x1F);
27  result.insert(0x7F);
28  return result;
29  }
30 
31  template <typename Encoding>
33  {
34  using char_type = typename Encoding::char_type;
35  constexpr auto mask = lexy::_detail::swar_fill_compl(char_type(0b11111));
36  constexpr auto expected = lexy::_detail::swar_fill(char_type(0b00'00000));
37 
38  // We're only checking for 0x00-0x1F, and allow a false negative for 0x7F.
39  return (c & mask) == expected;
40  }
41 };
42 inline constexpr auto control = _control{};
43 
44 //=== whitespace ===//
45 struct _blank : char_class_base<_blank>
46 {
48  {
49  return "ASCII.blank";
50  }
51 
53  {
55  result.insert(' ');
56  result.insert('\t');
57  return result;
58  }
59 };
60 inline constexpr auto blank = _blank{};
61 
62 struct _newline : char_class_base<_newline>
63 {
65  {
66  return "ASCII.newline";
67  }
68 
70  {
72  result.insert('\n');
73  result.insert('\r');
74  return result;
75  }
76 };
77 inline constexpr auto newline = _newline{};
78 
79 struct _other_space : char_class_base<_other_space>
80 {
82  {
83  return "ASCII.other-space";
84  }
85 
87  {
89  result.insert('\f');
90  result.insert('\v');
91  return result;
92  }
93 };
94 inline constexpr auto other_space = _other_space{};
95 
96 struct _space : char_class_base<_space>
97 {
99  {
100  return "ASCII.space";
101  }
102 
104  {
109  return result;
110  }
111 };
112 inline constexpr auto space = _space{};
113 
114 //=== alpha ===//
115 struct _lower : char_class_base<_lower>
116 {
118  {
119  return "ASCII.lower";
120  }
121 
123  {
125  result.insert('a', 'z');
126  return result;
127  }
128 
129  template <typename Encoding>
131  {
132  using char_type = typename Encoding::char_type;
133 
134  // All interesting characters are in column 4.
135  constexpr auto mask = lexy::_detail::swar_fill_compl(char_type(0b11111));
136  constexpr auto expected = lexy::_detail::swar_fill(char_type(0b11'00000));
137 
138  // But we need to eliminate ~ at the beginning and {|}~\x7F at the end.
139  constexpr auto offset_low = lexy::_detail::swar_fill(char_type(1));
140  constexpr auto offset_high = lexy::_detail::swar_fill(char_type(5));
141 
142  return ((c - offset_low) & mask) == expected && ((c + offset_high) & mask) == expected;
143  }
144 };
145 inline constexpr auto lower = _lower{};
146 
147 struct _upper : char_class_base<_upper>
148 {
149  static LEXY_CONSTEVAL auto char_class_name()
150  {
151  return "ASCII.upper";
152  }
153 
154  static LEXY_CONSTEVAL auto char_class_ascii()
155  {
156  lexy::_detail::ascii_set result;
157  result.insert('A', 'Z');
158  return result;
159  }
160 
161  template <typename Encoding>
162  static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
163  {
164  using char_type = typename Encoding::char_type;
165 
166  // All interesting characters are in column 3.
167  constexpr auto mask = lexy::_detail::swar_fill_compl(char_type(0b11111));
168  constexpr auto expected = lexy::_detail::swar_fill(char_type(0b10'00000));
169 
170  // But we need to eliminate @ at the beginning and [\]^_ at the end.
171  constexpr auto offset_low = lexy::_detail::swar_fill(char_type(1));
172  constexpr auto offset_high = lexy::_detail::swar_fill(char_type(5));
173 
174  return ((c - offset_low) & mask) == expected && ((c + offset_high) & mask) == expected;
175  }
176 };
177 inline constexpr auto upper = _upper{};
178 
179 struct _alpha : char_class_base<_alpha>
180 {
182  {
183  return "ASCII.alpha";
184  }
185 
187  {
189  result.insert('a', 'z');
190  result.insert('A', 'Z');
191  return result;
192  }
193 
194  template <typename Encoding>
196  {
197  // We're assuming lower characters are more common, so do the efficient check only for them.
198  return _lower::template char_class_match_swar<Encoding>(c);
199  }
200 };
201 inline constexpr auto alpha = _alpha{};
202 
203 struct _alphau : char_class_base<_alphau>
204 {
206  {
207  return "ASCII.alpha-underscore";
208  }
209 
211  {
213  result.insert('a', 'z');
214  result.insert('A', 'Z');
215  result.insert('_');
216  return result;
217  }
218 
219  template <typename Encoding>
221  {
222  // We're assuming alpha characters are more common, so do the efficient check only for them.
223  return _alpha::template char_class_match_swar<Encoding>(c);
224  }
225 };
226 inline constexpr auto alpha_underscore = _alphau{};
227 
228 //=== digit ===//
229 struct _digit : char_class_base<_digit>
230 {
232  {
233  return "ASCII.digit";
234  }
235 
237  {
239  result.insert('0', '9');
240  return result;
241  }
242 
243  template <typename Encoding>
245  {
246  using char_type = typename Encoding::char_type;
247 
248  // All interesting characters are in the second half of column 1.
249  constexpr auto mask = lexy::_detail::swar_fill_compl(char_type(0b01111));
250  constexpr auto expected = lexy::_detail::swar_fill(char_type(0b01'10000));
251 
252  // But we need to eliminate :;<=>? at the end.
253  constexpr auto offset_high = lexy::_detail::swar_fill(char_type(6));
254 
255  return (c & mask) == expected && ((c + offset_high) & mask) == expected;
256  }
257 };
258 inline constexpr auto digit = _digit{};
259 
260 struct _alnum : char_class_base<_alnum>
261 {
262  static LEXY_CONSTEVAL auto char_class_name()
263  {
264  return "ASCII.alpha-digit";
265  }
266 
267  static LEXY_CONSTEVAL auto char_class_ascii()
268  {
269  lexy::_detail::ascii_set result;
270  result.insert(_alpha::char_class_ascii());
271  result.insert(_digit::char_class_ascii());
272  return result;
273  }
274 
275  template <typename Encoding>
276  static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
277  {
278  // We're assuming alpha characters are more common, so do the efficient check only for them.
279  return _alpha::template char_class_match_swar<Encoding>(c);
280  }
281 };
282 inline constexpr auto alnum = _alnum{};
283 inline constexpr auto alpha_digit = _alnum{};
284 
285 struct _word : char_class_base<_word>
286 {
288  {
289  return "ASCII.word";
290  }
291 
293  {
297  return result;
298  }
299 
300  template <typename Encoding>
302  {
303  // We're assuming alphau characters are more common, so do the efficient check only for
304  // them.
305  return _alphau::template char_class_match_swar<Encoding>(c);
306  }
307 };
308 inline constexpr auto word = _word{};
309 inline constexpr auto alpha_digit_underscore = _word{};
310 
311 //=== punct ===//
312 struct _punct : char_class_base<_punct>
313 {
315  {
316  return "ASCII.punct";
317  }
318 
320  {
322  result.insert('!');
323  result.insert('"');
324  result.insert('#');
325  result.insert('$');
326  result.insert('%');
327  result.insert('&');
328  result.insert('\'');
329  result.insert('(');
330  result.insert(')');
331  result.insert('*');
332  result.insert('+');
333  result.insert(',');
334  result.insert('-');
335  result.insert('.');
336  result.insert('/');
337  result.insert(':');
338  result.insert(';');
339  result.insert('<');
340  result.insert('=');
341  result.insert('>');
342  result.insert('?');
343  result.insert('@');
344  result.insert('[');
345  result.insert('\\');
346  result.insert(']');
347  result.insert('^');
348  result.insert('_');
349  result.insert('`');
350  result.insert('{');
351  result.insert('|');
352  result.insert('}');
353  result.insert('~');
354  return result;
355  }
356 };
357 inline constexpr auto punct = _punct{};
358 
359 //=== categories ===//
360 struct _graph : char_class_base<_graph>
361 {
363  {
364  return "ASCII.graph";
365  }
366 
368  {
370  result.insert(0x21, 0x7E);
371  return result;
372  }
373 
374  template <typename Encoding>
376  {
377  using char_type = typename Encoding::char_type;
378 
379  // First check that we have only ASCII, but shifted by one, so we also exclude 0x7F.
380  constexpr auto ascii_mask = lexy::_detail::swar_fill_compl(char_type(0b11'11111));
381  constexpr auto ascii_offset = lexy::_detail::swar_fill(char_type(1));
382  constexpr auto ascii_expected = lexy::_detail::swar_fill(char_type(0));
383  if (((c + ascii_offset) & ascii_mask) != ascii_expected)
384  return false;
385 
386  // The above check also included 0xFF for single byte encodings where it overflowed,
387  // so do a separate check in those cases.
388  if constexpr (sizeof(char_type) == 1)
389  {
390  if ((c & ascii_mask) != ascii_expected)
391  return false;
392  }
393 
394  // Then we must not have a character in column 0, or space.
395  // If we subtract one we turn 0x21-0x01 into column 0 and 0x00 to a value definitely not in
396  // column 0, so need to check both.
397  constexpr auto mask = lexy::_detail::swar_fill_compl(char_type(0b11111));
398  constexpr auto offset_low = lexy::_detail::swar_fill(char_type(1));
399  return !lexy::_detail::swar_has_zero<char_type>(c & mask)
400  && !lexy::_detail::swar_has_zero<char_type>((c - offset_low) & mask);
401  }
402 };
403 inline constexpr auto graph = _graph{};
404 
405 struct _print : char_class_base<_print>
406 {
407  static LEXY_CONSTEVAL auto char_class_name()
408  {
409  return "ASCII.print";
410  }
411 
412  static LEXY_CONSTEVAL auto char_class_ascii()
413  {
414  lexy::_detail::ascii_set result;
415  result.insert(0x20, 0x7E);
416  return result;
417  }
418 
419  template <typename Encoding>
420  static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
421  {
422  using char_type = typename Encoding::char_type;
423 
424  // First check that we have only ASCII, but shifted by one, so we also exclude 0x7F.
425  constexpr auto ascii_mask = lexy::_detail::swar_fill_compl(char_type(0b11'11111));
426  constexpr auto ascii_offset = lexy::_detail::swar_fill(char_type(1));
427  constexpr auto ascii_expected = lexy::_detail::swar_fill(char_type(0));
428  if (((c + ascii_offset) & ascii_mask) != ascii_expected)
429  return false;
430 
431  // The above check also included 0xFF for single byte encodings where it overflowed,
432  // so do a separate check in those cases.
433  if constexpr (sizeof(char_type) == 1)
434  {
435  if ((c & ascii_mask) != ascii_expected)
436  return false;
437  }
438 
439  // Then we must not have a character in column 0.
440  constexpr auto mask = lexy::_detail::swar_fill_compl(char_type(0b11111));
441  return !lexy::_detail::swar_has_zero<char_type>(c & mask);
442  }
443 };
444 inline constexpr auto print = _print{};
445 
446 struct _char : char_class_base<_char>
447 {
449  {
450  return "ASCII";
451  }
452 
454  {
456  result.insert(0x00, 0x7F);
457  return result;
458  }
459 
460  template <typename Encoding>
462  {
463  using char_type = typename Encoding::char_type;
464 
465  constexpr auto mask = lexy::_detail::swar_fill_compl(char_type(0b11'11111));
466  constexpr auto expected = lexy::_detail::swar_fill(char_type(0));
467 
468  return (c & mask) == expected;
469  }
470 };
471 inline constexpr auto character = _char{};
472 } // namespace lexyd::ascii
473 
474 namespace lexyd::ascii
475 {
476 template <char... C>
477 struct _alt : char_class_base<_alt<C...>>
478 {
479  static_assert(sizeof...(C) > 0);
480 
481  static LEXY_CONSTEVAL auto char_class_name()
482  {
483  return lexy::_detail::type_string<char, C...>::template c_str<char>;
484  }
485 
486  static LEXY_CONSTEVAL auto char_class_ascii()
487  {
488  lexy::_detail::ascii_set result;
489  (result.insert(C), ...);
490  return result;
491  }
492 };
493 
494 template <typename CharT, CharT... C>
495 struct _one_of
496 {
497  static_assert((std::is_same_v<CharT, char> && ... && lexy::_detail::is_ascii(C)),
498  "only ASCII characters are supported");
499 
500  using rule = _alt<C...>;
501 };
502 
#if LEXY_HAS_NTTP
/// Character class rule for the characters of the string literal `Str`.
/// Only declared when LEXY_HAS_NTTP is true.
template <lexy::_detail::string_literal Str>
constexpr auto one_of = typename lexy::_detail::to_type_string<_one_of, Str>::rule{};
#endif

/// Macro counterpart of `one_of`; it is defined regardless of LEXY_HAS_NTTP.
/// NOTE(review): presumably LEXY_NTTP_STRING expands to `_one_of` instantiated with the
/// characters of `Str` -- confirm in nttp_string.hpp.
#define LEXY_ASCII_ONE_OF(Str)                                                                     \
    LEXY_NTTP_STRING(::lexyd::ascii::_one_of, Str)::rule {}
511 } // namespace lexyd::ascii
512 
513 #endif // LEXY_DSL_ASCII_HPP_INCLUDED
514 
LEXY_CONSTEVAL
#define LEXY_CONSTEVAL
Definition: config.hpp:98
lexyd::ascii::_blank::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:47
lexyd::ascii::_control::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:23
lexyd::ascii::_char::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:453
lexyd::ascii::_alphau::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:220
magic_enum::char_type
string_view::value_type char_type
Definition: magic_enum.hpp:145
lexyd::ascii::_other_space::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:81
lexyd::ascii::_graph::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:375
lexyd::ascii::_other_space::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:86
lexyd::ascii::alnum
constexpr auto alnum
Definition: ascii.hpp:282
lexyd::ascii::alpha_digit_underscore
constexpr auto alpha_digit_underscore
Definition: ascii.hpp:309
lexyd::ascii::alpha_underscore
constexpr auto alpha_underscore
Definition: ascii.hpp:226
lexyd::ascii::_alphau
Definition: ascii.hpp:203
lexyd::ascii::punct
constexpr auto punct
Definition: ascii.hpp:357
lexyd::ascii::_punct::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:314
lexyd::ascii::_word::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:287
lexyd::ascii::_blank
Definition: ascii.hpp:45
lexyd::ascii::newline
constexpr auto newline
Definition: ascii.hpp:77
lexy::_detail::swar_int
std::uintmax_t swar_int
Definition: swar.hpp:20
lexyd::ascii::_alphau::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:205
lexyd::ascii::_alphau::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:210
lexyd::ascii::_char
Definition: ascii.hpp:446
lexyd::ascii::_control::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:18
lexy::_detail::ascii_set::insert
constexpr void insert(int c)
Definition: char_class.hpp:54
char_class.hpp
lexyd::ascii::_digit::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:231
lexyd::ascii::alpha
constexpr auto alpha
Definition: ascii.hpp:201
lexyd::ascii::_char::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:448
lexyd::ascii::_newline::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:64
lexyd::ascii::_upper
Definition: ascii.hpp:147
lexyd::ascii::_word
Definition: ascii.hpp:285
lexyd::ascii::_alpha
Definition: ascii.hpp:179
lexyd::ascii::_char::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:461
lexyd::ascii::_word::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:292
lexyd::ascii::_blank::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:52
lexyd::ascii::_control
Definition: ascii.hpp:16
lexyd::ascii::_other_space
Definition: ascii.hpp:79
lexyd::ascii::word
constexpr auto word
Definition: ascii.hpp:308
lexyd::ascii::_digit::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:236
lexyd::ascii::_space::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:98
lexyd::ascii::_graph::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:362
lexyd::ascii::other_space
constexpr auto other_space
Definition: ascii.hpp:94
lexyd::ascii::_digit::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:244
lexyd::char_class_base
Definition: char_class.hpp:170
lexyd::ascii::_punct::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:319
lexyd::ascii::_graph
Definition: ascii.hpp:360
lexyd::ascii::_print
Definition: ascii.hpp:405
lexyd::ascii::_lower::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:117
lexyd::ascii::_space::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:103
base.hpp
lexyd::ascii::_newline
Definition: ascii.hpp:62
lexyd::ascii::_word::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:301
lexyd::ascii::_alpha::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:186
lexyd::ascii::_control::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:32
lexyd::ascii::alpha_digit
constexpr auto alpha_digit
Definition: ascii.hpp:283
lexyd::ascii::_alnum
Definition: ascii.hpp:260
nttp_string.hpp
lexyd::ascii::_graph::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:367
lexyd::ascii::_space
Definition: ascii.hpp:96
lexyd::ascii::control
constexpr auto control
Definition: ascii.hpp:42
lexyd::ascii::_lower
Definition: ascii.hpp:115
lexyd::ascii::_alpha::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:195
lexyd::ascii::_lower::char_class_match_swar
static constexpr auto char_class_match_swar(lexy::_detail::swar_int c)
Definition: ascii.hpp:130
lexyd::ascii::_alpha::char_class_name
static LEXY_CONSTEVAL auto char_class_name()
Definition: ascii.hpp:181
lexyd::ascii::space
constexpr auto space
Definition: ascii.hpp:112
lexyd::ascii::_digit
Definition: ascii.hpp:229
lexyd::ascii::upper
constexpr auto upper
Definition: ascii.hpp:177
lexyd::ascii::blank
constexpr auto blank
Definition: ascii.hpp:60
lexyd::ascii::print
constexpr auto print
Definition: ascii.hpp:444
lexyd::ascii
Definition: ascii.hpp:13
lexyd::ascii::_newline::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:69
lexy::_detail::swar_fill_compl
constexpr swar_int swar_fill_compl(CharT _c)
Definition: swar.hpp:61
lexyd::ascii::_lower::char_class_ascii
static LEXY_CONSTEVAL auto char_class_ascii()
Definition: ascii.hpp:122
lexy::_detail::ascii_set
Definition: char_class.hpp:14
lexy::_detail::swar_fill
constexpr swar_int swar_fill(CharT _c)
Definition: swar.hpp:46
lexyd::ascii::_punct
Definition: ascii.hpp:312


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Dec 13 2024 03:19:16