case_folding.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2024 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_DSL_CASE_FOLDING_HPP_INCLUDED
5 #define LEXY_DSL_CASE_FOLDING_HPP_INCLUDED
6 
8 #include <lexy/code_point.hpp>
9 #include <lexy/dsl/base.hpp>
10 #include <lexy/dsl/literal.hpp>
11 
12 //=== generic rule impl ===//
13 namespace lexyd
14 {
// Carries a case-folding reader template as a type: `reader<Reader>` names
// `CaseFolding<Reader>`.
15 template <template <typename> typename CaseFolding>
// NOTE(review): listing line 16 is missing from this extract; the symbol index
// places the declaration of `lexyd::_cfl_folding` there.
17 {
18  template <typename Reader>
19  using reader = CaseFolding<Reader>;
20 };
21 
// Token rule that matches `Literal` while reading the input through the
// `CaseFolding` reader adapter. The `lit_*` members below simply forward to the
// wrapped `Literal`.
22 template <typename Literal, template <typename> typename CaseFolding>
23 struct _cfl : token_base<_cfl<Literal, CaseFolding>>, _lit_base
24 {
25  static constexpr auto lit_max_char_count = Literal::lit_max_char_count;
26 
27  static constexpr auto lit_char_classes = Literal::lit_char_classes;
28 
 // NOTE(review): listing line 29 is missing from this extract. Presumably it
 // declares `lit_case_folding` (the member the DSL objects in this file test with
 // `std::is_void_v`) -- confirm against the original header.
30 
 // First character of the literal, taken unchanged from `Literal`.
31  template <typename Encoding>
32  static constexpr auto lit_first_char() -> typename Encoding::char_type
33  {
34  return Literal::template lit_first_char<Encoding>();
35  }
36 
 // Inserts the literal into `trie` by delegating to `Literal::lit_insert` with
 // the same arguments, and returns whatever that call returns.
37  template <typename Trie>
38  static LEXY_CONSTEVAL std::size_t lit_insert(Trie& trie, std::size_t pos,
39  std::size_t char_class)
40  {
41  return Literal::lit_insert(trie, pos, char_class);
42  }
43 
 // Token parser: runs the wrapped literal's parser over a case-folding view of
 // the reader.
44  template <typename Reader>
45  struct tp
46  {
 // NOTE(review): listing line 47 is missing from this extract; per the symbol
 // index it declares `lexy::token_parser_for<Literal, CaseFolding<Reader>> impl`,
 // the member used by the functions below.
48  typename Reader::marker end;
49 
 // Builds `impl` on a case-folding wrapper around `reader`; `end` starts at the
 // reader's current marker.
50  constexpr explicit tp(const Reader& reader)
51  : impl(CaseFolding<Reader>{reader}), end(reader.current())
52  {}
53 
 // Wraps `_reader` in the case-folding adapter, lets `impl` try to match, then
 // copies `impl.end` into `end` (regardless of the outcome) and returns the
 // match result.
54  constexpr bool try_parse(Reader _reader)
55  {
56  CaseFolding<Reader> reader{_reader};
57  auto result = impl.try_parse(reader);
58  end = impl.end;
59  return result;
60  }
61 
 // Forwards error reporting to `impl`, again through a case-folding wrapper.
62  template <typename Context>
63  constexpr void report_error(Context& context, Reader reader)
64  {
65  impl.report_error(context, CaseFolding<Reader>{reader});
66  }
67  };
68 };
69 } // namespace lexyd
70 
71 namespace lexy
72 {
// A case-folded literal (`_cfl`) is assigned the literal token kind.
73 template <typename Literal, template <typename> typename CaseFolding>
74 constexpr auto token_kind_of<lexy::dsl::_cfl<Literal, CaseFolding>> = lexy::literal_token_kind;
75 } // namespace lexy
76 
77 //=== ASCII ===//
78 namespace lexy
79 {
// Reader adapter that lower-cases ASCII letters on the fly. Only `peek()`
// transforms anything; every other operation forwards to the wrapped reader.
80 template <typename Reader>
81 struct _acfr // ascii case folding reader
82 {
83  Reader _impl;
84 
85  using encoding = typename Reader::encoding;
86  using iterator = typename Reader::iterator;
87  using marker = typename Reader::marker;
88 
 // Returns the wrapped reader's next value; values in 'A'..'Z' are shifted by
 // ('a' - 'A') so they compare equal to the lower-case letter. Any other value
 // is returned unchanged.
89  constexpr auto peek() const -> typename encoding::int_type
90  {
91  auto c = _impl.peek();
92  if (encoding::to_int_type('A') <= c && c <= encoding::to_int_type('Z'))
93  return typename encoding::int_type(c + encoding::to_int_type('a' - 'A'));
94  else
95  return c;
96  }
97 
 // Advances the wrapped reader.
98  constexpr void bump()
99  {
100  _impl.bump();
101  }
102 
 // Position of the wrapped reader.
103  constexpr iterator position() const
104  {
105  return _impl.position();
106  }
107 
 // Marker of the wrapped reader.
108  constexpr marker current() const noexcept
109  {
110  return _impl.current();
111  }
 // Resets the wrapped reader to marker `m`.
112  constexpr void reset(marker m) noexcept
113  {
114  _impl.reset(m);
115  }
116 };
117 } // namespace lexy
118 
119 namespace lexyd::ascii
120 {
// DSL object behind `lexyd::ascii::case_folding`; calling it with a literal rule
// applies ASCII case folding to that literal.
121 struct _cf_dsl
122 {
 // True for every encoding.
123  template <typename Encoding>
124  static constexpr auto is_inplace = true;
125 
126  template <typename Reader>
 // NOTE(review): listing line 127 is missing from this extract, so the
 // declaration that this `template <typename Reader>` header introduces is not
 // visible here -- restore it from the original header.
128 
 // Accepts only literal rules whose `lit_case_folding` is `void`, i.e. literals
 // that have not been case folded before ("cannot case fold twice").
129  template <typename Literal>
130  constexpr auto operator()(Literal) const
131  {
132  static_assert(lexy::is_literal_rule<Literal>);
133  static_assert(std::is_void_v<typename Literal::lit_case_folding>, "cannot case fold twice");
 // NOTE(review): listing line 134 (the return statement) is missing from this
 // extract. Presumably it returns a `_cfl` wrapping `Literal` with the ASCII
 // case-folding reader -- confirm against the original header.
135  }
136 };
137 
/// Matches Literal with case insensitive ASCII characters.
139 inline constexpr auto case_folding = _cf_dsl{};
140 } // namespace lexyd::ascii
141 
142 //=== Unicode ===//
143 namespace lexy
144 {
// Case-folding reader adapter for UTF-32 input. All operations other than
// `peek()` forward directly to the wrapped reader.
145 template <typename Reader>
146 struct _sucfr32 // simple unicode case folding reader, UTF-32
147 {
148  Reader _impl;
149 
150  constexpr explicit _sucfr32(Reader impl) : _impl(impl) {}
151 
152  using encoding = typename Reader::encoding;
153  using iterator = typename Reader::iterator;
154  using marker = typename Reader::marker;
155 
 // Reads the next value from the wrapped reader.
156  constexpr auto peek() const -> typename encoding::int_type
157  {
158  auto c = _impl.peek();
 // NOTE(review): listing line 159 (the return statement) is missing from this
 // extract. Presumably it returns the simple case fold of `c` -- confirm against
 // the original header.
160  }
161 
 // Advances the wrapped reader.
162  constexpr void bump()
163  {
164  _impl.bump();
165  }
166 
 // Position of the wrapped reader.
167  constexpr iterator position() const
168  {
169  return _impl.position();
170  }
171 
 // Marker of the wrapped reader.
172  constexpr marker current() const noexcept
173  {
174  return _impl.current();
175  }
 // Resets the wrapped reader to marker `m`.
176  constexpr void reset(marker m) noexcept
177  {
178  _impl.reset(m);
179  }
180 };
181 
// Case-folding reader adapter for multi-unit encodings. It decodes one code
// point at a time from the wrapped reader, folds it with
// `lexy::simple_case_fold`, re-encodes the result into a small buffer, and then
// serves code units out of that buffer.
182 template <typename Reader>
183 struct _sucfrm // simple unicode case folding reader, UTF-8 and UTF-16
184 {
185  using encoding = typename Reader::encoding;
186  using iterator = typename Reader::iterator;
187  using marker = typename Reader::marker;
188 
189  Reader _impl;
 // Marker taken at the start of the code point currently held in the buffer.
190  typename Reader::marker _cur_pos;
 // NOTE(review): listing line 191 is missing from this extract; per the symbol
 // index it declares `encoding::char_type _buffer[4]`, the buffer used below.
 // Number of valid code units in the buffer, and the index of the next one to
 // hand out.
192  unsigned char _buffer_size;
193  unsigned char _buffer_cur;
194 
 // NOTE(review): listing line 196 (the member initializer list) is missing from
 // this extract -- restore it from the original header.
195  constexpr explicit _sucfrm(Reader impl)
197  {
198  _fill();
199  }
200 
 // Loads the next code point from `_impl` into the buffer and records where it
 // started in `_cur_pos`.
201  constexpr void _fill()
202  {
203  _cur_pos = _impl.current();
204 
205  // We need to read the next code point at this point.
206  auto result = lexy::_detail::parse_code_point(_impl);
207  if (result.error == lexy::_detail::cp_error::success)
208  {
209  // Fill the buffer with the folded code point.
210  auto folded = lexy::simple_case_fold(lexy::code_point(result.cp));
211  _buffer_size = static_cast<unsigned char>(
212  lexy::_detail::encode_code_point<encoding>(folded.value(), _buffer, 4));
213  _buffer_cur = 0;
 // Move the wrapped reader past the code point that was just decoded.
214  _impl.reset(result.end);
215  }
216  else
217  {
218  // Fill the buffer with the partial code point.
 // NOTE(review): listing line 219 is missing from this extract. The loop below
 // appends at `_buffer[_buffer_size]`, so the missing line presumably resets
 // `_buffer_size` (and possibly `_buffer_cur`) -- confirm against the original
 // header.
 // The raw code units up to `result.end` are copied unfolded, advancing
 // `_impl` as they are consumed.
220  while (_impl.position() != result.end.position())
221  {
222  _buffer[_buffer_size] = static_cast<typename encoding::char_type>(_impl.peek());
223  ++_buffer_size;
224  _impl.bump();
225  }
226  }
227  }
228 
 // Returns the next buffered code unit, or `encoding::eof()` when the buffer
 // has no unread code units.
229  constexpr auto peek() const -> typename encoding::int_type
230  {
231  if (_buffer_cur == _buffer_size)
232  return encoding::eof();
233 
234  auto cur = _buffer[_buffer_cur];
235  return encoding::to_int_type(cur);
236  }
237 
 // Consumes one buffered code unit; once the last one has been consumed the
 // buffer is refilled from the next code point.
238  constexpr void bump()
239  {
240  ++_buffer_cur;
241  if (_buffer_cur == _buffer_size)
242  _fill();
243  }
244 
 // Position of the marker returned by `current()`.
245  constexpr iterator position() const
246  {
247  return current().position();
248  }
249 
250  constexpr marker current() const noexcept
251  {
252  // We only report a marker at a code point boundary.
253  // This has two consequences:
254  // 1. If we don't match a rule, the error token does not include any common start code
255  // units. That's actually nice, and makes it unnecessary to handle that situation in the
256  // error reporting. The only relevant difference is in the error token.
257  // 2. If the user wants to match partial code unit sequences, the behavior can become buggy.
258  // However, that's not really something we should worry about.
259  return _cur_pos;
260  }
 // NOTE(review): only `_impl` is repositioned here; `_cur_pos` and the buffered
 // code units are left as they were. Confirm that no caller relies on
 // `peek()`/`current()` reflecting `m` right after a reset.
261  constexpr void reset(marker m) noexcept
262  {
263  _impl.reset(m);
264  }
265 };
266 
// Selects the case-folding reader implementation by the reader's encoding.
// NOTE(review): the final listing line (270) is missing from this extract; per
// the symbol index the complete alias is
// `std::conditional_t<std::is_same_v<typename Reader::encoding, lexy::utf32_encoding>,
//  _sucfr32<Reader>, _sucfrm<Reader>>`.
267 template <typename Reader>
268 using _sucfr_for
269  = std::conditional_t<std::is_same_v<typename Reader::encoding, lexy::utf32_encoding>,
271 
// Simple Unicode case-folding reader: a struct deriving from the implementation
// chosen by `_sucfr_for<Reader>`.
272 template <typename Reader>
273 struct _sucfr : _sucfr_for<Reader>
274 {
 // NOTE(review): listing line 275 is missing from this extract. Presumably it
 // makes the base class constructor available -- confirm against the original
 // header.
276 };
277 } // namespace lexy
278 
279 namespace lexyd::unicode
280 {
// DSL object behind `lexyd::unicode::simple_case_folding`; calling it with a
// literal rule applies simple Unicode case folding to that literal.
281 struct _scf_dsl
282 {
 // True only when the encoding is `lexy::utf32_encoding`.
283  template <typename Encoding>
284  static constexpr auto is_inplace = std::is_same_v<Encoding, lexy::utf32_encoding>;
285 
286  template <typename Reader>
 // NOTE(review): listing line 287 is missing from this extract, so the
 // declaration that this `template <typename Reader>` header introduces is not
 // visible here -- restore it from the original header.
288 
 // Accepts only literal rules whose `lit_case_folding` is `void`, i.e. literals
 // that have not been case folded before ("cannot case fold twice").
289  template <typename Literal>
290  constexpr auto operator()(Literal) const
291  {
292  static_assert(lexy::is_literal_rule<Literal>);
293  static_assert(std::is_void_v<typename Literal::lit_case_folding>, "cannot case fold twice");
 // NOTE(review): listing line 294 (the return statement) is missing from this
 // extract. Presumably it returns a `_cfl` wrapping `Literal` with the Unicode
 // case-folding reader -- confirm against the original header.
295  }
296 };
297 
/// Matches Literal with case insensitive Unicode characters (simple case folding).
299 inline constexpr auto simple_case_folding = _scf_dsl{};
300 } // namespace lexyd::unicode
301 
302 #endif // LEXY_DSL_CASE_FOLDING_HPP_INCLUDED
303 
lexyd::_cfl::lit_first_char
static constexpr auto lit_first_char() -> typename Encoding::char_type
Definition: case_folding.hpp:32
code_point.hpp
LEXY_CONSTEVAL
#define LEXY_CONSTEVAL
Definition: config.hpp:98
lexy::_sucfr32::current
constexpr marker current() const noexcept
Definition: case_folding.hpp:172
lexy::simple_case_fold
LEXY_UNICODE_CONSTEXPR code_point simple_case_fold(code_point cp) noexcept
lexy::_sucfrm::position
constexpr iterator position() const
Definition: case_folding.hpp:245
lexyd::_cfl::lit_insert
static LEXY_CONSTEVAL std::size_t lit_insert(Trie &trie, std::size_t pos, std::size_t char_class)
Definition: case_folding.hpp:38
literal.hpp
lexy::_sucfrm::_buffer
encoding::char_type _buffer[4]
Definition: case_folding.hpp:191
magic_enum::char_type
string_view::value_type char_type
Definition: magic_enum.hpp:145
lexyd::ascii::_cf_dsl::is_inplace
static constexpr auto is_inplace
Definition: case_folding.hpp:124
lexy::_sucfrm::current
constexpr marker current() const noexcept
Definition: case_folding.hpp:250
lexy::_detail::parse_code_point
constexpr cp_result< Reader > parse_code_point(Reader reader)
Definition: _detail/code_point.hpp:142
lexyd::_cfl::lit_char_classes
static constexpr auto lit_char_classes
Definition: case_folding.hpp:27
lexyd::_cfl::tp::report_error
constexpr void report_error(Context &context, Reader reader)
Definition: case_folding.hpp:63
lexy::_acfr::peek
constexpr auto peek() const -> typename encoding::int_type
Definition: case_folding.hpp:89
lexyd::unicode
Definition: case_folding.hpp:279
lexy::_sucfr32::_sucfr32
constexpr _sucfr32(Reader impl)
Definition: case_folding.hpp:150
lexyd::ascii::_cf_dsl
Definition: case_folding.hpp:121
lexyd::_cfl::tp::end
Reader::marker end
Definition: case_folding.hpp:48
lexyd::_cfl::tp::try_parse
constexpr bool try_parse(Reader _reader)
Definition: case_folding.hpp:54
lexy::_sucfrm::_buffer_size
unsigned char _buffer_size
Definition: case_folding.hpp:192
lexy
Definition: any_ref.hpp:12
lexyd::_cfl
Definition: case_folding.hpp:23
lexy::_sucfr32::position
constexpr iterator position() const
Definition: case_folding.hpp:167
lexy::literal_token_kind
@ literal_token_kind
Definition: grammar.hpp:90
lexy::_sucfr_for
std::conditional_t< std::is_same_v< typename Reader::encoding, lexy::utf32_encoding >, _sucfr32< Reader >, _sucfrm< Reader > > _sucfr_for
Definition: case_folding.hpp:270
lexy::_acfr::iterator
typename Reader::iterator iterator
Definition: case_folding.hpp:86
lexy::_acfr::reset
constexpr void reset(marker m) noexcept
Definition: case_folding.hpp:112
lexy::code_point::value
constexpr auto value() const noexcept
Definition: code_point.hpp:26
lexy::_acfr::encoding
typename Reader::encoding encoding
Definition: case_folding.hpp:85
lexy::_acfr::position
constexpr iterator position() const
Definition: case_folding.hpp:103
lexy::_sucfrm::_buffer_cur
unsigned char _buffer_cur
Definition: case_folding.hpp:193
lexyd::unicode::_scf_dsl
Definition: case_folding.hpp:281
lexyd::_cfl::tp
Definition: case_folding.hpp:45
lexy::_acfr::current
constexpr marker current() const noexcept
Definition: case_folding.hpp:108
lexy::_acfr::marker
typename Reader::marker marker
Definition: case_folding.hpp:87
lexyd::token_base
Definition: dsl/token.hpp:42
lexyd::_cfl::lit_max_char_count
static constexpr auto lit_max_char_count
Definition: case_folding.hpp:25
lexy::_sucfrm::encoding
typename Reader::encoding encoding
Definition: case_folding.hpp:185
lexy::_sucfr32::bump
constexpr void bump()
Definition: case_folding.hpp:162
lexyd::unicode::simple_case_folding
constexpr auto simple_case_folding
Matches Literal with case insensitive Unicode characters (simple case folding).
Definition: case_folding.hpp:299
lexy::_sucfr32::peek
constexpr auto peek() const -> typename encoding::int_type
Definition: case_folding.hpp:156
lexy::_sucfrm::_fill
constexpr void _fill()
Definition: case_folding.hpp:201
lexy::_sucfrm
Definition: case_folding.hpp:183
lexy::_sucfr32::reset
constexpr void reset(marker m) noexcept
Definition: case_folding.hpp:176
lexyd::unicode::_scf_dsl::operator()
constexpr auto operator()(Literal) const
Definition: case_folding.hpp:290
lexy::_sucfr
Definition: case_folding.hpp:273
lexyd::_cfl_folding
Definition: case_folding.hpp:16
lexy::_sucfr32::iterator
typename Reader::iterator iterator
Definition: case_folding.hpp:153
lexy::_sucfrm::marker
typename Reader::marker marker
Definition: case_folding.hpp:187
base.hpp
lexy::_sucfrm::reset
constexpr void reset(marker m) noexcept
Definition: case_folding.hpp:261
lexy::_sucfrm::peek
constexpr auto peek() const -> typename encoding::int_type
Definition: case_folding.hpp:229
lexy::_acfr::bump
constexpr void bump()
Definition: case_folding.hpp:98
lexyd::ascii::case_folding
constexpr auto case_folding
Matches Literal with case insensitive ASCII characters.
Definition: case_folding.hpp:139
lexy::_sucfrm::_sucfrm
constexpr _sucfrm(Reader impl)
Definition: case_folding.hpp:195
lexy::_detail::cp_error::success
@ success
lexy::_sucfrm::_impl
Reader _impl
Definition: case_folding.hpp:189
lexy::_sucfrm::iterator
typename Reader::iterator iterator
Definition: case_folding.hpp:186
lexy::_acfr::_impl
Reader _impl
Definition: case_folding.hpp:83
code_point.hpp
lexy::_sucfr32::encoding
typename Reader::encoding encoding
Definition: case_folding.hpp:152
lexy::_sucfrm::_cur_pos
Reader::marker _cur_pos
Definition: case_folding.hpp:190
lexy::_acfr
Definition: case_folding.hpp:81
lexy::_sucfr32::_impl
Reader _impl
Definition: case_folding.hpp:148
lexy::_sucfr32
Definition: case_folding.hpp:146
lexy::_sucfr32::marker
typename Reader::marker marker
Definition: case_folding.hpp:154
lexyd::_cfl::tp::impl
lexy::token_parser_for< Literal, CaseFolding< Reader > > impl
Definition: case_folding.hpp:47
lexyd::_cfl::tp::tp
constexpr tp(const Reader &reader)
Definition: case_folding.hpp:50
lexy::token_parser_for
typename TokenRule::template tp< Reader > token_parser_for
Definition: dsl/base.hpp:242
lexyd::_lit_base
Definition: grammar.hpp:32
lexyd::ascii
Definition: ascii.hpp:13
lexyd
Definition: trace.hpp:22
lexy::_sucfrm::bump
constexpr void bump()
Definition: case_folding.hpp:238
lexyd::eof
constexpr auto eof
Matches EOF.
Definition: eof.hpp:72
lexyd::unicode::_scf_dsl::is_inplace
static constexpr auto is_inplace
Definition: case_folding.hpp:284
lexyd::_cfl_folding::reader
CaseFolding< Reader > reader
Definition: case_folding.hpp:19
lexyd::ascii::_cf_dsl::operator()
constexpr auto operator()(Literal) const
Definition: case_folding.hpp:130
lexy::code_point
A unicode code point.
Definition: code_point.hpp:20


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Dec 13 2024 03:19:16