case_folding.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2023 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_DSL_CASE_FOLDING_HPP_INCLUDED
5 #define LEXY_DSL_CASE_FOLDING_HPP_INCLUDED
6 
8 #include <lexy/code_point.hpp>
9 #include <lexy/dsl/base.hpp>
10 #include <lexy/dsl/literal.hpp>
11 
12 //=== generic rule impl ===//
13 namespace lexyd
14 {
// Type-level holder for a case-folding reader template:
// `reader<Reader>` is an alias for `CaseFolding<Reader>`.
// NOTE(review): this listing skips original line 16; the symbol index at the
// end of the page places the declaration of `lexyd::_cfl_folding` there.
15 template <template <typename> typename CaseFolding>
17 {
18  template <typename Reader>
19  using reader = CaseFolding<Reader>;
20 };
21 
// Literal rule that parses `Literal` through a `CaseFolding<Reader>` view of the input.
// The literal properties (max char count, char classes, first char, trie insertion)
// are forwarded unchanged from `Literal`.
22 template <typename Literal, template <typename> typename CaseFolding>
23 struct _cfl : token_base<_cfl<Literal, CaseFolding>>, _lit_base
24 {
25  static constexpr auto lit_max_char_count = Literal::lit_max_char_count;
26 
27  static constexpr auto lit_char_classes = Literal::lit_char_classes;
28 
 // NOTE(review): the listing skips original line 29. The DSL objects further down
 // static_assert on `Literal::lit_case_folding`, so presumably that member is
 // declared here -- confirm against the real header.
30 
 // First character of the wrapped literal, taken as-is from `Literal`.
31  template <typename Encoding>
32  static constexpr auto lit_first_char() -> typename Encoding::char_type
33  {
34  return Literal::template lit_first_char<Encoding>();
35  }
36 
 // Inserts the wrapped literal into `trie`; all arguments are passed straight
 // through to `Literal::lit_insert` and its result is returned.
37  template <typename Trie>
38  static LEXY_CONSTEVAL std::size_t lit_insert(Trie& trie, std::size_t pos,
39  std::size_t char_class)
40  {
41  return Literal::lit_insert(trie, pos, char_class);
42  }
43 
 // Token parser: wraps the parser of `Literal`, instantiated for the
 // case-folding reader instead of the plain `Reader`.
44  template <typename Reader>
45  struct tp
46  {
 // NOTE(review): the listing skips original line 47; per the symbol index it
 // declares `lexy::token_parser_for<Literal, CaseFolding<Reader>> impl`.
48  typename Reader::iterator end; // copied from `impl.end` after each try_parse
49 
 // Builds `impl` from a case-folding wrapper around `reader`;
 // `end` starts out at the reader's current position.
50  constexpr explicit tp(const Reader& reader)
51  : impl(CaseFolding<Reader>{reader}), end(reader.position())
52  {}
53 
 // Wraps the (by-value) reader in the case-folding reader, delegates to
 // `impl`, mirrors `impl.end` into `end` and returns the delegate's result.
54  constexpr bool try_parse(Reader _reader)
55  {
56  CaseFolding<Reader> reader{_reader};
57  auto result = impl.try_parse(reader);
58  end = impl.end;
59  return result;
60  }
61 
 // Error reporting is delegated to `impl`, again on a case-folding view of `reader`.
62  template <typename Context>
63  constexpr void report_error(Context& context, Reader reader)
64  {
65  impl.report_error(context, CaseFolding<Reader>{reader});
66  }
67  };
68 };
69 } // namespace lexyd
70 
71 namespace lexy
72 {
// Every case-folded literal `_cfl<Literal, CaseFolding>` has the token kind
// `lexy::literal_token_kind`, independent of the literal and the folding used.
73 template <typename Literal, template <typename> typename CaseFolding>
74 constexpr auto token_kind_of<lexy::dsl::_cfl<Literal, CaseFolding>> = lexy::literal_token_kind;
75 } // namespace lexy
76 
77 //=== ASCII ===//
78 namespace lexy
79 {
// Reader adapter that presents the input of the wrapped reader with ASCII
// upper-case letters mapped to lower case. Only `peek()` transforms anything;
// `bump()`, `position()` and `set_position()` forward to the wrapped reader.
// The struct declares no constructor, so it is brace-initialized from a Reader.
80 template <typename Reader>
81 struct _acfr // ascii case folding reader
82 {
83  Reader _impl; // the wrapped reader
84 
85  using encoding = typename Reader::encoding;
86  using iterator = typename Reader::iterator;
87 
 // Returns the current value of the wrapped reader. Values in ['A', 'Z'] are
 // shifted by ('a' - 'A'), i.e. to the matching lower-case letter; every other
 // value is returned unchanged.
88  constexpr auto peek() const -> typename encoding::int_type
89  {
90  auto c = _impl.peek();
91  if (encoding::to_int_type('A') <= c && c <= encoding::to_int_type('Z'))
92  return typename encoding::int_type(c + encoding::to_int_type('a' - 'A'));
93  else
94  return c;
95  }
96 
 // Advances the wrapped reader.
97  constexpr void bump()
98  {
99  _impl.bump();
100  }
101 
 // Current position of the wrapped reader.
102  constexpr iterator position() const
103  {
104  return _impl.position();
105  }
106 
 // Moves the wrapped reader to `new_pos`.
107  constexpr void set_position(iterator new_pos)
108  {
109  _impl.set_position(new_pos);
110  }
111 };
112 } // namespace lexy
113 
114 namespace lexyd::ascii
115 {
// Type of the `lexyd::ascii::case_folding` DSL object; calling it with a
// literal rule requests ASCII case-insensitive matching of that literal.
116 struct _cf_dsl
117 {
 // Always true for ASCII folding, whatever the encoding.
 // NOTE(review): the meaning of `is_inplace` is defined by its users, which are
 // not part of this listing -- confirm before relying on it.
118  template <typename Encoding>
119  static constexpr auto is_inplace = true;
120 
 // NOTE(review): the listing skips original line 122, i.e. the declaration this
 // template head belongs to. Presumably an alias to `lexy::_acfr<Reader>` -- confirm.
121  template <typename Reader>
123 
 // Accepts only literal rules that are not already case folded
 // (`Literal::lit_case_folding` must be `void`).
 // NOTE(review): the listing skips original line 129, which should hold the
 // return statement; the returned rule is not visible here.
124  template <typename Literal>
125  constexpr auto operator()(Literal) const
126  {
127  static_assert(lexy::is_literal_rule<Literal>);
128  static_assert(std::is_void_v<typename Literal::lit_case_folding>, "cannot case fold twice");
130  }
131 };
132 
// Matches Literal with case insensitive ASCII characters.
134 inline constexpr auto case_folding = _cf_dsl{};
135 } // namespace lexyd::ascii
136 
137 //=== Unicode ===//
138 namespace lexy
139 {
// Reader adapter for simple Unicode case folding over UTF-32 input.
// As with the ASCII adapter, only `peek()` is meant to transform the value;
// the other members forward to the wrapped reader.
140 template <typename Reader>
141 struct _sucfr32 // simple unicode case folding reader, UTF-32
142 {
143  Reader _impl; // the wrapped reader
144 
145  constexpr explicit _sucfr32(Reader impl) : _impl(impl) {}
146 
147  using encoding = typename Reader::encoding;
148  using iterator = typename Reader::iterator;
149 
 // Reads the current value from the wrapped reader.
 // NOTE(review): the listing skips original line 153, which should hold the
 // return statement. Presumably it returns the simple case folding of `c`
 // (cf. `lexy::simple_case_fold` used by `_sucfrm`) -- confirm.
150  constexpr auto peek() const -> typename encoding::int_type
151  {
152  auto c = _impl.peek();
154  }
155 
 // Advances the wrapped reader.
156  constexpr void bump()
157  {
158  _impl.bump();
159  }
160 
 // Current position of the wrapped reader.
161  constexpr iterator position() const
162  {
163  return _impl.position();
164  }
165 
 // Moves the wrapped reader to `new_pos`.
166  constexpr void set_position(iterator new_pos)
167  {
168  _impl.set_position(new_pos);
169  }
170 };
171 
// Reader adapter for simple Unicode case folding over multi-unit encodings.
// It decodes one code point at a time from the wrapped reader, folds it,
// re-encodes the result into a small buffer and serves code units from that
// buffer. `position()` only ever reports the start of the buffered code point.
172 template <typename Reader>
173 struct _sucfrm // simple unicode case folding reader, UTF-8 and UTF-16
174 {
175  using encoding = typename Reader::encoding;
176  using iterator = typename Reader::iterator;
177 
178  Reader _impl; // the wrapped reader; after _fill() it sits past the buffered code point
179  typename Reader::iterator _cur_pos; // start of the code point currently in the buffer
 // NOTE(review): the listing skips original line 180; per the symbol index it
 // declares `encoding::char_type _buffer[4]`, the buffer used below.
181  unsigned char _buffer_size; // number of valid code units in _buffer
182  unsigned char _buffer_cur; // index of the code unit returned by peek()
183 
 // NOTE(review): the listing skips original line 185 (the member initializer
 // list). Only the initial `_fill()` call is visible.
184  constexpr explicit _sucfrm(Reader impl)
186  {
187  _fill();
188  }
189 
 // Loads the next code point of the wrapped reader into the buffer and
 // records where it started.
190  constexpr void _fill()
191  {
192  _cur_pos = _impl.position();
193 
194  // We need to read the next code point at this point.
 // The symbol index shows parse_code_point taking its reader by value, so
 // `_impl` itself has to be advanced explicitly in both branches below.
195  auto result = lexy::_detail::parse_code_point(_impl);
196  if (result.error == lexy::_detail::cp_error::success)
197  {
198  // Fill the buffer with the folded code point.
199  auto folded = lexy::simple_case_fold(lexy::code_point(result.cp));
200  _buffer_size = static_cast<unsigned char>(
201  lexy::_detail::encode_code_point<encoding>(folded.value(), _buffer, 4));
202  _buffer_cur = 0;
203  _impl.set_position(result.end);
204  }
205  else
206  {
207  // Fill the buffer with the partial code point.
 // NOTE(review): the listing skips original line 208. The loop below indexes
 // with `_buffer_size`, so presumably the buffer counters are reset there --
 // confirm against the real header.
209  while (_impl.position() != result.end)
210  {
211  _buffer[_buffer_size] = static_cast<typename encoding::char_type>(_impl.peek());
212  ++_buffer_size;
213  _impl.bump();
214  }
215  }
216  }
217 
 // Returns the current buffered code unit, or `encoding::eof()` when the
 // buffer holds nothing at the current index.
218  constexpr auto peek() const -> typename encoding::int_type
219  {
220  if (_buffer_cur == _buffer_size)
221  return encoding::eof();
222 
223  auto cur = _buffer[_buffer_cur];
224  return encoding::to_int_type(cur);
225  }
226 
 // Moves to the next buffered code unit and refills the buffer from the
 // wrapped reader once the current code point is used up.
227  constexpr void bump()
228  {
229  ++_buffer_cur;
230  if (_buffer_cur == _buffer_size)
231  _fill();
232  }
233 
234  constexpr iterator position() const
235  {
236  // We only report the position at a code point boundary.
237  // This has two consequences:
238  // 1. If we don't match a rule, the error token does not include any common start code
239  // units.
240  // That's actually nice, and makes it unnecessary to handle that situation in the error
241  // reporting. The only relevant difference is in the error token.
242  // 2. If the user wants to match partial code unit sequences, the behavior can become buggy.
243  // However, that's not really something we should worry about.
244  return _cur_pos;
245  }
246 
 // Repositions the wrapped reader and immediately re-buffers the code point
 // found at `new_pos`.
247  constexpr void set_position(iterator new_pos)
248  {
249  // It's a code point boundary, so reset.
250  _impl.set_position(new_pos);
251  _fill();
252  }
253 };
254 
// Picks the simple-case-folding reader implementation from the reader's encoding.
// NOTE(review): the listing skips original line 258 with the two alternatives;
// the symbol index shows them as `_sucfr32<Reader>` when the encoding is
// `lexy::utf32_encoding` and `_sucfrm<Reader>` otherwise.
255 template <typename Reader>
256 using _sucfr_for
257  = std::conditional_t<std::is_same_v<typename Reader::encoding, lexy::utf32_encoding>,
259 
// Simple Unicode case folding reader: derives from whichever implementation
// `_sucfr_for<Reader>` selects.
// NOTE(review): the listing skips original line 263, the only line of the body.
// Presumably it inherits the base constructors -- confirm.
260 template <typename Reader>
261 struct _sucfr : _sucfr_for<Reader>
262 {
264 };
265 } // namespace lexy
266 
267 namespace lexyd::unicode
268 {
// Type of the `lexyd::unicode::simple_case_folding` DSL object; calling it with
// a literal rule requests matching under simple Unicode case folding.
269 struct _scf_dsl
270 {
 // True only when `Encoding` is `lexy::utf32_encoding`.
 // NOTE(review): the meaning of `is_inplace` is defined by its users, which are
 // not part of this listing -- confirm before relying on it.
271  template <typename Encoding>
272  static constexpr auto is_inplace = std::is_same_v<Encoding, lexy::utf32_encoding>;
273 
 // NOTE(review): the listing skips original line 275, i.e. the declaration this
 // template head belongs to. Presumably an alias to `lexy::_sucfr<Reader>` -- confirm.
274  template <typename Reader>
276 
 // Accepts only literal rules that are not already case folded
 // (`Literal::lit_case_folding` must be `void`).
 // NOTE(review): the listing skips original line 282, which should hold the
 // return statement; the returned rule is not visible here.
277  template <typename Literal>
278  constexpr auto operator()(Literal) const
279  {
280  static_assert(lexy::is_literal_rule<Literal>);
281  static_assert(std::is_void_v<typename Literal::lit_case_folding>, "cannot case fold twice");
283  }
284 };
285 
// Matches Literal with case insensitive Unicode characters (simple case folding).
287 inline constexpr auto simple_case_folding = _scf_dsl{};
288 } // namespace lexyd::unicode
289 
290 #endif // LEXY_DSL_CASE_FOLDING_HPP_INCLUDED
291 
lexyd::_cfl::lit_first_char
static constexpr auto lit_first_char() -> typename Encoding::char_type
Definition: case_folding.hpp:32
code_point.hpp
LEXY_CONSTEVAL
#define LEXY_CONSTEVAL
Definition: config.hpp:90
lexy::simple_case_fold
LEXY_UNICODE_CONSTEXPR code_point simple_case_fold(code_point cp) noexcept
lexy::_sucfrm::_cur_pos
Reader::iterator _cur_pos
Definition: case_folding.hpp:179
lexy::_sucfrm::position
constexpr iterator position() const
Definition: case_folding.hpp:234
lexyd::_cfl::lit_insert
static LEXY_CONSTEVAL std::size_t lit_insert(Trie &trie, std::size_t pos, std::size_t char_class)
Definition: case_folding.hpp:38
literal.hpp
lexy::_sucfrm::_buffer
encoding::char_type _buffer[4]
Definition: case_folding.hpp:180
magic_enum::char_type
string_view::value_type char_type
Definition: magic_enum.hpp:145
lexyd::ascii::_cf_dsl::is_inplace
static constexpr auto is_inplace
Definition: case_folding.hpp:119
lexy::_detail::parse_code_point
constexpr cp_result< Reader > parse_code_point(Reader reader)
Definition: _detail/code_point.hpp:142
lexyd::_cfl::lit_char_classes
static constexpr auto lit_char_classes
Definition: case_folding.hpp:27
lexyd::_cfl::tp::report_error
constexpr void report_error(Context &context, Reader reader)
Definition: case_folding.hpp:63
lexy::_acfr::peek
constexpr auto peek() const -> typename encoding::int_type
Definition: case_folding.hpp:88
lexyd::unicode
Definition: case_folding.hpp:267
lexy::_sucfr32::_sucfr32
constexpr _sucfr32(Reader impl)
Definition: case_folding.hpp:145
lexyd::ascii::_cf_dsl
Definition: case_folding.hpp:116
lexyd::_cfl::tp::try_parse
constexpr bool try_parse(Reader _reader)
Definition: case_folding.hpp:54
lexy::_sucfrm::_buffer_size
unsigned char _buffer_size
Definition: case_folding.hpp:181
lexy
Definition: any_ref.hpp:12
lexyd::_cfl
Definition: case_folding.hpp:23
lexy::_sucfr32::position
constexpr iterator position() const
Definition: case_folding.hpp:161
lexy::literal_token_kind
@ literal_token_kind
Definition: grammar.hpp:81
lexy::_sucfr_for
std::conditional_t< std::is_same_v< typename Reader::encoding, lexy::utf32_encoding >, _sucfr32< Reader >, _sucfrm< Reader > > _sucfr_for
Definition: case_folding.hpp:258
lexy::_acfr::iterator
typename Reader::iterator iterator
Definition: case_folding.hpp:86
lexy::code_point::value
constexpr auto value() const noexcept
Definition: code_point.hpp:30
lexy::_acfr::encoding
typename Reader::encoding encoding
Definition: case_folding.hpp:85
lexy::_acfr::position
constexpr iterator position() const
Definition: case_folding.hpp:102
lexy::_sucfrm::set_position
constexpr void set_position(iterator new_pos)
Definition: case_folding.hpp:247
lexy::_sucfrm::_buffer_cur
unsigned char _buffer_cur
Definition: case_folding.hpp:182
lexyd::unicode::_scf_dsl
Definition: case_folding.hpp:269
lexyd::_cfl::tp
Definition: case_folding.hpp:45
lexyd::token_base
Definition: dsl/token.hpp:42
lexyd::_cfl::lit_max_char_count
static constexpr auto lit_max_char_count
Definition: case_folding.hpp:25
lexy::_sucfrm::encoding
typename Reader::encoding encoding
Definition: case_folding.hpp:175
lexy::_sucfr32::bump
constexpr void bump()
Definition: case_folding.hpp:156
lexyd::unicode::simple_case_folding
constexpr auto simple_case_folding
Matches Literal with case insensitive Unicode characters (simple case folding).
Definition: case_folding.hpp:287
lexy::_sucfr32::peek
constexpr auto peek() const -> typename encoding::int_type
Definition: case_folding.hpp:150
lexy::_sucfrm::_fill
constexpr void _fill()
Definition: case_folding.hpp:190
lexyd::_cfl::tp::end
Reader::iterator end
Definition: case_folding.hpp:48
lexy::_sucfrm
Definition: case_folding.hpp:173
lexyd::unicode::_scf_dsl::operator()
constexpr auto operator()(Literal) const
Definition: case_folding.hpp:278
lexy::_sucfr
Definition: case_folding.hpp:261
lexy::_sucfr32::set_position
constexpr void set_position(iterator new_pos)
Definition: case_folding.hpp:166
lexyd::_cfl_folding
Definition: case_folding.hpp:16
lexy::_sucfr32::iterator
typename Reader::iterator iterator
Definition: case_folding.hpp:148
lexy::_acfr::set_position
constexpr void set_position(iterator new_pos)
Definition: case_folding.hpp:107
base.hpp
lexy::_sucfrm::peek
constexpr auto peek() const -> typename encoding::int_type
Definition: case_folding.hpp:218
lexy::_acfr::bump
constexpr void bump()
Definition: case_folding.hpp:97
lexyd::ascii::case_folding
constexpr auto case_folding
Matches Literal with case insensitive ASCII characters.
Definition: case_folding.hpp:134
lexy::_sucfrm::_sucfrm
constexpr _sucfrm(Reader impl)
Definition: case_folding.hpp:184
lexy::_detail::cp_error::success
@ success
lexy::_sucfrm::_impl
Reader _impl
Definition: case_folding.hpp:178
lexy::_sucfrm::iterator
typename Reader::iterator iterator
Definition: case_folding.hpp:176
lexy::_acfr::_impl
Reader _impl
Definition: case_folding.hpp:83
code_point.hpp
lexy::_sucfr32::encoding
typename Reader::encoding encoding
Definition: case_folding.hpp:147
lexy::_acfr
Definition: case_folding.hpp:81
lexy::_sucfr32::_impl
Reader _impl
Definition: case_folding.hpp:143
lexy::_sucfr32
Definition: case_folding.hpp:141
lexyd::_cfl::tp::impl
lexy::token_parser_for< Literal, CaseFolding< Reader > > impl
Definition: case_folding.hpp:47
lexyd::_cfl::tp::tp
constexpr tp(const Reader &reader)
Definition: case_folding.hpp:50
lexy::token_parser_for
typename TokenRule::template tp< Reader > token_parser_for
Definition: dsl/base.hpp:229
lexyd::_lit_base
Definition: grammar.hpp:32
lexyd::ascii
Definition: ascii.hpp:13
lexyd
Definition: trace.hpp:22
lexy::_sucfrm::bump
constexpr void bump()
Definition: case_folding.hpp:227
lexyd::eof
constexpr auto eof
Matches EOF.
Definition: eof.hpp:72
lexyd::unicode::_scf_dsl::is_inplace
static constexpr auto is_inplace
Definition: case_folding.hpp:272
lexyd::_cfl_folding::reader
CaseFolding< Reader > reader
Definition: case_folding.hpp:19
lexyd::ascii::_cf_dsl::operator()
constexpr auto operator()(Literal) const
Definition: case_folding.hpp:125
lexy::code_point
A unicode code point.
Definition: code_point.hpp:24


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Jun 28 2024 02:20:07