input_location.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2024 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_INPUT_LOCATION_HPP_INCLUDED
5 #define LEXY_INPUT_LOCATION_HPP_INCLUDED
6 
8 #include <lexy/dsl/newline.hpp>
9 #include <lexy/input/base.hpp>
10 #include <lexy/lexeme.hpp>
11 
12 //=== input_location_anchor ===//
13 namespace lexy
14 {
16 template <typename Input>
18 {
20 
21  constexpr explicit input_location_anchor(const Input& input)
22  : _line_begin(input.reader().current()), _line_nr(1)
23  {}
24 
25  // implementation detail
26  constexpr explicit input_location_anchor(marker line_begin, unsigned line_nr)
27  : _line_begin(line_begin), _line_nr(line_nr)
28  {}
29 
31  unsigned _line_nr;
32 };
33 } // namespace lexy
34 
35 //=== counting strategies ===//
36 namespace lexy
37 {
40 {
41 public:
42  template <typename Reader>
43  constexpr bool try_match_newline(Reader& reader)
44  {
45  static_assert(lexy::is_char_encoding<typename Reader::encoding>);
47  }
48 
49  template <typename Reader>
50  constexpr void match_column(Reader& reader)
51  {
52  static_assert(lexy::is_char_encoding<typename Reader::encoding>);
53  reader.bump();
54  }
55 };
56 
59 {
60 public:
61  template <typename Reader>
62  constexpr bool try_match_newline(Reader& reader)
63  {
64  static_assert(lexy::is_char_encoding<typename Reader::encoding>);
66  }
67 
68  template <typename Reader>
69  constexpr void match_column(Reader& reader)
70  {
71  static_assert(lexy::is_char_encoding<typename Reader::encoding>);
73  reader.bump();
74  }
75 };
76 
78 template <std::size_t LineWidth = 16>
80 {
81 public:
82  template <typename Reader>
83  constexpr bool try_match_newline(Reader& reader)
84  {
85  static_assert(lexy::is_byte_encoding<typename Reader::encoding>);
86  LEXY_PRECONDITION(_cur_index <= LineWidth - 1);
87  if (_cur_index == LineWidth - 1)
88  {
89  // Consider the last byte to be the "newline".
90  // We need to consume something if possible;
91  // the logic in the function breaks otherwise.
92  if (reader.peek() != Reader::encoding::eof())
93  reader.bump();
94  _cur_index = 0;
95  return true;
96  }
97  else
98  {
99  return false;
100  }
101  }
102 
103  template <typename Reader>
104  constexpr void match_column(Reader& reader)
105  {
106  static_assert(lexy::is_byte_encoding<typename Reader::encoding>);
107 
108  reader.bump();
109  ++_cur_index;
110  }
111 
112 private:
113  std::size_t _cur_index = 0;
114 };
115 
116 template <typename Input>
118 {
119  using encoding = typename lexy::input_reader<Input>::encoding;
120  if constexpr (lexy::is_byte_encoding<encoding>)
121  return byte_location_counting{};
122  else if constexpr (lexy::is_char_encoding<encoding>)
124  else
125  static_assert(_detail::error<Input>,
126  "input encoding does not have a default location counting policy");
127 }
128 
129 template <typename Input>
130 using _default_location_counting = decltype(_compute_default_location_counting<Input>());
131 } // namespace lexy
132 
133 //=== input_location ===//
134 namespace lexy
135 {
137 template <typename Input, typename Counting = _default_location_counting<Input>>
139 {
142 
143 public:
144  constexpr explicit input_location(const Input& input)
145  : _line_begin(input.reader().current()), _column_begin(_line_begin.position()), _line_nr(1),
146  _column_nr(1)
147  {}
148 
151  {
153  }
154 
155  constexpr unsigned line_nr() const
156  {
157  return _line_nr;
158  }
159  constexpr unsigned column_nr() const
160  {
161  return _column_nr;
162  }
163 
165  constexpr iterator position() const
166  {
167  return _column_begin;
168  }
169 
170  friend constexpr bool operator==(const input_location& lhs, const input_location& rhs)
171  {
172  return lhs._line_nr == rhs._line_nr && lhs._column_nr == rhs._column_nr;
173  }
174  friend constexpr bool operator!=(const input_location& lhs, const input_location& rhs)
175  {
176  return !(lhs == rhs);
177  }
178 
179  friend constexpr bool operator<(const input_location& lhs, const input_location& rhs)
180  {
181  if (lhs._line_nr != rhs._line_nr)
182  return lhs._line_nr < rhs._line_nr;
183  return lhs._column_nr < rhs._column_nr;
184  }
185  friend constexpr bool operator<=(const input_location& lhs, const input_location& rhs)
186  {
187  return !(rhs < lhs);
188  }
189  friend constexpr bool operator>(const input_location& lhs, const input_location& rhs)
190  {
191  return rhs < lhs;
192  }
193  friend constexpr bool operator>=(const input_location& lhs, const input_location& rhs)
194  {
195  return !(rhs > lhs);
196  }
197 
198 private:
199  constexpr input_location(marker line_begin, unsigned line_nr, iterator column_begin,
200  unsigned column_nr)
201  : _line_begin(line_begin), _column_begin(column_begin), _line_nr(line_nr), _column_nr(column_nr)
202  {}
203 
206  unsigned _line_nr, _column_nr;
207 
208  template <typename C, typename I>
209  friend constexpr auto get_input_location(const I& input,
213 };
214 
216 template <typename Counting, typename Input>
217 constexpr auto get_input_location(const Input& input,
221 {
222  auto reader = input.reader();
223  reader.reset(anchor._line_begin);
224 
225  auto line_begin = anchor._line_begin;
226  auto line_nr = anchor._line_nr;
227  auto column_begin = line_begin;
228  auto column_nr = 1u;
229 
230  Counting counting;
231  while (true)
232  {
233  if (reader.position() == position)
234  {
235  // We've already found the position; it's at the beginning of a colum nor newline.
236  // No need to do the expensive checks.
237  //
238  // This also allows `lexy_ext::shell` to work properly, if position is at EOF,
239  // the reader.peek() call will ask for more input.
240  break;
241  }
242  else if (reader.peek() == lexy::input_reader<Input>::encoding::eof())
243  {
244  LEXY_ASSERT(false, "invalid position + anchor combination");
245  }
246  else if (counting.try_match_newline(reader))
247  {
248  // [column_begin, newline_end) covers the newline.
249  auto newline_end = reader.current();
250  if (lexy::_detail::min_range_end(column_begin.position(), newline_end.position(),
251  position)
252  != newline_end.position())
253  break;
254 
255  // Advance to the next line.
256  ++line_nr;
257  line_begin = newline_end;
258  column_nr = 1;
259  column_begin = line_begin;
260  }
261  else
262  {
263  counting.match_column(reader);
264 
265  // [column_begin, column_end) covers the column.
266  auto column_end = reader.current();
267  if (lexy::_detail::min_range_end(column_begin.position(), column_end.position(),
268  position)
269  != column_end.position())
270  break;
271 
272  // Advance to the next column.
273  ++column_nr;
274  column_begin = column_end;
275  }
276  }
277 
278  return {line_begin, line_nr, column_begin.position(), column_nr};
279 }
280 
281 template <typename Counting, typename Input>
282 constexpr auto get_input_location(const Input& input,
284 {
285  return get_input_location<Counting>(input, position, input_location_anchor(input));
286 }
287 template <typename Input>
288 constexpr auto get_input_location(const Input& input,
291 {
292  return get_input_location<_default_location_counting<Input>>(input, position, anchor);
293 }
294 template <typename Input>
295 constexpr auto get_input_location(const Input& input,
297 {
298  return get_input_location<_default_location_counting<Input>>(input, position,
299  input_location_anchor(input));
300 }
301 } // namespace lexy
302 
303 //=== input_line_annotation ===//
304 namespace lexy::_detail
305 {
306 template <typename Counting, typename Input>
307 constexpr auto get_input_line(const Input& input,
308  typename lexy::input_reader<Input>::marker line_begin)
309 {
310  auto reader = input.reader();
311  reader.reset(line_begin);
312 
313  auto line_end = reader.position();
314  for (Counting counting;
315  reader.peek() != decltype(reader)::encoding::eof() && !counting.try_match_newline(reader);
316  line_end = reader.position())
317  {
318  counting.match_column(reader);
319  }
320  auto newline_end = reader.position();
321 
322  struct result_t
323  {
326  };
327  return result_t{{line_begin.position(), line_end}, {line_end, newline_end}};
328 }
329 
330 // Advances the iterator to the beginning of the next code point.
331 template <typename Encoding, typename Iterator>
332 constexpr Iterator find_cp_boundary(Iterator cur, Iterator end)
333 {
334  auto is_cp_continuation = [](auto c) {
335  if constexpr (std::is_same_v<Encoding,
337  || std::is_same_v<Encoding, lexy::utf8_char_encoding>)
338  return (c & 0b1100'0000) == (0b10 << 6);
339  else if constexpr (std::is_same_v<Encoding, lexy::utf16_encoding>)
340  return 0xDC00 <= c && c <= 0xDFFF;
341  else
342  {
343  // This encoding doesn't have continuation code units.
344  (void)c;
345  return std::false_type{};
346  }
347  };
348 
349  while (cur != end && is_cp_continuation(*cur))
350  ++cur;
351  return cur;
352 }
353 } // namespace lexy::_detail
354 
355 namespace lexy
356 {
357 template <typename Input>
359 {
366 
373 };
374 
375 template <typename Input>
381 {
382  // At this point there are two cases:
383  // Either line.begin() <= begin < end <= newline.end()),
384  // or line.begin() <= begin == end == newline.end().
385 
386  // We then round end to the code point boundary.
387  // Note that we don't round begin.
388  {
389  auto old_end = end;
390 
391  using encoding = typename lexy::input_reader<Input>::encoding;
392  end = _detail::find_cp_boundary<encoding>(end, newline.end());
393 
394  result.rounded_end = end != old_end;
395  }
396 
397  // Now we can compute the annotation.
398  if (lexy::_detail::min_range_end(line.begin(), line.end(), end) == end)
399  {
400  // We have end <= line.end(),
401  // so line.end() is the end of after.
402  result.before = {line.begin(), begin};
403  result.annotated = {begin, end};
404  result.after = {end, line.end()};
405  }
406  else
407  {
408  // We have end > line.end(),
409  // so newline.end() is the end of annotated.
410  result.before = {line.begin(), begin};
411  result.annotated = {begin, newline.end()};
412  result.after = {newline.end(), newline.end()};
413  result.annotated_newline = true;
414  }
415 }
416 
417 template <typename Input, typename Counting>
418 constexpr auto get_input_line_annotation(const Input& input,
419  const input_location<Input, Counting>& begin_location,
422 {
424 
425  auto [line, newline]
426  = _detail::get_input_line<Counting>(input, begin_location.anchor()._line_begin);
427 
428  // We first normalize the range.
429  auto begin = begin_location.position();
430  if (begin == end)
431  {
432  if (end == newline.begin())
433  {
434  // Empty range at the newline; make it cover the entire newline.
435  end = newline.end();
436  }
437  else if (end != newline.end())
438  {
439  // Empty range before end of newline; extend by one code unit.
440  ++end;
441  }
442  else
443  {
444  // begin == end == newline.end()
445  }
446  }
447  else if (lexy::_detail::min_range_end(begin, end, newline.end()) != end)
448  {
449  // Truncate a multiline range to a single line.
450  // Note that we can't have both an empty range and a multiline range.
451  end = newline.end();
452  result.truncated_multiline = true;
453  }
454 
455  _get_input_line_annotation(result, line, newline, begin, end);
456  return result;
457 }
458 
460 template <typename Input, typename Counting>
461 constexpr auto get_input_line_annotation(const Input& input,
462  const input_location<Input, Counting>& location,
463  std::size_t size)
464 {
466  auto [line, newline] = _detail::get_input_line<Counting>(input, location.anchor()._line_begin);
467 
468  // We don't want an empty annotation.
469  auto range_size = size == 0 ? 1 : size;
470 
471  auto begin = location.position();
472  auto end = _detail::next_clamped(location.position(), range_size, newline.end());
473  if (_detail::range_size(location.position(), end) < size)
474  {
475  // We didn't have enough of the current line to match the size request.
476  // As such, we needed to truncate it.
477  result.truncated_multiline = true;
478  }
479 
480  _get_input_line_annotation(result, line, newline, begin, end);
481  return result;
482 }
483 } // namespace lexy
484 
485 #endif // LEXY_INPUT_LOCATION_HPP_INCLUDED
486 
cx::size
constexpr auto size(const C &c) -> decltype(c.size())
Definition: wildcards.hpp:636
lexyd::position
constexpr auto position
Produces an iterator to the current reader position without parsing anything.
Definition: position.hpp:79
lexy::input_location::_line_begin
marker _line_begin
Definition: input_location.hpp:204
lexy::input_location::operator<=
constexpr friend bool operator<=(const input_location &lhs, const input_location &rhs)
Definition: input_location.hpp:185
lexy::code_unit_location_counting::try_match_newline
constexpr bool try_match_newline(Reader &reader)
Definition: input_location.hpp:43
lexy::code_unit_location_counting::match_column
constexpr void match_column(Reader &reader)
Definition: input_location.hpp:50
lexy::input_location
A location in the input.
Definition: input_location.hpp:138
lexy::input_location_anchor::_line_begin
marker _line_begin
Definition: input_location.hpp:30
lexy::code_point_location_counting::match_column
constexpr void match_column(Reader &reader)
Definition: input_location.hpp:69
lexy::byte_location_counting::try_match_newline
constexpr bool try_match_newline(Reader &reader)
Definition: input_location.hpp:83
lexy::input_location::iterator
typename lexy::input_reader< Input >::iterator iterator
Definition: input_location.hpp:140
lexy::_get_input_line_annotation
constexpr void _get_input_line_annotation(input_line_annotation< Input > &result, lexy::lexeme_for< Input > line, lexy::lexeme_for< Input > newline, typename lexy::input_reader< Input >::iterator begin, typename lexy::input_reader< Input >::iterator end)
Definition: input_location.hpp:376
lexy::code_point_location_counting::try_match_newline
constexpr bool try_match_newline(Reader &reader)
Definition: input_location.hpp:62
newline.hpp
lexy::input_location_anchor::_line_nr
unsigned _line_nr
Definition: input_location.hpp:31
lexyd::ascii::newline
constexpr auto newline
Definition: ascii.hpp:77
lexy::input_location::_column_begin
iterator _column_begin
Definition: input_location.hpp:205
lexy::input_location_anchor::input_location_anchor
constexpr input_location_anchor(marker line_begin, unsigned line_nr)
Definition: input_location.hpp:26
lexy::lexeme::begin
constexpr iterator begin() const noexcept
Definition: lexeme.hpp:45
lexyd::code_point
constexpr auto code_point
Matches a single unicode code point in the current unicode encoding.
Definition: dsl/code_point.hpp:200
lexy::get_input_location
constexpr auto get_input_location(const Input &input, typename lexy::input_reader< Input >::iterator position, input_location_anchor< Input > anchor) -> input_location< Input, Counting >
The location for a position in the input; search starts at the anchor.
Definition: input_location.hpp:217
lexy
Definition: any_ref.hpp:12
LEXY_PRECONDITION
#define LEXY_PRECONDITION(Expr)
Definition: assert.hpp:36
lexy::input_location::marker
typename lexy::input_reader< Input >::marker marker
Definition: input_location.hpp:141
cx::end
constexpr auto end(const C &c) -> decltype(c.end())
Definition: wildcards.hpp:686
lexy::input_location::column_nr
constexpr unsigned column_nr() const
Definition: input_location.hpp:159
lexy::_compute_default_location_counting
auto _compute_default_location_counting()
Definition: input_location.hpp:117
lexy::_detail::next_clamped
constexpr Iterator next_clamped(Iterator iter, std::size_t n, Sentinel end)
Definition: iterator.hpp:58
lexy::input_line_annotation::rounded_end
bool rounded_end
true if end needed to be moved to a code point boundary.
Definition: input_location.hpp:372
lexy::input_location::get_input_location
constexpr friend auto get_input_location(const I &input, typename lexy::input_reader< I >::iterator position, input_location_anchor< I > anchor) -> input_location< I, C >
lexy::input_location::operator!=
constexpr friend bool operator!=(const input_location &lhs, const input_location &rhs)
Definition: input_location.hpp:174
lexy::input_location::line_nr
constexpr unsigned line_nr() const
Definition: input_location.hpp:155
lexy::get_input_line_annotation
constexpr auto get_input_line_annotation(const Input &input, const input_location< Input, Counting > &begin_location, typename lexy::input_reader< Input >::iterator end) -> input_line_annotation< Input >
Definition: input_location.hpp:418
lexy::input_location::position
constexpr iterator position() const
The corresponding position, rounded down to the previous column start.
Definition: input_location.hpp:165
lexy::try_match_token
constexpr LEXY_FORCE_INLINE auto try_match_token(TokenRule, Reader &reader)
Definition: dsl/base.hpp:245
lexeme.hpp
lexy::input_line_annotation
Definition: input_location.hpp:358
lexy::byte_location_counting::match_column
constexpr void match_column(Reader &reader)
Definition: input_location.hpp:104
lexy::byte_location_counting
Counts bytes for columns, lines end after LineWidth bytes.
Definition: input_location.hpp:79
lexy::input_line_annotation::annotated
lexy::lexeme_for< Input > annotated
The annotated part.
Definition: input_location.hpp:363
lexy::input_location::input_location
constexpr input_location(marker line_begin, unsigned line_nr, iterator column_begin, unsigned column_nr)
Definition: input_location.hpp:199
lexy::utf8_encoding
An encoding where the input is assumed to be valid UTF-8.
Definition: encoding.hpp:84
lexy::input_location::operator>=
constexpr friend bool operator>=(const input_location &lhs, const input_location &rhs)
Definition: input_location.hpp:193
lexy::input_location_anchor::marker
typename lexy::input_reader< Input >::marker marker
Definition: input_location.hpp:19
code_point.hpp
lexy::_detail::range_size
constexpr std::size_t range_size(Iterator begin, Sentinel end)
Definition: iterator.hpp:22
lexy::_detail::get_input_line
constexpr auto get_input_line(const Input &input, typename lexy::input_reader< Input >::marker line_begin)
Definition: input_location.hpp:307
lexy::lexeme::end
constexpr iterator end() const noexcept
Definition: lexeme.hpp:49
cx::begin
constexpr auto begin(const C &c) -> decltype(c.begin())
Definition: wildcards.hpp:661
lexy::_default_location_counting
decltype(_compute_default_location_counting< Input >()) _default_location_counting
Definition: input_location.hpp:130
lexy::code_point_location_counting
Counts code points for columns, newlines for lines.
Definition: input_location.hpp:58
lexy::input_line_annotation::truncated_multiline
bool truncated_multiline
true if the range was spanning multiple lines and needed to be truncated.
Definition: input_location.hpp:368
lexy::_detail
Definition: any_ref.hpp:12
lexy::byte_location_counting::_cur_index
std::size_t _cur_index
Definition: input_location.hpp:113
lexy::input_location::input_location
constexpr input_location(const Input &input)
Definition: input_location.hpp:144
lexy::input_location::_column_nr
unsigned _column_nr
Definition: input_location.hpp:206
lexy::input_location::anchor
constexpr input_location_anchor< Input > anchor() const
The closest previous anchor.
Definition: input_location.hpp:150
lexy::input_line_annotation::annotated_newline
bool annotated_newline
true if annotated includes the newline (this implies after.empty())
Definition: input_location.hpp:370
lexy::lexeme
Definition: lexeme.hpp:16
lexy::input_location::operator==
constexpr friend bool operator==(const input_location &lhs, const input_location &rhs)
Definition: input_location.hpp:170
lexy::input_location::_line_nr
unsigned _line_nr
Definition: input_location.hpp:206
base.hpp
lexy::input_location_anchor::input_location_anchor
constexpr input_location_anchor(const Input &input)
Definition: input_location.hpp:21
lexy::input_location::operator<
constexpr friend bool operator<(const input_location &lhs, const input_location &rhs)
Definition: input_location.hpp:179
lexy::_detail::min_range_end
constexpr Iterator min_range_end(Iterator begin, Iterator end_a, Iterator end_b)
Definition: iterator.hpp:93
lexy::_detail::find_cp_boundary
constexpr Iterator find_cp_boundary(Iterator cur, Iterator end)
Definition: input_location.hpp:332
lexy::input_line_annotation::before
lexy::lexeme_for< Input > before
Everything of the line before the range.
Definition: input_location.hpp:361
lexy::input_location_anchor
Anchor for the location search.
Definition: input_location.hpp:17
lexy::input_line_annotation::after
lexy::lexeme_for< Input > after
Everything of the line after the annotated range.
Definition: input_location.hpp:365
lexyd::eof
constexpr auto eof
Matches EOF.
Definition: eof.hpp:72
LEXY_ASSERT
#define LEXY_ASSERT(Expr, Msg)
Definition: assert.hpp:37
lexy::input_reader
decltype(LEXY_DECLVAL(Input).reader()) input_reader
Definition: input/base.hpp:106
lexy::code_unit_location_counting
Counts code units for columns, newlines for lines.
Definition: input_location.hpp:39
lexy::input_location::operator>
constexpr friend bool operator>(const input_location &lhs, const input_location &rhs)
Definition: input_location.hpp:189


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Dec 13 2024 03:19:16