encoding.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2024 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_ENCODING_HPP_INCLUDED
5 #define LEXY_ENCODING_HPP_INCLUDED
6 
7 #include <cstdint>
10 
11 //=== encoding classes ===//
12 namespace lexy
13 {
/// The endianness used by an encoding.
enum class encoding_endianness
{
    /// Little endian.
    little,
    /// Big endian.
    big,
    /// NOTE(review): presumably "detect the endianness from a byte-order mark in the input" --
    /// confirm against the code that consumes this enumerator.
    bom,
};
25 
28 {
29  using char_type = char;
30  using int_type = int;
31 
32  template <typename OtherCharType>
33  static constexpr bool is_secondary_char_type()
34  {
35  return false;
36  }
37 
39  {
40  return -1;
41  }
42 
43  static constexpr int_type to_int_type(char_type c)
44  {
45  if constexpr (std::is_unsigned_v<char_type>)
46  // We can just convert it to int directly.
47  return static_cast<int_type>(c);
48  else
49  {
50  // We first need to prevent negative values, by making it unsigned.
51  auto value = static_cast<unsigned char>(c);
52  return static_cast<int_type>(value);
53  }
54  }
55 };
56 
57 // An encoding where the input is assumed to be valid ASCII.
59 {
60  using char_type = char;
61  using int_type = char;
62 
63  template <typename OtherCharType>
64  static constexpr bool is_secondary_char_type()
65  {
66  return false;
67  }
68 
70  {
71  if constexpr (std::is_signed_v<char_type>)
72  return int_type(-1);
73  else
74  return int_type(0xFFu);
75  }
76 
77  static constexpr int_type to_int_type(char_type c)
78  {
79  return int_type(c);
80  }
81 };
82 
85 {
88 
89  template <typename OtherCharType>
90  static constexpr bool is_secondary_char_type()
91  {
92  return std::is_same_v<OtherCharType, char>;
93  }
94 
96  {
97  // 0xFF is not part of valid UTF-8.
98  return int_type(0xFF);
99  }
100 
101  static constexpr int_type to_int_type(char_type c)
102  {
103  return int_type(c);
104  }
105 };
106 
109 {
110  using char_type = char;
111  using int_type = char;
112 
113  template <typename OtherCharType>
114  static constexpr bool is_secondary_char_type()
115  {
116  return std::is_same_v<OtherCharType, LEXY_CHAR8_T>;
117  }
118 
120  {
121  // 0xFF is not part of valid UTF-8.
122  return int_type(0xFF);
123  }
124 
125  static constexpr int_type to_int_type(char_type c)
126  {
127  return int_type(c);
128  }
129 };
130 
133 {
134  using char_type = char16_t;
135  using int_type = std::int_least32_t;
136 
137  template <typename OtherCharType>
138  static constexpr bool is_secondary_char_type()
139  {
140  return sizeof(wchar_t) == sizeof(char16_t) && std::is_same_v<OtherCharType, wchar_t>;
141  }
142 
144  {
145  // Every value of char16_t is valid UTF16.
146  return int_type(-1);
147  }
148 
149  static constexpr int_type to_int_type(char_type c)
150  {
151  return int_type(c);
152  }
153 };
154 
157 {
158  using char_type = char32_t;
159  using int_type = char32_t;
160 
161  template <typename OtherCharType>
162  static constexpr bool is_secondary_char_type()
163  {
164  return sizeof(wchar_t) == sizeof(char32_t) && std::is_same_v<OtherCharType, wchar_t>;
165  }
166 
168  {
169  // The highest unicode code point is U+10'FFFF, so this is never a valid code point.
170  return int_type(0xFFFF'FFFF);
171  }
172 
173  static constexpr int_type to_int_type(char_type c)
174  {
175  return c;
176  }
177 };
178 
180 struct byte_encoding
181 {
182  using char_type = unsigned char;
183  using int_type = int;
184 
185  template <typename OtherCharType>
186  static constexpr bool is_secondary_char_type()
187  {
188  return std::is_same_v<OtherCharType, char> || std::is_same_v<OtherCharType, std::byte>;
189  }
190 
191  static LEXY_CONSTEVAL int_type eof()
192  {
193  return -1;
194  }
195 
196  static constexpr int_type to_int_type(char_type c)
197  {
198  return int_type(c);
199  }
200 };
201 } // namespace lexy
202 
203 //=== deduce_encoding ===//
204 namespace lexy
205 {
/// Maps a character type to an encoding via the nested `type` alias.
/// Only declared here: each supported character type provides its own specialization.
template <typename CharT>
struct _deduce_encoding;

/// The encoding deduced for `CharT`, i.e. `_deduce_encoding<CharT>::type`.
template <typename CharT>
using deduce_encoding = typename _deduce_encoding<CharT>::type;
210 
211 template <>
212 struct _deduce_encoding<char>
213 {
214 #if defined(LEXY_ENCODING_OF_CHAR)
215  using type = LEXY_ENCODING_OF_CHAR;
216  static_assert(std::is_same_v<type, default_encoding> //
217  || std::is_same_v<type, ascii_encoding> //
218  || std::is_same_v<type, utf8_encoding> //
219  || std::is_same_v<type, utf8_char_encoding>,
220  "invalid value for LEXY_ENCODING_OF_CHAR");
221 #else
222  using type = default_encoding; // Don't know the exact encoding.
223 #endif
224 };
225 
226 #if LEXY_HAS_CHAR8_T
227 template <>
229 {
230  using type = utf8_encoding;
231 };
232 #endif
233 template <>
234 struct _deduce_encoding<char16_t>
235 {
237 };
238 template <>
239 struct _deduce_encoding<char32_t>
240 {
242 };
243 
244 template <>
245 struct _deduce_encoding<unsigned char>
246 {
248 };
249 template <>
251 {
253 };
254 } // namespace lexy
255 
256 //=== encoding traits ===//
257 namespace lexy
258 {
259 template <typename Encoding>
260 constexpr auto is_unicode_encoding
261  = std::is_same_v<Encoding, ascii_encoding> || std::is_same_v<Encoding, utf8_encoding>
262  || std::is_same_v<Encoding, utf8_char_encoding> || std::is_same_v<Encoding, utf16_encoding>
263  || std::is_same_v<Encoding, utf32_encoding>;
264 
265 template <typename Encoding>
266 constexpr auto is_text_encoding
267  = is_unicode_encoding<Encoding> || std::is_same_v<Encoding, default_encoding>;
268 
269 template <typename Encoding>
270 constexpr auto is_byte_encoding = std::is_same_v<Encoding, byte_encoding>;
271 
272 template <typename Encoding>
273 constexpr auto is_char_encoding = is_text_encoding<Encoding> || is_byte_encoding<Encoding>;
274 
275 template <typename Encoding>
276 constexpr auto is_node_encoding = false;
277 } // namespace lexy
278 
279 //=== impls ===//
280 namespace lexy::_detail
281 {
/// True if `CharT` is either the primary character type of `Encoding`,
/// or one of the secondary character types it reports.
template <typename Encoding, typename CharT>
constexpr bool is_compatible_char_type
    = std::is_same_v<typename Encoding::char_type, CharT>
      || Encoding::template is_secondary_char_type<CharT>();
285 
/// SFINAE helper: names `void` if `CharT` is a secondary character type of `Encoding`,
/// and is ill-formed (removing the candidate from overload resolution) otherwise.
template <typename Encoding, typename CharT>
using require_secondary_char_type
    = std::enable_if_t<Encoding::template is_secondary_char_type<CharT>()>;
289 
/// Returns whether the value of `c` lies in the ASCII range [0, 0x7F].
template <typename CharT>
constexpr bool is_ascii(CharT c)
{
    constexpr auto highest_ascii = 0x7F;

    if constexpr (!std::is_signed_v<CharT>)
    {
        // Unsigned types cannot be negative, so only the upper bound needs checking.
        return c <= highest_ascii;
    }
    else
    {
        return c >= 0 && c <= highest_ascii;
    }
}
298 
299 template <typename TargetCharT, typename CharT>
300 LEXY_CONSTEVAL TargetCharT transcode_char(CharT c)
301 {
302  if constexpr (std::is_same_v<CharT, TargetCharT>)
303  {
304  return c;
305  }
306 #if !LEXY_HAS_CHAR8_T
307  else if constexpr (std::is_same_v<CharT, char> && std::is_same_v<TargetCharT, LEXY_CHAR8_T>)
308  {
309  // If we don't have char8_t, `LEXY_LIT(u8"ä")` would have the type char, not LEXY_CHAR8_T
310  // (which is unsigned char). So we disable checking in that case, to allow such usage. Note
311  // that this prevents catching `LEXY_LIT("ä")`, but there is nothing we can do.
312  return static_cast<LEXY_CHAR8_T>(c);
313  }
314 #endif
315  else
316  {
317  LEXY_ASSERT(is_ascii(c), "character type of string literal didn't match, "
318  "so only ASCII characters are supported");
319  // Note that we don't need to worry about signed/unsigned conversion, all ASCII values are
320  // positive.
321  return static_cast<TargetCharT>(c);
322  }
323 }
324 
325 template <typename Encoding, typename CharT>
326 LEXY_CONSTEVAL auto transcode_int(CharT c) -> typename Encoding::int_type
327 {
328  return Encoding::to_int_type(transcode_char<typename Encoding::char_type>(c));
329 }
330 } // namespace lexy::_detail
331 
332 #endif // LEXY_ENCODING_HPP_INCLUDED
333 
lexy::utf16_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:138
LEXY_CONSTEVAL
#define LEXY_CONSTEVAL
Definition: config.hpp:98
lexy::encoding_endianness
encoding_endianness
The endianness used by an encoding.
Definition: encoding.hpp:15
lexy::encoding_endianness::little
@ little
Little endian.
lexy::utf8_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:95
lexy::_detail::transcode_char
LEXY_CONSTEVAL TargetCharT transcode_char(CharT c)
Definition: encoding.hpp:300
lexy::is_unicode_encoding
constexpr auto is_unicode_encoding
Definition: encoding.hpp:261
lexy::_detail::require_secondary_char_type
std::enable_if_t< Encoding::template is_secondary_char_type< CharT >()> require_secondary_char_type
Definition: encoding.hpp:288
lexy::utf32_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:162
config.hpp
lexy::default_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:43
lexy::utf8_char_encoding::int_type
char int_type
Definition: encoding.hpp:111
lexy::ascii_encoding::char_type
char char_type
Definition: encoding.hpp:60
lexy::utf32_encoding::char_type
char32_t char_type
Definition: encoding.hpp:158
lexy::is_byte_encoding
constexpr auto is_byte_encoding
Definition: encoding.hpp:270
lexy::utf16_encoding::int_type
std::int_least32_t int_type
Definition: encoding.hpp:135
lexy::utf16_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:149
lexy
Definition: any_ref.hpp:12
lexy::utf8_char_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:125
lexy::utf32_encoding
An encoding where the input is assumed to be valid UTF-32.
Definition: encoding.hpp:156
lexy::utf32_encoding::int_type
char32_t int_type
Definition: encoding.hpp:159
lexy::utf8_char_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:119
lexy::default_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:33
lexy::utf16_encoding::char_type
char16_t char_type
Definition: encoding.hpp:134
lexy::is_text_encoding
constexpr auto is_text_encoding
Definition: encoding.hpp:267
lexy::_deduce_encoding
Definition: encoding.hpp:207
lexy::default_encoding::int_type
int int_type
Definition: encoding.hpp:30
lexy::byte_encoding
An encoding where the input is just raw bytes, not characters.
Definition: encoding.hpp:180
lexy::encoding_endianness::big
@ big
Big endian.
lexy::ascii_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:69
lexy::utf8_char_encoding::char_type
char char_type
Definition: encoding.hpp:110
lexy::utf8_encoding::char_type
LEXY_CHAR8_T char_type
Definition: encoding.hpp:86
lexy::utf8_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:90
lexy::utf8_encoding
An encoding where the input is assumed to be valid UTF-8.
Definition: encoding.hpp:84
assert.hpp
lexy::_detail::is_compatible_char_type
constexpr bool is_compatible_char_type
Definition: encoding.hpp:283
lexy::ascii_encoding
Definition: encoding.hpp:58
lexy::_detail::is_ascii
constexpr bool is_ascii(CharT c)
Definition: encoding.hpp:291
lexy::default_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:38
lexy::default_encoding
An encoding where the input is some 8bit encoding (ASCII, UTF-8, extended ASCII etc....
Definition: encoding.hpp:27
lexy::encoding_endianness::bom
@ bom
lexy::utf8_char_encoding
An encoding where the input is assumed to be valid UTF-8, but the char type is char.
Definition: encoding.hpp:108
lexy::is_char_encoding
constexpr auto is_char_encoding
Definition: encoding.hpp:273
lexy::utf16_encoding
An encoding where the input is assumed to be valid UTF-16.
Definition: encoding.hpp:132
lexy::utf16_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:143
lexy::default_encoding::char_type
char char_type
Definition: encoding.hpp:29
lexy::ascii_encoding::int_type
char int_type
Definition: encoding.hpp:61
lexy::_detail
Definition: any_ref.hpp:12
lexy::ascii_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:64
std
Definition: std.hpp:31
lexyd::byte
constexpr auto byte
Matches an arbitrary byte.
Definition: byte.hpp:133
lexy::utf8_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:101
lexy::_detail::transcode_int
LEXY_CONSTEVAL auto transcode_int(CharT c) -> typename Encoding::int_type
Definition: encoding.hpp:326
lexy::ascii_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:77
lexy::utf32_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:167
lexy::utf8_char_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:114
lexy::utf8_encoding::int_type
LEXY_CHAR8_T int_type
Definition: encoding.hpp:87
LEXY_ASSERT
#define LEXY_ASSERT(Expr, Msg)
Definition: assert.hpp:37
lexy::is_node_encoding
constexpr auto is_node_encoding
Definition: encoding.hpp:276
LEXY_CHAR8_T
#define LEXY_CHAR8_T
Definition: config.hpp:139


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Dec 13 2024 03:19:16