encoding.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2023 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_ENCODING_HPP_INCLUDED
5 #define LEXY_ENCODING_HPP_INCLUDED
6 
7 #include <cstdint>
10 
11 //=== encoding classes ===//
12 namespace lexy
13 {
/// The endianness used by an encoding.
enum class encoding_endianness
{
    /// Little endian.
    little,
    /// Big endian.
    big,
    /// NOTE(review): no description for this enumerator survives in the listing; presumably
    /// the endianness is determined from a byte-order mark in the input -- confirm against
    /// the upstream header.
    bom,
};
25 
28 {
29  using char_type = char;
30  using int_type = int;
31 
32  template <typename OtherCharType>
33  static constexpr bool is_secondary_char_type()
34  {
35  return false;
36  }
37 
39  {
40  return -1;
41  }
42 
43  static constexpr int_type to_int_type(char_type c)
44  {
45  if constexpr (std::is_unsigned_v<char_type>)
46  // We can just convert it to int directly.
47  return static_cast<int_type>(c);
48  else
49  {
50  // We first need to prevent negative values, by making it unsigned.
51  auto value = static_cast<unsigned char>(c);
52  return static_cast<int_type>(value);
53  }
54  }
55 };
56 
57 // An encoding where the input is assumed to be valid ASCII.
59 {
60  using char_type = char;
61  using int_type = char;
62 
63  template <typename OtherCharType>
64  static constexpr bool is_secondary_char_type()
65  {
66  return false;
67  }
68 
70  {
71  if constexpr (std::is_signed_v<char_type>)
72  return int_type(-1);
73  else
74  return int_type(0xFFu);
75  }
76 
77  static constexpr int_type to_int_type(char_type c)
78  {
79  return int_type(c);
80  }
81 };
82 
85 {
88 
89  template <typename OtherCharType>
90  static constexpr bool is_secondary_char_type()
91  {
92  return std::is_same_v<OtherCharType, char>;
93  }
94 
96  {
97  // 0xFF is not part of valid UTF-8.
98  return int_type(0xFF);
99  }
100 
101  static constexpr int_type to_int_type(char_type c)
102  {
103  return int_type(c);
104  }
105 };
106 
109 {
110  using char_type = char;
111  using int_type = char;
112 
113  template <typename OtherCharType>
114  static constexpr bool is_secondary_char_type()
115  {
116  return std::is_same_v<OtherCharType, LEXY_CHAR8_T>;
117  }
118 
120  {
121  // 0xFF is not part of valid UTF-8.
122  return int_type(0xFF);
123  }
124 
125  static constexpr int_type to_int_type(char_type c)
126  {
127  return int_type(c);
128  }
129 };
130 
133 {
134  using char_type = char16_t;
135  using int_type = std::int_least32_t;
136 
137  template <typename OtherCharType>
138  static constexpr bool is_secondary_char_type()
139  {
140  return sizeof(wchar_t) == sizeof(char16_t) && std::is_same_v<OtherCharType, wchar_t>;
141  }
142 
144  {
145  // Every value of char16_t is valid UTF16.
146  return int_type(-1);
147  }
148 
149  static constexpr int_type to_int_type(char_type c)
150  {
151  return int_type(c);
152  }
153 };
154 
157 {
158  using char_type = char32_t;
159  using int_type = char32_t;
160 
161  template <typename OtherCharType>
162  static constexpr bool is_secondary_char_type()
163  {
164  return sizeof(wchar_t) == sizeof(char32_t) && std::is_same_v<OtherCharType, wchar_t>;
165  }
166 
168  {
169  // The highest unicode code point is U+10'FFFF, so this is never a valid code point.
170  return int_type(0xFFFF'FFFF);
171  }
172 
173  static constexpr int_type to_int_type(char_type c)
174  {
175  return c;
176  }
177 };
178 
180 struct byte_encoding
181 {
182  using char_type = unsigned char;
183  using int_type = int;
184 
185  template <typename OtherCharType>
186  static constexpr bool is_secondary_char_type()
187  {
188  return std::is_same_v<OtherCharType, char> || std::is_same_v<OtherCharType, std::byte>;
189  }
190 
191  static LEXY_CONSTEVAL int_type eof()
192  {
193  return -1;
194  }
195 
196  static constexpr int_type to_int_type(char_type c)
197  {
198  return int_type(c);
199  }
200 };
201 } // namespace lexy
202 
203 //=== deduce_encoding ===//
204 namespace lexy
205 {
/// Maps a character type to an encoding via the member alias `type`.
/// The primary template is only declared; a mapping exists solely for character types
/// that have an explicit specialization.
template <typename CharT>
struct _deduce_encoding;

/// The encoding selected for `CharT`.
/// Ill-formed when `_deduce_encoding<CharT>` has no specialization.
template <typename CharT>
using deduce_encoding = typename _deduce_encoding<CharT>::type;
210 
211 template <>
212 struct _deduce_encoding<char>
213 {
214 #if defined(LEXY_ENCODING_OF_CHAR)
215  using type = LEXY_ENCODING_OF_CHAR;
216  static_assert(std::is_same_v<type, default_encoding> //
217  || std::is_same_v<type, ascii_encoding> //
218  || std::is_same_v<type, utf8_encoding> //
219  || std::is_same_v<type, utf8_char_encoding>,
220  "invalid value for LEXY_ENCODING_OF_CHAR");
221 #else
222  using type = default_encoding; // Don't know the exact encoding.
223 #endif
224 };
225 
226 #if LEXY_HAS_CHAR8_T
227 template <>
229 {
230  using type = utf8_encoding;
231 };
232 #endif
// Explicit specialization of _deduce_encoding for char16_t.
// NOTE(review): the member declaration on listing line 236 is missing from this extract,
// so the struct body appears empty here. Presumably it is `using type = utf16_encoding;`
// -- confirm against the upstream header before relying on it.
233 template <>
234 struct _deduce_encoding<char16_t>
235 {
237 };
// Explicit specialization of _deduce_encoding for char32_t.
// NOTE(review): the member declaration on listing line 241 is missing from this extract,
// so the struct body appears empty here. Presumably it is `using type = utf32_encoding;`
// -- confirm against the upstream header before relying on it.
238 template <>
239 struct _deduce_encoding<char32_t>
240 {
242 };
243 
// Explicit specialization of _deduce_encoding for unsigned char.
// NOTE(review): the member declaration on listing line 247 is missing from this extract,
// so the struct body appears empty here. Presumably it is `using type = byte_encoding;`
// -- confirm against the upstream header before relying on it.
244 template <>
245 struct _deduce_encoding<unsigned char>
246 {
248 };
249 template <>
251 {
253 };
254 } // namespace lexy
255 
256 //=== impls ===//
257 namespace lexy::_detail
258 {
/// True when `CharT` may be used with `Encoding`: it is either the encoding's own
/// `char_type` or one the encoding reports through `is_secondary_char_type<CharT>()`.
template <typename Encoding, typename CharT>
constexpr bool is_compatible_char_type
    = std::is_same_v<typename Encoding::char_type, CharT>
      || Encoding::template is_secondary_char_type<CharT>();
263 
/// SFINAE helper: names `void` when `CharT` is a secondary character type of `Encoding`,
/// and is ill-formed otherwise.
template <typename Encoding, typename CharT>
using require_secondary_char_type
    = std::enable_if_t<Encoding::template is_secondary_char_type<CharT>()>;
267 
/// Checks whether `c` lies in the ASCII range [0x00, 0x7F].
///
/// @tparam CharT  Character (or other arithmetic) type of the value to test.
/// @param  c      The value to classify.
/// @return `true` if `0 <= c <= 0x7F`, `false` otherwise.
template <typename CharT>
constexpr bool is_ascii(CharT c)
{
    if constexpr (std::is_signed_v<CharT>)
        // Signed types can hold negative values, which are never ASCII.
        return 0 <= c && c <= 0x7F;
    else
        // Unsigned values are non-negative by construction, so only the upper bound is
        // checked (presumably also to avoid an always-true comparison warning).
        return c <= 0x7F;
}
276 
277 template <typename TargetCharT, typename CharT>
278 LEXY_CONSTEVAL TargetCharT transcode_char(CharT c)
279 {
280  if constexpr (std::is_same_v<CharT, TargetCharT>)
281  {
282  return c;
283  }
284 #if !LEXY_HAS_CHAR8_T
285  else if constexpr (std::is_same_v<CharT, char> && std::is_same_v<TargetCharT, LEXY_CHAR8_T>)
286  {
287  // If we don't have char8_t, `LEXY_LIT(u8"ä")` would have the type char, not LEXY_CHAR8_T
288  // (which is unsigned char). So we disable checking in that case, to allow such usage. Note
289  // that this prevents catching `LEXY_LIT("ä")`, but there is nothing we can do.
290  return static_cast<LEXY_CHAR8_T>(c);
291  }
292 #endif
293  else
294  {
295  LEXY_ASSERT(is_ascii(c), "character type of string literal didn't match, "
296  "so only ASCII characters are supported");
297  // Note that we don't need to worry about signed/unsigned conversion, all ASCII values are
298  // positive.
299  return static_cast<TargetCharT>(c);
300  }
301 }
302 
303 template <typename Encoding, typename CharT>
304 LEXY_CONSTEVAL auto transcode_int(CharT c) -> typename Encoding::int_type
305 {
306  return Encoding::to_int_type(transcode_char<typename Encoding::char_type>(c));
307 }
308 } // namespace lexy::_detail
309 
310 #endif // LEXY_ENCODING_HPP_INCLUDED
311 
lexy::utf16_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:138
LEXY_CONSTEVAL
#define LEXY_CONSTEVAL
Definition: config.hpp:90
lexy::encoding_endianness
encoding_endianness
The endianness used by an encoding.
Definition: encoding.hpp:15
lexy::encoding_endianness::little
@ little
Little endian.
lexy::utf8_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:95
lexy::_detail::transcode_char
LEXY_CONSTEVAL TargetCharT transcode_char(CharT c)
Definition: encoding.hpp:278
lexy::_detail::require_secondary_char_type
std::enable_if_t< Encoding::template is_secondary_char_type< CharT >()> require_secondary_char_type
Definition: encoding.hpp:266
magic_enum::char_type
string_view::value_type char_type
Definition: magic_enum.hpp:145
lexy::utf32_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:162
config.hpp
lexy::default_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:43
lexy::utf8_char_encoding::int_type
char int_type
Definition: encoding.hpp:111
lexy::ascii_encoding::char_type
char char_type
Definition: encoding.hpp:60
lexy::utf32_encoding::char_type
char32_t char_type
Definition: encoding.hpp:158
lexy::utf16_encoding::int_type
std::int_least32_t int_type
Definition: encoding.hpp:135
lexy::utf16_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:149
lexy
Definition: any_ref.hpp:12
lexy::utf8_char_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:125
lexy::utf32_encoding
An encoding where the input is assumed to be valid UTF-32.
Definition: encoding.hpp:156
lexy::utf32_encoding::int_type
char32_t int_type
Definition: encoding.hpp:159
lexy::utf8_char_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:119
lexy::default_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:33
lexy::utf16_encoding::char_type
char16_t char_type
Definition: encoding.hpp:134
lexy::_deduce_encoding
Definition: encoding.hpp:207
lexy::default_encoding::int_type
int int_type
Definition: encoding.hpp:30
lexy::byte_encoding
An encoding where the input is just raw bytes, not characters.
Definition: encoding.hpp:180
lexy::encoding_endianness::big
@ big
Big endian.
lexy::ascii_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:69
lexy::utf8_char_encoding::char_type
char char_type
Definition: encoding.hpp:110
lexy::utf8_encoding::char_type
LEXY_CHAR8_T char_type
Definition: encoding.hpp:86
lexy::utf8_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:90
lexy::utf8_encoding
An encoding where the input is assumed to be valid UTF-8.
Definition: encoding.hpp:84
assert.hpp
lexy::_detail::is_compatible_char_type
constexpr bool is_compatible_char_type
Definition: encoding.hpp:261
lexy::ascii_encoding
Definition: encoding.hpp:58
lexy::_detail::is_ascii
constexpr bool is_ascii(CharT c)
Definition: encoding.hpp:269
lexy::default_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:38
lexy::default_encoding
An encoding where the input is some 8bit encoding (ASCII, UTF-8, extended ASCII etc.).
Definition: encoding.hpp:27
lexy::encoding_endianness::bom
@ bom
lexy::utf8_char_encoding
An encoding where the input is assumed to be valid UTF-8, but the char type is char.
Definition: encoding.hpp:108
lexy::utf16_encoding
An encoding where the input is assumed to be valid UTF-16.
Definition: encoding.hpp:132
lexy::utf16_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:143
lexy::default_encoding::char_type
char char_type
Definition: encoding.hpp:29
lexy::ascii_encoding::int_type
char int_type
Definition: encoding.hpp:61
lexy::_detail
Definition: any_ref.hpp:12
lexy::ascii_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:64
std
Definition: std.hpp:30
lexyd::byte
constexpr auto byte
Matches an arbitrary byte.
Definition: byte.hpp:51
lexy::utf8_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:101
lexy::_detail::transcode_int
LEXY_CONSTEVAL auto transcode_int(CharT c) -> typename Encoding::int_type
Definition: encoding.hpp:304
lexy::ascii_encoding::to_int_type
static constexpr int_type to_int_type(char_type c)
Definition: encoding.hpp:77
lexy::utf32_encoding::eof
static LEXY_CONSTEVAL int_type eof()
Definition: encoding.hpp:167
lexy::utf8_char_encoding::is_secondary_char_type
static constexpr bool is_secondary_char_type()
Definition: encoding.hpp:114
lexy::utf8_encoding::int_type
LEXY_CHAR8_T int_type
Definition: encoding.hpp:87
LEXY_ASSERT
#define LEXY_ASSERT(Expr, Msg)
Definition: assert.hpp:37
LEXY_CHAR8_T
#define LEXY_CHAR8_T
Definition: config.hpp:116


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Jun 28 2024 02:20:07