// Include guard for this header.
4 #ifndef LEXY_CODE_POINT_HPP_INCLUDED
5 #define LEXY_CODE_POINT_HPP_INCLUDED
// Feature switch: defaults to 0 unless it was already defined before this point.
11 #ifndef LEXY_HAS_UNICODE_DATABASE
12 # define LEXY_HAS_UNICODE_DATABASE 0
// LEXY_UNICODE_CONSTEXPR expands to `constexpr` when the switch is non-zero.
15 #if LEXY_HAS_UNICODE_DATABASE
16 # define LEXY_UNICODE_CONSTEXPR constexpr
// This second definition is empty; presumably it is the fallback branch of the
// #if above (the #else/#endif lines are not visible in this excerpt) -- TODO confirm.
18 # define LEXY_UNICODE_CONSTEXPR
// Accessor value(): a const, noexcept, constexpr member with a deduced return type.
// Its body is not visible in this excerpt.
30 constexpr
auto value() const noexcept
// Predicate is_bmp(): a const, noexcept, constexpr member returning bool.
// NOTE(review): the embedded line numbers jump from 40 to 46, so the return
// statement below may belong to a later predicate whose header is missing from
// this excerpt rather than to is_bmp() -- confirm against the full file.
40 constexpr
bool is_bmp() const noexcept
// Upper-bound test on _value; the literal appears to be 0x10'FFFF, split across
// two lines in this excerpt.
46 return _value <= 0x10
'FFFF;
// True when _value is at most 0x1F, or lies in the inclusive range 0x7F..0x9F.
49 constexpr bool is_control() const noexcept
51 return _value <= 0x1F || (0x7F <= _value && _value <= 0x9F);
// True when _value lies in the inclusive range 0xD800..0xDFFF.
53 constexpr bool is_surrogate() const noexcept
55 return 0xD800 <= _value && _value <= 0xDFFF;
// Tests whether _value falls into a private-use range. Only the first range
// (0xE000..0xF8FF, inclusive) is visible; the return expression continues on
// lines that are not part of this excerpt.
57 constexpr bool is_private_use() const noexcept
59 return (0xE000 <= _value && _value <= 0xF8FF)
// Checks _value against two families of values: the block 0xFDD0..0xFDEF, and any
// value whose low 16 bits are 0xFFFE or 0xFFFF. The statement guarded by the
// `if` below is not visible in this excerpt.
63 constexpr bool is_noncharacter() const noexcept
65 // Contiguous range of 32 non-characters.
66 if (0xFDD0 <= _value && _value <= 0xFDEF)
69 // Last two code points of every plane.
// Masking with 0xFFFF keeps only the low 16 bits of _value.
70 auto in_plane = _value & 0xFFFF;
71 return in_plane == 0xFFFE || in_plane == 0xFFFF;
// True when is_valid() holds and is_surrogate() does not.
74 constexpr bool is_scalar() const noexcept
76 return is_valid() && !is_surrogate();
79 //=== general category ===//
// Unscoped enumeration of category values. Its enumerators are presumably produced
// by LEXY_UNICODE_CATEGORY invocations that are not visible in this excerpt.
80 enum general_category_t
// The helper macro below expands to two enumerators per use: `Short`, and `Long`
// defined equal to `Short`, so both spellings name the same value.
82 // NOLINTNEXTLINE: can't use parentheses here
83 #define LEXY_UNICODE_CATEGORY(Short, Long) Short, Long = Short
// The helper macro is removed again after its uses.
122 #undef LEXY_UNICODE_CATEGORY
// Fold expression over the Cats pack: true when cat equals at least one element
// (an empty pack folds to false under `||`). The enclosing function header is
// not visible in this excerpt.
132 return ((
cat == Cats) || ...);
// Same membership test again, belonging to a second function whose header is
// likewise not visible here.
136 return ((
cat == Cats) || ...);
// Declares two static constexpr _gc_group<...> objects per use:
//  - `Short`, brace-initialised from the literal "code-point." followed by Name
//    (adjacent string literals, so Name is expected to be a string literal);
//  - `Long`, initialised as a copy of Short.
// The variadic macro arguments become the template arguments of _gc_group.
149 #define LEXY_UNICODE_CATEGORY_GROUP(Name, Short, Long, ...) \
150 static constexpr _gc_group<__VA_ARGS__> Short{"code-point." Name}; \
151 static constexpr _gc_group<__VA_ARGS__> Long = Short
// The helper macro is removed again after its uses.
162 #undef LEXY_UNICODE_CATEGORY_GROUP
// Equality: compares the _value members of lhs and rhs. The function headers for
// this and the following statement are not visible in this excerpt.
169 return lhs._value == rhs._value;
// Inequality: the negated form of the comparison above.
173 return lhs._value != rhs._value;
// Maps each lexy::code_point category enumerator to a string literal that starts
// with "code-point.". The switch header and the enclosing function are not
// visible in this excerpt.

// Letter categories (L*).
189 case lexy::code_point::Lu:
190 return "code-point.uppercase-letter";
191 case lexy::code_point::Ll:
192 return "code-point.lowercase-letter";
193 case lexy::code_point::Lt:
194 return "code-point.titlecase-letter";
195 case lexy::code_point::Lm:
196 return "code-point.modifier-letter";
197 case lexy::code_point::Lo:
198 return "code-point.other-letter";
// Mark categories (M*).
200 case lexy::code_point::Mn:
201 return "code-point.nonspacing-mark";
// NOTE(review): Unicode's long name for Mc is Spacing_Mark, while the string
// below reads "combining-mark" -- confirm this naming is intentional.
202 case lexy::code_point::Mc:
203 return "code-point.combining-mark";
204 case lexy::code_point::Me:
205 return "code-point.enclosing-mark";
// Number categories (N*).
207 case lexy::code_point::Nd:
208 return "code-point.decimal-number";
209 case lexy::code_point::Nl:
210 return "code-point.letter-number";
211 case lexy::code_point::No:
212 return "code-point.other-number";
// Punctuation categories (P*).
214 case lexy::code_point::Pc:
215 return "code-point.connector-punctuation";
216 case lexy::code_point::Pd:
217 return "code-point.dash-punctuation";
218 case lexy::code_point::Ps:
219 return "code-point.open-punctuation";
220 case lexy::code_point::Pe:
221 return "code-point.close-punctuation";
222 case lexy::code_point::Pi:
223 return "code-point.initial-quote-punctuation";
224 case lexy::code_point::Pf:
225 return "code-point.final-quote-punctuation";
226 case lexy::code_point::Po:
227 return "code-point.other-punctuation";
// Symbol categories (S*).
229 case lexy::code_point::Sm:
230 return "code-point.math-symbol";
231 case lexy::code_point::Sc:
232 return "code-point.currency-symbol";
233 case lexy::code_point::Sk:
234 return "code-point.modifier-symbol";
235 case lexy::code_point::So:
236 return "code-point.other-symbol";
// Separator categories (Z*).
238 case lexy::code_point::Zs:
239 return "code-point.space-separator";
240 case lexy::code_point::Zl:
241 return "code-point.line-separator";
242 case lexy::code_point::Zp:
243 return "code-point.paragraph-separator";
// Other categories (C*).
245 case lexy::code_point::Cc:
246 return "code-point.control";
247 case lexy::code_point::Cf:
248 return "code-point.format";
249 case lexy::code_point::Cs:
250 return "code-point.surrogate";
251 case lexy::code_point::Co:
252 return "code-point.private-use";
253 case lexy::code_point::Cn:
254 return "code-point.not-assigned";
// Everything up to the matching #else/#endif (not visible in this excerpt) is
// compiled only when LEXY_HAS_UNICODE_DATABASE is non-zero.
261 #if LEXY_HAS_UNICODE_DATABASE
// Yields the `unassigned` enumerator; the condition that leads here is not
// visible in this excerpt.
267 return general_category_t::unassigned;
// Applies `offset` to a code point: the value is converted to std::int_least32_t
// for the addition, cast back to char32_t, and wrapped in a new code_point.
// Presumably the signed intermediate type is there so that offset may be
// negative -- TODO confirm against the declaration of offset.
280 return code_point(char32_t(std::int_least32_t(cp.value()) + offset));
// Builds a bit mask by OR-folding (1 << P) over every P in the Props pack, so each
// listed property index contributes one bit.
288 constexpr
auto mask = ((1 << Props) | ...);
// True when props has at least one of the mask's bits set.
292 return (props & mask) != 0;
// Resolves a property name to the entity of that name inside ::lexy::_unicode_db.
296 # define LEXY_UNICODE_PROPERTY(Name) ::lexy::_unicode_db::Name
// Template header taking a pack of int values; the declaration it introduces is
// not visible in this excerpt.
301 template <
int... Props>
// Second definition maps every property name to 0; presumably this is the branch
// used when the Unicode database is unavailable (the #else is not visible) --
// TODO confirm.
305 # define LEXY_UNICODE_PROPERTY(Name) 0
// Closes the include guard opened at the top of the header.
309 #endif // LEXY_CODE_POINT_HPP_INCLUDED