// UTF8_ACCEPT is the state value 0. The test loops in this file treat a zero
// result from decode() as the point where a decoded code point is compared.
//
// utf8d is a single lookup table with two parts, as used by decode():
//   * entries [0, 256)  - read as utf8d[byte]: one class value (0..11) per
//     possible byte value;
//   * entries [256, ...) - read as utf8d[256 + state + type]: the next state.
//     Every value stored in this part is a multiple of 12, so a state acts as
//     the start offset of a 12-entry row and the class selects the column.
// NOTE(review): the closing of the initializer is not visible in this excerpt.
242 #define UTF8_ACCEPT 0u 244 static const unsigned char utf8d[] = {
// Bytes 0x00-0x7F: class 0.
247 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
248 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
249 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
250 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
// Bytes 0x80-0x8F: class 1; bytes 0x90-0x9F: class 9.
251 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
// Bytes 0xA0-0xBF: class 7.
252 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
// Bytes 0xC0-0xC1: class 8; bytes 0xC2-0xDF: class 2.
253 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
// Byte 0xE0: class 10; 0xE1-0xEC and 0xEE-0xEF: class 3; 0xED: class 4;
// 0xF0: class 11; 0xF1-0xF3: class 6; 0xF4: class 5; 0xF5-0xFF: class 8.
254 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
// Transition part: 108 entries, i.e. nine rows of 12 (one row per state value
// 0, 12, 24, ..., 96). The source line breaks do not coincide with row
// boundaries: each full line below holds two rows.
258 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
259 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
260 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
261 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
262 12,36,12,12,12,12,12,12,12,12,12,12,
// Advances the table-driven decoder by one input byte.
//
// state: in/out decoder state; read to select the transition and then
//        overwritten with the value looked up in utf8d.
// codep: in/out code point accumulator; its current value is read by one arm
//        of the conditional expression below.
// byte:  next input byte. It is used directly as an index into the first part
//        of utf8d, so it is expected to be in 0..255; the callers visible in
//        this file pass a value cast from unsigned char.
//
// NOTE(review): the condition and assignment target of the conditional
// expression, and the return statement, are not visible in this excerpt.
// Presumably the result is stored through codep and the new *state is
// returned (callers test the result with `!decode(...)`) - confirm against
// the full file.
265 static unsigned inline decode(
unsigned* state,
unsigned* codep,
unsigned byte) {
// Class of this byte, taken from the first 256 entries of utf8d.
266 unsigned type =
utf8d[byte];
// First arm: shift the accumulated value left by 6 bits and OR in the low
// 6 bits of this byte.
269 (byte & 0x3fu) | (*codep << 6) :
// Second arm: keep only the low bits of this byte, using a mask that gets
// narrower as the class value grows.
270 (0xffu >> type) & (byte);
// Next state: the transition entries start at offset 256 and are addressed
// by the current state plus the byte's class.
272 *state =
utf8d[256 + *state + type];
287 for (
const unsigned* range =
kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
288 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
295 unsigned decodedCodepoint = 0;
298 unsigned decodedCount = 0;
299 for (
const char*
s = encodedStr; *
s; ++
s)
300 if (!
decode(&state, &decodedCodepoint, static_cast<unsigned char>(*
s))) {
301 EXPECT_EQ(codepoint, decodedCodepoint);
306 EXPECT_EQ(1u, decodedCount);
311 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
317 unsigned decodedCodepoint;
320 EXPECT_EQ(codepoint, decodedCodepoint);
321 if (!result || codepoint != decodedCodepoint)
322 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
340 for (
const unsigned* range =
kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
341 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
354 unsigned decodedCodepoint = 0;
358 if (!
decode(&state, &decodedCodepoint, static_cast<unsigned char>(*
s)))
362 if (codepoint <= 0xFFFF)
366 *p++ =
static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10));
367 *p++ =
static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF));
371 EXPECT_EQ(0,
StrCmp(buffer, encodedStr));
377 unsigned decodedCodepoint;
380 EXPECT_EQ(codepoint, decodedCodepoint);
381 if (!result || codepoint != decodedCodepoint)
382 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
399 for (
const unsigned* range =
kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
400 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
408 unsigned decodedCodepoint;
411 EXPECT_EQ(codepoint, decodedCodepoint);
412 if (!result || codepoint != decodedCodepoint)
413 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
430 for (
unsigned codepoint = 0; codepoint < 128; codepoint++) {
436 unsigned decodedCodepoint;
438 if (!result || codepoint != decodedCodepoint)
439 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
static bool Decode(InputStream &is, unsigned *codepoint)
const Ch * GetString() const
static void Encode(OutputStream &os, unsigned codepoint)
static bool Validate(InputStream &is, OutputStream &os)
static bool Validate(InputStream &is, OutputStream &os)
TEST(EncodingsTest, UTF8)
static bool Validate(InputStream &is, OutputStream &os)
static bool Decode(InputStream &is, unsigned *codepoint)
static const unsigned char utf8d[]
static const unsigned kCodepointRanges[]
static void Encode(OutputStream &os, unsigned codepoint)
int StrCmp(const Ch *s1, const Ch *s2)
static void Encode(OutputStream &os, unsigned codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
static unsigned decode(unsigned *state, unsigned *codep, unsigned byte)
static bool Decode(InputStream &is, unsigned *codepoint)
static void Encode(OutputStream &os, unsigned codepoint)
static bool Validate(InputStream &is, OutputStream &os)