/*
 * Inclusive boundaries of the two surrogate ranges, as the macro names
 * indicate:
 *   high surrogates: 0xD800 .. 0xDBFF
 *   low  surrogates: 0xDC00 .. 0xDFFF
 * Each value is cast to UTF32, a type declared outside this excerpt.
 *
 * NOTE(review): the expansions are bare cast expressions with no enclosing
 * parentheses; confirm that no use site applies a postfix operator to them.
 */
52 #define UNI_SUR_HIGH_START (UTF32)0xD800 /* first high-surrogate value */
53 #define UNI_SUR_HIGH_END (UTF32)0xDBFF /* last high-surrogate value */
54 #define UNI_SUR_LOW_START (UTF32)0xDC00 /* first low-surrogate value */
55 #define UNI_SUR_LOW_END (UTF32)0xDFFF /* last low-surrogate value */
62 const UTF32** sourceStart,
const UTF32* sourceEnd,
67 while (
source < sourceEnd) {
94 if (
target + 1 >= targetEnd) {
111 const UTF16** sourceStart,
const UTF16* sourceEnd,
117 while (
source < sourceEnd) {
148 if (
target >= targetEnd) {
158 fprintf(
stderr,
"ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
175 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
176 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
177 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
178 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
179 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
180 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
181 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
182 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
191 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
215 const UTF16** sourceStart,
const UTF16* sourceEnd,
220 while (
source < sourceEnd) {
222 unsigned short bytesToWrite = 0;
223 const UTF32 byteMask = 0xBF;
224 const UTF32 byteMark = 0x80;
256 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
257 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
258 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
259 }
else if (ch < (
UTF32)0x110000) { bytesToWrite = 4;
260 }
else { bytesToWrite = 3;
269 switch (bytesToWrite) {
270 case 4: *--
target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
271 case 3: *--
target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
272 case 2: *--
target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
299 default:
return false;
301 case 4:
if ((
a = (*--srcptr)) < 0x80 ||
a > 0xBF)
return false;
302 case 3:
if ((
a = (*--srcptr)) < 0x80 ||
a > 0xBF)
return false;
303 case 2:
if ((
a = (*--srcptr)) > 0xBF)
return false;
307 case 0xE0:
if (
a < 0xA0)
return false;
break;
308 case 0xED:
if (
a > 0x9F)
return false;
break;
309 case 0xF0:
if (
a < 0x90)
return false;
break;
310 case 0xF4:
if (
a > 0x8F)
return false;
break;
311 default:
if (
a < 0x80)
return false;
314 case 1:
if (*
source >= 0x80 && *
source < 0xC2)
return false;
316 if (*
source > 0xF4)
return false;
337 const UTF8** sourceStart,
const UTF8* sourceEnd,
342 while (
source < sourceEnd) {
345 if (
source + extraBytesToRead >= sourceEnd) {
356 switch (extraBytesToRead) {
357 case 5: ch += *
source++; ch <<= 6;
358 case 4: ch += *
source++; ch <<= 6;
359 case 3: ch += *
source++; ch <<= 6;
360 case 2: ch += *
source++; ch <<= 6;
361 case 1: ch += *
source++; ch <<= 6;
366 if (
target >= targetEnd) {
367 source -= (extraBytesToRead+1);
374 source -= (extraBytesToRead+1);
386 source -= (extraBytesToRead+1);
393 if (
target + 1 >= targetEnd) {
394 source -= (extraBytesToRead+1);
410 const UTF32** sourceStart,
const UTF32* sourceEnd,
415 while (
source < sourceEnd) {
417 unsigned short bytesToWrite = 0;
418 const UTF32 byteMask = 0xBF;
419 const UTF32 byteMark = 0x80;
433 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
434 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
435 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
437 }
else { bytesToWrite = 3;
447 switch (bytesToWrite) {
448 case 4: *--
target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
449 case 3: *--
target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
450 case 2: *--
target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
463 const UTF8** sourceStart,
const UTF8* sourceEnd,
468 while (
source < sourceEnd) {
471 if (
source + extraBytesToRead >= sourceEnd) {
482 switch (extraBytesToRead) {
483 case 5: ch += *
source++; ch <<= 6;
484 case 4: ch += *
source++; ch <<= 6;
485 case 3: ch += *
source++; ch <<= 6;
486 case 2: ch += *
source++; ch <<= 6;
487 case 1: ch += *
source++; ch <<= 6;
492 if (
target >= targetEnd) {
493 source -= (extraBytesToRead+1);
503 source -= (extraBytesToRead+1);