00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016
00017 #include "pcl/surface/3rdparty/opennurbs/opennurbs.h"
00018
00019 int ON_IsValidUnicodeCodePoint(ON__UINT32 u)
00020 {
00021 return ( u < 0xD800 || (u >= 0xE000 && u <= 0x10FFFF) );
00022 }
00023
00024 int ON_EncodeUTF8( ON__UINT32 u, ON__UINT8 sUTF8[6] )
00025 {
00026 ON__UINT32 c;
00027
00028 if ( u <= 0x7F )
00029 {
00030
00031 sUTF8[0] = (ON__UINT8)u;
00032 return 1;
00033 }
00034
00035 if ( u <= 0x7FF )
00036 {
00037
00038 c = (u / 0x40);
00039 c |= 0xC0;
00040 sUTF8[0] = (ON__UINT8)c;
00041 c = (u & 0x3F);
00042 c |= 0x80;
00043 sUTF8[1] = (ON__UINT8)c;
00044 return 2;
00045 }
00046
00047 if ( u <= 0xFFFF )
00048 {
00049
00050 c = (u / 0x1000);
00051 c |= 0xE0;
00052 sUTF8[0] = (ON__UINT8)c;
00053 c = ((u & 0xFFF) / 0x40);
00054 c |= 0x80;
00055 sUTF8[1] = (ON__UINT8)c;
00056 c = u & 0x3F;
00057 c |= 0x80;
00058 sUTF8[2] = (ON__UINT8)c;
00059 return 3;
00060 }
00061
00062 if ( u <= 0x1FFFFF )
00063 {
00064
00065 c = (u / 0x40000);
00066 c |= 0xF0;
00067 sUTF8[0] = (ON__UINT8)c;
00068 c = ((u & 0x3FFFF)/0x1000);
00069 c |= 0x80;
00070 sUTF8[1] = (ON__UINT8)c;
00071 c = ((u & 0xFFF) / 0x40);
00072 c |= 0x80;
00073 sUTF8[2] = (ON__UINT8)c;
00074 c = u & 0x3F;
00075 c |= 0x80;
00076 sUTF8[3] = (ON__UINT8)c;
00077 return 4;
00078 }
00079
00080 if ( u <= 0x3FFFFFF )
00081 {
00082
00083 c = (u / 0xFFFFFF);
00084 c |= 0xF8;
00085 sUTF8[0] = (ON__UINT8)c;
00086 c = ((u & 0xFFFFFF)/0x40000);
00087 c |= 0x80;
00088 sUTF8[1] = (ON__UINT8)c;
00089 c = ((u & 0x3FFFF)/0x1000);
00090 c |= 0x80;
00091 sUTF8[2] = (ON__UINT8)c;
00092 c = ((u & 0xFFF) / 0x40);
00093 c |= 0x80;
00094 sUTF8[3] = (ON__UINT8)c;
00095 c = u & 0x3F;
00096 c |= 0x80;
00097 sUTF8[4] = (ON__UINT8)c;
00098 return 5;
00099 }
00100
00101 if ( u <= 0x7FFFFFFF )
00102 {
00103
00104 c = (u / 0x40000000);
00105 c |= 0xFC;
00106 sUTF8[0] = (ON__UINT8)c;
00107 c = ((u & 0x3FFFFFFF)/0x1000000);
00108 c |= 0x80;
00109 sUTF8[1] = (ON__UINT8)c;
00110 c = ((u & 0xFFFFFF)/0x40000);
00111 c |= 0x80;
00112 sUTF8[2] = (ON__UINT8)c;
00113 c = ((u & 0x3FFFF)/0x1000);
00114 c |= 0x80;
00115 sUTF8[3] = (ON__UINT8)c;
00116 c = ((u & 0xFFF) / 0x40);
00117 c |= 0x80;
00118 sUTF8[4] = (ON__UINT8)c;
00119 c = u & 0x3F;
00120 c |= 0x80;
00121 sUTF8[5] = (ON__UINT8)c;
00122 return 6;
00123 }
00124
00125 return 0;
00126 }
00127
00128 static int ON_DecodeUTF8Helper(
00129 const ON__UINT8* sUTF8,
00130 int sUTF8_count,
00131 ON__UINT32* value,
00132 unsigned int* error_status
00133 )
00134 {
00135 #define INPUT_BUFFER_TOO_SHORT 16
00136 #define INVALID_CONTINUATION_VALUE 16
00137 #define OVERLONG_ENCODING 8
00138
00139 ON__UINT32 u;
00140 ON__UINT8 c;
00141
00142 c = sUTF8[0];
00143
00144 if ( 0 == (0x80 & c) )
00145 {
00146
00147 *value = c;
00148 return 1;
00149 }
00150
00151 if ( 0xC0 == ( 0xE0 & c) )
00152 {
00153
00154 if ( sUTF8_count < 2 )
00155 {
00156 *error_status |= INPUT_BUFFER_TOO_SHORT;
00157 return 0;
00158 }
00159 u = (0x1F & c);
00160 c = sUTF8[1];
00161 if ( 0x80 != ( 0xC0 & c) )
00162 {
00163 *error_status |= INVALID_CONTINUATION_VALUE;
00164 return 0;
00165 }
00166 u *= 64;
00167 u |= (0x3F & c);
00168 if ( u <= 0x7F )
00169 {
00170 *error_status |= OVERLONG_ENCODING;
00171 }
00172 *value = u;
00173 return 2;
00174 }
00175
00176 if ( 0xE0 == ( 0xF0 & c) )
00177 {
00178
00179 if ( sUTF8_count < 3 )
00180 {
00181 *error_status |= INPUT_BUFFER_TOO_SHORT;
00182 return 0;
00183 }
00184 u = (0x0F & c);
00185 c = sUTF8[1];
00186 if ( 0x80 != ( 0xC0 & c) )
00187 {
00188 *error_status |= INVALID_CONTINUATION_VALUE;
00189 return 0;
00190 }
00191 u *= 64;
00192 u |= (0x3F & c);
00193 c = sUTF8[2];
00194 if ( 0x80 != ( 0xC0 & c) )
00195 {
00196 *error_status |= INVALID_CONTINUATION_VALUE;
00197 return 0;
00198 }
00199 u *= 64;
00200 u |= (0x3F & c);
00201 if ( u <= 0x7FF )
00202 {
00203 *error_status |= OVERLONG_ENCODING;
00204 }
00205 *value = u;
00206 return 3;
00207 }
00208
00209 if ( 0xF0 == ( 0xF8 & c) )
00210 {
00211
00212 if ( sUTF8_count < 4 )
00213 {
00214 *error_status |= INPUT_BUFFER_TOO_SHORT;
00215 return 0;
00216 }
00217
00218 u = (0x07 & c);
00219 c = sUTF8[1];
00220 if ( 0x80 != ( 0xC0 & c) )
00221 {
00222 *error_status |= INVALID_CONTINUATION_VALUE;
00223 return 0;
00224 }
00225 u *= 64;
00226 u |= (0x3F & c);
00227 c = sUTF8[2];
00228 if ( 0x80 != ( 0xC0 & c) )
00229 {
00230 *error_status |= INVALID_CONTINUATION_VALUE;
00231 return 0;
00232 }
00233 u *= 64;
00234 u |= (0x3F & c);
00235 c = sUTF8[3];
00236 if ( 0x80 != ( 0xC0 & c) )
00237 {
00238 *error_status |= INVALID_CONTINUATION_VALUE;
00239 return 0;
00240 }
00241 u *= 64;
00242 u |= (0x3F & c);
00243 if ( u <= 0xFFFF )
00244 {
00245 *error_status |= OVERLONG_ENCODING;
00246 }
00247 *value = u;
00248 return 4;
00249 }
00250
00251 if ( 0xF8 == ( 0xFC & c) )
00252 {
00253
00254 if ( sUTF8_count < 5 )
00255 {
00256 *error_status |= INPUT_BUFFER_TOO_SHORT;
00257 return 0;
00258 }
00259
00260 u = (0x03 & c);
00261 c = sUTF8[1];
00262 if ( 0x80 != ( 0xC0 & c) )
00263 {
00264 *error_status |= INVALID_CONTINUATION_VALUE;
00265 return 0;
00266 }
00267 u *= 64;
00268 u |= (0x3F & c);
00269 c = sUTF8[2];
00270 if ( 0x80 != ( 0xC0 & c) )
00271 {
00272 *error_status |= INVALID_CONTINUATION_VALUE;
00273 return 0;
00274 }
00275 u *= 64;
00276 u |= (0x3F & c);
00277 c = sUTF8[3];
00278 if ( 0x80 != ( 0xC0 & c) )
00279 {
00280 *error_status |= INVALID_CONTINUATION_VALUE;
00281 return 0;
00282 }
00283 u *= 64;
00284 u |= (0x3F & c);
00285 c = sUTF8[4];
00286 if ( 0x80 != ( 0xC0 & c) )
00287 {
00288 *error_status |= INVALID_CONTINUATION_VALUE;
00289 return 0;
00290 }
00291 u *= 64;
00292 u |= (0x3F & c);
00293 if ( u <= 0x1FFFFF )
00294 {
00295 *error_status |= OVERLONG_ENCODING;
00296 }
00297 *value = u;
00298 return 5;
00299 }
00300
00301 if ( 0xFC == ( 0xFE & c) )
00302 {
00303
00304 if ( sUTF8_count < 6 )
00305 {
00306 *error_status |= INPUT_BUFFER_TOO_SHORT;
00307 return 0;
00308 }
00309
00310 u = (0x01 & c);
00311 c = sUTF8[1];
00312 if ( 0x80 != ( 0xC0 & c) )
00313 {
00314 *error_status |= INVALID_CONTINUATION_VALUE;
00315 return 0;
00316 }
00317 u *= 64;
00318 u |= (0x3F & c);
00319 c = sUTF8[2];
00320 if ( 0x80 != ( 0xC0 & c) )
00321 {
00322 *error_status |= INVALID_CONTINUATION_VALUE;
00323 return 0;
00324 }
00325 u *= 64;
00326 u |= (0x3F & c);
00327 c = sUTF8[3];
00328 if ( 0x80 != ( 0xC0 & c) )
00329 {
00330 *error_status |= INVALID_CONTINUATION_VALUE;
00331 return 0;
00332 }
00333 u *= 64;
00334 u |= (0x3F & c);
00335 c = sUTF8[4];
00336 if ( 0x80 != ( 0xC0 & c) )
00337 {
00338 *error_status |= INVALID_CONTINUATION_VALUE;
00339 return 0;
00340 }
00341 u *= 64;
00342 u |= (0x3F & c);
00343 c = sUTF8[5];
00344 if ( 0x80 != ( 0xC0 & c) )
00345 {
00346 *error_status |= INVALID_CONTINUATION_VALUE;
00347 return 0;
00348 }
00349 u *= 64;
00350 u |= (0x3F & c);
00351 if ( u <= 0x3FFFFFF )
00352 {
00353 *error_status |= OVERLONG_ENCODING;
00354 }
00355 *value = u;
00356 return 6;
00357 }
00358
00359 *error_status |= INVALID_CONTINUATION_VALUE;
00360 return 0;
00361
00362 #undef INPUT_BUFFER_TOO_SHORT
00363 #undef INVALID_CONTINUATION_VALUE
00364 #undef OVERLONG_ENCODING
00365 }
00366
00367 int ON_DecodeUTF8(
00368 const ON__UINT8* sUTF8,
00369 int sUTF8_count,
00370 struct ON_UnicodeErrorParameters* e,
00371 ON__UINT32* unicode_code_point
00372 )
00373 {
00374 ON__UINT32 u0, u1;
00375 int i0, i1;
00376 unsigned int error_status;
00377 ON__UINT16 sUTF16[2];
00378 ON__UINT8 c;
00379
00380 if ( 0 == sUTF8 || sUTF8_count <= 0 || 0 == unicode_code_point )
00381 return 0;
00382
00383
00384
00385
00386 if ( 0 == (0x80 & sUTF8[0]) )
00387 {
00388 *unicode_code_point = sUTF8[0];
00389 return 1;
00390 }
00391
00392 c = sUTF8[0];
00393 if ( 0xC0 == ( 0xE0 & c) && sUTF8_count >= 2 )
00394 {
00395
00396 u0 = (0x1F & c);
00397 c = sUTF8[1];
00398 if ( 0x80 == ( 0xC0 & c) )
00399 {
00400 u0 *= 64;
00401 u0 |= (0x3F & c);
00402 if ( u0 > 0x7F )
00403 {
00404 *unicode_code_point = u0;
00405 return 2;
00406 }
00407 }
00408 }
00409 else if ( 0xE0 == ( 0xF0 & c) && sUTF8_count >= 3 )
00410 {
00411
00412 u0 = (0x0F & c);
00413 c = sUTF8[1];
00414 if ( 0x80 == ( 0xC0 & c) )
00415 {
00416 u0 *= 64;
00417 u0 |= (0x3F & c);
00418 c = sUTF8[2];
00419 if ( 0x80 == ( 0xC0 & c) )
00420 {
00421 u0 *= 64;
00422 u0 |= (0x3F & c);
00423 if ( u0 >= 0x0800 && (u0 <= 0xD800 || u0 >= 0xE000) )
00424 {
00425 *unicode_code_point = u0;
00426 return 3;
00427 }
00428 }
00429 }
00430 }
00431 else if ( 0xF0 == ( 0xF8 & c) && sUTF8_count >= 4 )
00432 {
00433
00434 u0 = (0x07 & c);
00435 c = sUTF8[1];
00436 if ( 0x80 == ( 0xC0 & c) )
00437 {
00438 u0 *= 64;
00439 u0 |= (0x3F & c);
00440 c = sUTF8[2];
00441 if ( 0x80 == ( 0xC0 & c) )
00442 {
00443 u0 *= 64;
00444 u0 |= (0x3F & c);
00445 c = sUTF8[3];
00446 if ( 0x80 == ( 0xC0 & c) )
00447 {
00448 u0 *= 64;
00449 u0 |= (0x3F & c);
00450 if ( u0 >= 0x010000 && u0 <= 0x10FFFF )
00451 {
00452 *unicode_code_point = u0;
00453 return 4;
00454 }
00455 }
00456 }
00457 }
00458 }
00459
00460
00461 error_status = 0;
00462 u0 = 0xFFFFFFFF;
00463 i0 = ON_DecodeUTF8Helper(sUTF8,sUTF8_count,&u0,&error_status);
00464 if ( i0 > 0 && 0 == error_status && (u0 < 0xD800 || (u0 >= 0xE000 && u0 <= 0x10FFFF) ) )
00465 {
00466
00467 *unicode_code_point = u0;
00468 return i0;
00469 }
00470
00471
00472 if ( 0 == e )
00473 {
00474
00475 return 0;
00476 }
00477
00478
00479 e->m_error_status |= error_status;
00480
00481 if ( error_status != (error_status & e->m_error_mask) )
00482 {
00483
00484 return 0;
00485 }
00486
00487 if ( i0 <= 0 )
00488 {
00489 i0 = 1;
00490 if ( ON_IsValidUnicodeCodePoint(e->m_error_code_point) )
00491 {
00492
00493 for ( ; i0 < sUTF8_count; i0++ )
00494 {
00495
00496
00497 c = sUTF8[i0];
00498 if ( 0 == (0x80 & c)
00499 || 0xC0 == ( 0xE0 & c)
00500 || 0xE0 == ( 0xF0 & c)
00501 || 0xF0 == ( 0xF8 & c)
00502 || 0xF8 == ( 0xFC & c)
00503 || 0xFC == ( 0xFE & c)
00504 )
00505 {
00506
00507 break;
00508 }
00509 }
00510 *unicode_code_point = e->m_error_code_point;
00511 }
00512 return i0;
00513 }
00514
00515 if ( ON_IsValidUnicodeCodePoint(u0) && 8 == error_status )
00516 {
00517
00518 *unicode_code_point = u0;
00519 return i0;
00520 }
00521
00522 if ( i0 < sUTF8_count
00523 && u0 >= 0xD800 && u0 <= 0xDBFF
00524 && (0 == error_status || 8 == error_status)
00525 && 0 != (4 & e->m_error_mask)
00526 )
00527 {
00528
00529
00530 u1 = 0xFFFFFFFF;
00531 i1 = ON_DecodeUTF8Helper(sUTF8+i0,sUTF8_count-i0,&u1,&error_status);
00532 if ( i1 > 0 && (0 == error_status || 8 == error_status) )
00533 {
00534 error_status = 0;
00535 sUTF16[0] = (ON__UINT16)u0;
00536 sUTF16[1] = (ON__UINT16)u1;
00537 u0 = 0xFFFFFFFF;
00538 if ( 2 == ON_ConvertUTF16ToUTF32(false,sUTF16,2,&u0,1,&error_status,0,0,0)
00539 && 0 == error_status
00540 && ON_IsValidUnicodeCodePoint(u0)
00541 )
00542 {
00543 *unicode_code_point = u0;
00544 e->m_error_status |= 4;
00545 return i0+i1;
00546 }
00547 }
00548 }
00549
00550 if ( ON_IsValidUnicodeCodePoint(e->m_error_code_point) )
00551 {
00552 *unicode_code_point = e->m_error_code_point;
00553 return i0;
00554 }
00555
00556 return 0;
00557 }
00558
00559 int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] )
00560 {
00561
00562 if ( unicode_code_point < 0xD800 )
00563 {
00564
00565
00566 sUTF16[0] = (ON__UINT16)unicode_code_point;
00567 return 1;
00568 }
00569
00570 if ( unicode_code_point < 0xE000 )
00571 {
00572
00573 return 0;
00574 }
00575
00576 if ( unicode_code_point <= 0xFFFF )
00577 {
00578
00579
00580 sUTF16[0] = (ON__UINT16)unicode_code_point;
00581 return 1;
00582 }
00583
00584 if ( unicode_code_point <= 0x10FFFF )
00585 {
00586
00587
00588 unicode_code_point -= 0x10000;
00589 sUTF16[0] = (ON__UINT16)(0xD800 + (unicode_code_point / 0x400));
00590 sUTF16[1] = (ON__UINT16)(0xDC00 + (unicode_code_point & 0x3FF));
00591 return 2;
00592 }
00593
00594
00595 return 0;
00596 }
00597
00598 int ON_DecodeUTF16(
00599 const ON__UINT16* sUTF16,
00600 int sUTF16_count,
00601 struct ON_UnicodeErrorParameters* e,
00602 ON__UINT32* unicode_code_point
00603 )
00604 {
00605 ON__UINT32 uhi, ulo;
00606
00607 if ( 0 == sUTF16 || sUTF16_count <= 0 || 0 == unicode_code_point )
00608 return 0;
00609
00610
00611 if ( ( sUTF16[0] < 0xD800 ) || ( sUTF16[0] >= 0xE000 ) )
00612 {
00613 *unicode_code_point = sUTF16[0];
00614 return 1;
00615 }
00616
00617 if ( sUTF16_count >= 2 && sUTF16[0] < 0xDC00 && sUTF16[1] >= 0xDC00 && sUTF16[1] < 0xE000 )
00618 {
00619
00620 uhi = sUTF16[0];
00621 ulo = sUTF16[1];
00622 *unicode_code_point = (uhi-0xD800)*0x400 + (ulo-0xDC00) + 0x10000;
00623 return 2;
00624 }
00625
00626
00627 if ( 0 == e )
00628 {
00629
00630 return 0;
00631 }
00632
00633
00634 e->m_error_status |= 16;
00635
00636 if ( 16 != (16 & e->m_error_mask) || !ON_IsValidUnicodeCodePoint(e->m_error_code_point) )
00637 {
00638
00639 return 0;
00640 }
00641
00642
00643
00644 int i;
00645 for ( i = 1; i < sUTF16_count; i++ )
00646 {
00647 if ( ( sUTF16[i] < 0xD800 ) || ( sUTF16[i] >= 0xE000 ) )
00648 {
00649
00650 break;
00651 }
00652 if ( i+1 < sUTF16_count
00653 && sUTF16[i] >= 0xD800 && sUTF16[i] < 0xDC00
00654 && sUTF16[i+1] >= 0xDC00 && sUTF16[i+1] < 0xE000
00655 )
00656 {
00657
00658 break;
00659 }
00660 }
00661
00662 *unicode_code_point = e->m_error_code_point;
00663
00664 return i;
00665 }
00666
00667 int ON_DecodeSwapByteUTF16(
00668 const ON__UINT16* sUTF16,
00669 int sUTF16_count,
00670 struct ON_UnicodeErrorParameters* e,
00671 ON__UINT32* unicode_code_point
00672 )
00673 {
00674 int i;
00675 ON__UINT32 uhi, ulo;
00676 ON__UINT16 w0, w1;
00677 const ON__UINT8* p;
00678 ON__UINT8* p0;
00679 ON__UINT8* p1;
00680
00681
00682 if ( 0 == sUTF16 || sUTF16_count <= 0 || 0 == unicode_code_point )
00683 return 0;
00684
00685
00686
00687 p = (const ON__UINT8*)sUTF16;
00688 p0 = (ON__UINT8*)&w0;
00689 p0[1] = p[0];
00690 p0[0] = p[1];
00691 if ( ( w0 < 0xD800 ) || (w0 >= 0xE000 ) )
00692 {
00693 *unicode_code_point = w0;
00694 return 1;
00695 }
00696
00697 if ( sUTF16_count >= 2 && w0 < 0xDC00 )
00698 {
00699
00700 p1 = (ON__UINT8*)&w1;
00701 p1[1] = p[2];
00702 p1[0] = p[3];
00703 if ( w1 >= 0xDC00 && w1 < 0xE000 )
00704 {
00705
00706 uhi = w0;
00707 ulo = w1;
00708 *unicode_code_point = (uhi-0xD800)*0x400 + (ulo-0xDC00) + 0x10000;
00709 return 2;
00710 }
00711 }
00712
00713
00714 if ( 0 == e )
00715 {
00716
00717 return 0;
00718 }
00719
00720
00721 e->m_error_status |= 16;
00722
00723 if ( 16 != (16 & e->m_error_mask) || !ON_IsValidUnicodeCodePoint(e->m_error_code_point) )
00724 {
00725
00726 return 0;
00727 }
00728
00729
00730
00731 p1 = (ON__UINT8*)&w1;
00732 p += sizeof(sUTF16[0]);
00733 for ( i = 1; i < sUTF16_count; i++, p += sizeof(sUTF16[0]) )
00734 {
00735
00736 p0[1] = p[0];
00737 p0[0] = p[1];
00738 if ( ( w0 < 0xD800 ) || ( w0 >= 0xE000 ) )
00739 {
00740
00741 break;
00742 }
00743 if ( i+1 < sUTF16_count && w0 >= 0xD800 && w0 < 0xDC00 )
00744 {
00745
00746 p1[1] = p[sizeof(sUTF16[0])];
00747 p1[0] = p[sizeof(sUTF16[0])+1];
00748 if ( w1 >= 0xDC00 && w1 < 0xE000 )
00749 {
00750
00751 break;
00752 }
00753 }
00754 }
00755
00756 *unicode_code_point = e->m_error_code_point;
00757
00758 return i;
00759 }
00760
00761 int ON_ConvertUTF8ToUTF16(
00762 const ON__UINT8* sUTF8,
00763 int sUTF8_count,
00764 ON__UINT16* sUTF16,
00765 int sUTF16_count,
00766 unsigned int* error_status,
00767 unsigned int error_mask,
00768 ON__UINT32 error_code_point,
00769 const ON__UINT8** sNextUTF8
00770 )
00771 {
00772 int i, j, k, output_count;
00773 ON__UINT32 u;
00774 ON__UINT16 w[2];
00775 struct ON_UnicodeErrorParameters e;
00776
00777 if ( 0 != error_status )
00778 *error_status = 0;
00779
00780 if ( -1 == sUTF8_count && 0 != sUTF8 )
00781 {
00782 for ( sUTF8_count = 0; 0 != sUTF8[sUTF8_count]; sUTF8_count++)
00783 {
00784
00785 }
00786 }
00787
00788 if ( 0 == sUTF8 || sUTF8_count < 0 )
00789 {
00790 if ( 0 != error_status )
00791 *error_status |= 1;
00792 if ( sNextUTF8 )
00793 *sNextUTF8 = sUTF8;
00794 return 0;
00795 }
00796
00797 if ( 0 == sUTF16_count )
00798 {
00799 sUTF16 = 0;
00800 sUTF16_count = 2147483647;
00801 }
00802 else if ( 0 == sUTF16 )
00803 {
00804 if ( 0 != error_status )
00805 *error_status |= 1;
00806 if ( sNextUTF8 )
00807 *sNextUTF8 = sUTF8;
00808 return 0;
00809 }
00810
00811 e.m_error_status = 0;
00812 e.m_error_mask = error_mask;
00813 e.m_error_code_point = error_code_point;
00814
00815 output_count = 0;
00816
00817 for ( i = 0; i < sUTF8_count; i += j )
00818 {
00819 j = ON_DecodeUTF8(sUTF8+i,sUTF8_count-i,&e,&u);
00820 if ( j <= 0 )
00821 break;
00822 k = ON_EncodeUTF16(u,w);
00823 if ( 0 != sUTF16 )
00824 {
00825 if ( output_count + k > sUTF16_count )
00826 {
00827 e.m_error_status |= 2;
00828 break;
00829 }
00830 sUTF16[output_count] = w[0];
00831 if ( 2 == k )
00832 sUTF16[output_count+1] = w[1];
00833 }
00834 output_count += k;
00835 }
00836
00837 if ( 0 != sUTF16 && output_count < sUTF16_count)
00838 sUTF16[output_count] = 0;
00839 if ( sNextUTF8 )
00840 *sNextUTF8 = sUTF8+i;
00841 if ( error_status )
00842 *error_status = e.m_error_status;
00843
00844 return output_count;
00845 }
00846
00847 int ON_ConvertUTF8ToUTF32(
00848 const ON__UINT8* sUTF8,
00849 int sUTF8_count,
00850 ON__UINT32* sUTF32,
00851 int sUTF32_count,
00852 unsigned int* error_status,
00853 unsigned int error_mask,
00854 ON__UINT32 error_code_point,
00855 const ON__UINT8** sNextUTF8
00856 )
00857 {
00858 int i, j, output_count;
00859 ON__UINT32 u;
00860 struct ON_UnicodeErrorParameters e;
00861
00862 if ( 0 != error_status )
00863 *error_status = 0;
00864
00865 if ( -1 == sUTF8_count && 0 != sUTF8 )
00866 {
00867 for ( sUTF8_count = 0; 0 != sUTF8[sUTF8_count]; sUTF8_count++)
00868 {
00869
00870 }
00871 }
00872
00873 if ( 0 == sUTF8 || sUTF8_count < 0 )
00874 {
00875 if ( 0 != error_status )
00876 *error_status |= 1;
00877 if ( sNextUTF8 )
00878 *sNextUTF8 = sUTF8;
00879 return 0;
00880 }
00881
00882 if ( 0 == sUTF32_count )
00883 {
00884 sUTF32 = 0;
00885 sUTF32_count = 2147483647;
00886 }
00887 else if ( 0 == sUTF32 )
00888 {
00889 if ( 0 != error_status )
00890 *error_status |= 1;
00891 if ( sNextUTF8 )
00892 *sNextUTF8 = sUTF8;
00893 return 0;
00894 }
00895
00896 e.m_error_status = 0;
00897 e.m_error_mask = error_mask;
00898 e.m_error_code_point = error_code_point;
00899
00900 output_count = 0;
00901
00902 for ( i = 0; i < sUTF8_count; i += j )
00903 {
00904 j = ON_DecodeUTF8(sUTF8+i,sUTF8_count-i,&e,&u);
00905 if ( j <= 0 )
00906 break;
00907 if ( 0 != sUTF32 )
00908 {
00909 if ( output_count >= sUTF32_count )
00910 {
00911 e.m_error_status |= 2;
00912 break;
00913 }
00914 sUTF32[output_count] = u;
00915 }
00916 output_count++;
00917 }
00918
00919 if ( 0 != sUTF32 && output_count < sUTF32_count)
00920 sUTF32[output_count] = 0;
00921 if ( sNextUTF8 )
00922 *sNextUTF8 = sUTF8+i;
00923 if ( error_status )
00924 *error_status = e.m_error_status;
00925
00926 return output_count;
00927 }
00928
00929 int ON_ConvertUTF16ToUTF8(
00930 int bTestByteOrder,
00931 const ON__UINT16* sUTF16,
00932 int sUTF16_count,
00933 ON__UINT8* sUTF8,
00934 int sUTF8_count,
00935 unsigned int* error_status,
00936 unsigned int error_mask,
00937 ON__UINT32 error_code_point,
00938 const ON__UINT16** sNextUTF16
00939 )
00940 {
00941 int i, j, k, output_count, bSwapBytes;
00942 ON__UINT32 u;
00943 ON__UINT8 s[6];
00944 struct ON_UnicodeErrorParameters e;
00945
00946 if ( 0 != error_status )
00947 *error_status = 0;
00948
00949 if ( -1 == sUTF16_count && 0 != sUTF16 )
00950 {
00951 for ( sUTF16_count = 0; 0 != sUTF16[sUTF16_count]; sUTF16_count++)
00952 {
00953
00954 }
00955 }
00956
00957 if ( 0 == sUTF16 || sUTF16_count < 0 )
00958 {
00959 if ( 0 != error_status )
00960 *error_status |= 1;
00961 if ( sNextUTF16 )
00962 *sNextUTF16 = sUTF16;
00963 return 0;
00964 }
00965
00966 if ( 0 == sUTF8_count )
00967 {
00968 sUTF8 = 0;
00969 sUTF8_count = 2147483647;
00970 }
00971 else if ( 0 == sUTF8 )
00972 {
00973 if ( 0 != error_status )
00974 *error_status |= 1;
00975 if ( sNextUTF16 )
00976 *sNextUTF16 = sUTF16;
00977 return 0;
00978 }
00979
00980 bSwapBytes = false;
00981 if ( bTestByteOrder && sUTF16_count > 0 )
00982 {
00983 if ( 0xFEFF == sUTF16[0] )
00984 {
00985
00986 sUTF16_count--;
00987 sUTF16++;
00988 }
00989 else if ( 0xFFFE == sUTF16[0] )
00990 {
00991
00992 bSwapBytes = true;
00993 sUTF16_count--;
00994 sUTF16++;
00995 }
00996 }
00997
00998 e.m_error_status = 0;
00999 e.m_error_mask = error_mask;
01000 e.m_error_code_point = error_code_point;
01001
01002 output_count = 0;
01003
01004 if ( bSwapBytes )
01005 {
01006 for ( i = 0; i < sUTF16_count; i += j )
01007 {
01008 j = ON_DecodeSwapByteUTF16(sUTF16+i,sUTF16_count-i,&e,&u);
01009 if ( j <= 0 )
01010 break;
01011 k = ON_EncodeUTF8(u,s);
01012 if ( 0 != sUTF8 )
01013 {
01014 if ( output_count + k > sUTF8_count )
01015 {
01016 e.m_error_status |= 2;
01017 break;
01018 }
01019 memcpy(sUTF8+output_count,s,k*sizeof(sUTF8[0]));
01020 }
01021 output_count += k;
01022 }
01023 }
01024 else
01025 {
01026 for ( i = 0; i < sUTF16_count; i += j )
01027 {
01028 j = ON_DecodeUTF16(sUTF16+i,sUTF16_count-i,&e,&u);
01029 if ( j <= 0 )
01030 break;
01031 k = ON_EncodeUTF8(u,s);
01032 if ( 0 != sUTF8 )
01033 {
01034 if ( output_count + k > sUTF8_count )
01035 {
01036 e.m_error_status |= 2;
01037 break;
01038 }
01039 memcpy(sUTF8+output_count,s,k*sizeof(sUTF8[0]));
01040 }
01041 output_count += k;
01042 }
01043 }
01044 if ( 0 != sUTF8 && output_count < sUTF8_count)
01045 sUTF8[output_count] = 0;
01046 if ( sNextUTF16 )
01047 *sNextUTF16 = sUTF16+i;
01048 if ( error_status )
01049 *error_status = e.m_error_status;
01050
01051 return output_count;
01052 }
01053
01054 int ON_ConvertUTF16ToUTF32(
01055 int bTestByteOrder,
01056 const ON__UINT16* sUTF16,
01057 int sUTF16_count,
01058 unsigned int* sUTF32,
01059 int sUTF32_count,
01060 unsigned int* error_status,
01061 unsigned int error_mask,
01062 ON__UINT32 error_code_point,
01063 const ON__UINT16** sNextUTF16
01064 )
01065 {
01066 int i, j, output_count, bSwapBytes;
01067 ON__UINT32 u;
01068 struct ON_UnicodeErrorParameters e;
01069
01070 if ( 0 != error_status )
01071 *error_status = 0;
01072
01073 if ( -1 == sUTF16_count && 0 != sUTF16 )
01074 {
01075 for ( sUTF16_count = 0; 0 != sUTF16[sUTF16_count]; sUTF16_count++)
01076 {
01077
01078 }
01079 }
01080
01081 if ( 0 == sUTF16 || sUTF16_count < 0 )
01082 {
01083 if ( 0 != error_status )
01084 *error_status |= 1;
01085 if ( sNextUTF16 )
01086 *sNextUTF16 = sUTF16;
01087 return 0;
01088 }
01089
01090 if ( 0 == sUTF32_count )
01091 {
01092 sUTF32 = 0;
01093 sUTF32_count = 2147483647;
01094 }
01095 else if ( 0 == sUTF32 )
01096 {
01097 if ( 0 != error_status )
01098 *error_status |= 1;
01099 if ( sNextUTF16 )
01100 *sNextUTF16 = sUTF16;
01101 return 0;
01102 }
01103
01104 bSwapBytes = false;
01105 if ( bTestByteOrder && sUTF16_count > 0 )
01106 {
01107 if ( 0xFEFF == sUTF16[0] )
01108 {
01109
01110 sUTF16_count--;
01111 sUTF16++;
01112 }
01113 else if ( 0xFFFE == sUTF16[0] )
01114 {
01115
01116 bSwapBytes = true;
01117 sUTF16_count--;
01118 sUTF16++;
01119 }
01120 }
01121
01122 e.m_error_status = 0;
01123 e.m_error_mask = error_mask;
01124 e.m_error_code_point = error_code_point;
01125
01126 output_count = 0;
01127
01128 if ( bSwapBytes )
01129 {
01130 for ( i = 0; i < sUTF16_count; i += j )
01131 {
01132 j = ON_DecodeSwapByteUTF16(sUTF16+i,sUTF16_count-i,&e,&u);
01133 if ( j <= 0 )
01134 break;
01135 if ( 0 != sUTF32 )
01136 {
01137 if ( output_count >= sUTF32_count )
01138 {
01139 e.m_error_status |= 2;
01140 break;
01141 }
01142 sUTF32[output_count] = u;
01143 }
01144 output_count++;
01145 }
01146 }
01147 else
01148 {
01149 for ( i = 0; i < sUTF16_count; i += j )
01150 {
01151 j = ON_DecodeUTF16(sUTF16+i,sUTF16_count-i,&e,&u);
01152 if ( j <= 0 )
01153 break;
01154 if ( 0 != sUTF32 )
01155 {
01156 if ( output_count >= sUTF32_count )
01157 {
01158 e.m_error_status |= 2;
01159 break;
01160 }
01161 sUTF32[output_count] = u;
01162 }
01163 output_count++;
01164 }
01165 }
01166
01167 if ( 0 != sUTF32 && output_count < sUTF32_count)
01168 sUTF32[output_count] = 0;
01169 if ( sNextUTF16 )
01170 *sNextUTF16 = sUTF16+i;
01171 if ( error_status )
01172 *error_status = e.m_error_status;
01173
01174 return output_count;
01175 }
01176
01177 static ON__UINT32 SwapBytes32(ON__UINT32 u)
01178 {
01179 ON__UINT8 b;
01180 ON__UINT8* p = (ON__UINT8*)&u;
01181 b = p[0]; p[0] = p[3]; p[3] = b;
01182 b = p[1]; p[1] = p[2]; p[2] = b;
01183 return u;
01184 }
01185
01186 int ON_ConvertUTF32ToUTF8(
01187 int bTestByteOrder,
01188 const ON__UINT32* sUTF32,
01189 int sUTF32_count,
01190 ON__UINT8* sUTF8,
01191 int sUTF8_count,
01192 unsigned int* error_status,
01193 unsigned int error_mask,
01194 ON__UINT32 error_code_point,
01195 const ON__UINT32** sNextUTF32
01196 )
01197 {
01198 int i, k, output_count, bSwapBytes;
01199 ON__UINT32 u;
01200 ON__UINT8 s[6];
01201 struct ON_UnicodeErrorParameters e;
01202
01203 if ( 0 != error_status )
01204 *error_status = 0;
01205
01206 if ( -1 == sUTF32_count && 0 != sUTF32 )
01207 {
01208 for ( sUTF32_count = 0; 0 != sUTF32[sUTF32_count]; sUTF32_count++)
01209 {
01210
01211 }
01212 }
01213
01214 if ( 0 == sUTF32 || sUTF32_count < 0 )
01215 {
01216 if ( 0 != error_status )
01217 *error_status |= 1;
01218 if ( sNextUTF32 )
01219 *sNextUTF32 = sUTF32;
01220 return 0;
01221 }
01222
01223 if ( 0 == sUTF8_count )
01224 {
01225 sUTF8 = 0;
01226 sUTF8_count = 2147483647;
01227 }
01228 else if ( 0 == sUTF8 )
01229 {
01230 if ( 0 != error_status )
01231 *error_status |= 1;
01232 if ( sNextUTF32 )
01233 *sNextUTF32 = sUTF32;
01234 return 0;
01235 }
01236
01237 bSwapBytes = false;
01238 if ( bTestByteOrder && sUTF32_count > 0 )
01239 {
01240 if ( 0x0000FEFF == sUTF32[0] )
01241 {
01242
01243 sUTF32_count--;
01244 sUTF32++;
01245 }
01246 else if ( 0xFFFE0000 == sUTF32[0] )
01247 {
01248
01249 bSwapBytes = true;
01250 sUTF32_count--;
01251 sUTF32++;
01252 }
01253 }
01254
01255 e.m_error_status = 0;
01256 e.m_error_mask = error_mask;
01257 e.m_error_code_point = error_code_point;
01258
01259 output_count = 0;
01260
01261 for ( i = 0; i < sUTF32_count; i++ )
01262 {
01263 u = bSwapBytes ? SwapBytes32(sUTF32[i]) : sUTF32[i];
01264 if ( !ON_IsValidUnicodeCodePoint(u) )
01265 {
01266 e.m_error_status |= 16;
01267 if ( 16 != (16 & e.m_error_mask) )
01268 break;
01269 if ( !ON_IsValidUnicodeCodePoint(e.m_error_code_point) )
01270 break;
01271 u = e.m_error_code_point;
01272 }
01273 k = ON_EncodeUTF8(u,s);
01274 if ( 0 != sUTF8 )
01275 {
01276 if ( output_count + k > sUTF8_count )
01277 {
01278 e.m_error_status |= 2;
01279 break;
01280 }
01281 memcpy(sUTF8+output_count,s,k*sizeof(sUTF8[0]));
01282 }
01283 output_count += k;
01284 }
01285
01286 if ( 0 != sUTF8 && output_count < sUTF8_count)
01287 sUTF8[output_count] = 0;
01288 if ( sNextUTF32 )
01289 *sNextUTF32 = sUTF32+i;
01290 if ( error_status )
01291 *error_status = e.m_error_status;
01292
01293 return output_count;
01294 }
01295
01296 int ON_ConvertUTF32ToUTF16(
01297 int bTestByteOrder,
01298 const ON__UINT32* sUTF32,
01299 int sUTF32_count,
01300 ON__UINT16* sUTF16,
01301 int sUTF16_count,
01302 unsigned int* error_status,
01303 unsigned int error_mask,
01304 ON__UINT32 error_code_point,
01305 const ON__UINT32** sNextUTF32
01306 )
01307 {
01308 int i, k, output_count, bSwapBytes;
01309 ON__UINT32 u;
01310 ON__UINT16 w[2];
01311 struct ON_UnicodeErrorParameters e;
01312
01313 if ( 0 != error_status )
01314 *error_status = 0;
01315
01316 if ( -1 == sUTF32_count && 0 != sUTF32 )
01317 {
01318 for ( sUTF32_count = 0; 0 != sUTF32[sUTF32_count]; sUTF32_count++)
01319 {
01320
01321 }
01322 }
01323
01324 if ( 0 == sUTF32 || sUTF32_count < 0 )
01325 {
01326 if ( 0 != error_status )
01327 *error_status |= 1;
01328 if ( sNextUTF32 )
01329 *sNextUTF32 = sUTF32;
01330 return 0;
01331 }
01332
01333 if ( 0 == sUTF16_count )
01334 {
01335 sUTF16 = 0;
01336 sUTF16_count = 2147483647;
01337 }
01338 else if ( 0 == sUTF16 )
01339 {
01340 if ( 0 != error_status )
01341 *error_status |= 1;
01342 if ( sNextUTF32 )
01343 *sNextUTF32 = sUTF32;
01344 return 0;
01345 }
01346
01347 bSwapBytes = false;
01348 if ( bTestByteOrder && sUTF32_count > 0 )
01349 {
01350 if ( 0x0000FEFF == sUTF32[0] )
01351 {
01352
01353 sUTF32_count--;
01354 sUTF32++;
01355 }
01356 else if ( 0xFFFE0000 == sUTF32[0] )
01357 {
01358
01359 bSwapBytes = true;
01360 sUTF32_count--;
01361 sUTF32++;
01362 }
01363 }
01364
01365 e.m_error_status = 0;
01366 e.m_error_mask = error_mask;
01367 e.m_error_code_point = error_code_point;
01368
01369 output_count = 0;
01370
01371 for ( i = 0; i < sUTF32_count; i++ )
01372 {
01373 u = bSwapBytes ? SwapBytes32(sUTF32[i]) : sUTF32[i];
01374 if ( !ON_IsValidUnicodeCodePoint(u) )
01375 {
01376 e.m_error_status |= 16;
01377 if ( 16 != (16 & e.m_error_mask) )
01378 break;
01379 if ( !ON_IsValidUnicodeCodePoint(e.m_error_code_point) )
01380 break;
01381 u = e.m_error_code_point;
01382 }
01383 k = ON_EncodeUTF16(u,w);
01384 if ( 0 != sUTF16 )
01385 {
01386 if ( output_count + k > sUTF16_count )
01387 {
01388 e.m_error_status |= 2;
01389 break;
01390 }
01391 sUTF16[output_count] = w[0];
01392 if ( 2 == k )
01393 sUTF16[output_count+1] = w[1];
01394 }
01395 output_count += k;
01396 }
01397
01398 if ( 0 != sUTF16 && output_count < sUTF16_count)
01399 sUTF16[output_count] = 0;
01400 if ( sNextUTF32 )
01401 *sNextUTF32 = sUTF32+i;
01402 if ( error_status )
01403 *error_status = e.m_error_status;
01404
01405 return output_count;
01406 }
01407
01408 ON_DECL
01409 int ON_ConvertWideCharToUTF8(
01410 int bTestByteOrder,
01411 const wchar_t* sWideChar,
01412 int sWideChar_count,
01413 char* sUTF8,
01414 int sUTF8_count,
01415 unsigned int* error_status,
01416 unsigned int error_mask,
01417 ON__UINT32 error_code_point,
01418 const wchar_t** sNextWideChar
01419 )
01420 {
01421 int rc;
01422
01423 switch(sizeof(sWideChar[0]))
01424 {
01425 case sizeof(ON__UINT16):
01426
01427 rc = ON_ConvertUTF16ToUTF8(
01428 bTestByteOrder,
01429 (const ON__UINT16*)sWideChar,sWideChar_count,
01430 (ON__UINT8*)sUTF8,sUTF8_count,
01431 error_status,error_mask,error_code_point,
01432 (const ON__UINT16**)sNextWideChar
01433 );
01434 break;
01435
01436 case sizeof(ON__UINT32):
01437
01438 rc = ON_ConvertUTF32ToUTF8(
01439 bTestByteOrder,
01440 (const ON__UINT32*)sWideChar,sWideChar_count,
01441 (ON__UINT8*)sUTF8,sUTF8_count,
01442 error_status,error_mask,error_code_point,
01443 (const ON__UINT32**)sNextWideChar
01444 );
01445 break;
01446
01447 default:
01448 rc = 0;
01449 }
01450
01451 return rc;
01452 }
01453
01454 ON_DECL
01455 int ON_ConvertUTF8ToWideChar(
01456 const char* sUTF8,
01457 int sUTF8_count,
01458 wchar_t* sWideChar,
01459 int sWideChar_count,
01460 unsigned int* error_status,
01461 unsigned int error_mask,
01462 ON__UINT32 error_code_point,
01463 const char** sNextUTF8
01464 )
01465 {
01466 int rc;
01467
01468 switch(sizeof(sWideChar[0]))
01469 {
01470 case sizeof(ON__UINT16):
01471
01472 rc = ON_ConvertUTF8ToUTF16(
01473 (const ON__UINT8*)sUTF8,sUTF8_count,
01474 (ON__UINT16*)sWideChar,sWideChar_count,
01475 error_status,error_mask,error_code_point,
01476 (const ON__UINT8**)sNextUTF8
01477 );
01478 break;
01479
01480 case sizeof(ON__UINT32):
01481
01482 rc = ON_ConvertUTF8ToUTF32(
01483 (const ON__UINT8*)sUTF8,sUTF8_count,
01484 (ON__UINT32*)sWideChar,sWideChar_count,
01485 error_status,error_mask,error_code_point,
01486 (const ON__UINT8**)sNextUTF8
01487 );
01488 break;
01489
01490 default:
01491 rc = 0;
01492 }
01493
01494 return rc;
01495 }