00001 /* $NoKeywords: $ */ 00002 /* 00003 // 00004 // Copyright (c) 1993-2012 Robert McNeel & Associates. All rights reserved. 00005 // OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert 00006 // McNeel & Associates. 00007 // 00008 // THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY. 00009 // ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF 00010 // MERCHANTABILITY ARE HEREBY DISCLAIMED. 00011 // 00012 // For complete openNURBS copyright information see <http://www.opennurbs.org>. 00013 // 00015 */ 00016 00017 #if !defined(OPENNURBS_UNICODE_INC_) 00018 #define OPENNURBS_UNICODE_INC_ 00019 00020 ON_BEGIN_EXTERNC 00021 /* Error status, error mask and replacement code point passed to the ON_DecodeUTF8(), ON_DecodeUTF16() and ON_DecodeSwapByteUTF16() functions declared in this header. */ 00022 struct ON_UnicodeErrorParameters 00023 { 00024 /* 00025 If an error occurs, then bits of m_error_status are 00026 set to indicate what type of error occurred. 00027 00028 Error types: 00029 1: The input parameters were invalid. 00030 This error cannot be masked. 00031 00032 2: The output buffer was not large enough to hold the converted 00033 string. As much conversion as possible is performed in this 00034 case and the error cannot be masked. 00035 00036 4: When parsing a UTF-8 or UTF-32 string, the values of two 00037 consecutive encoding sequences formed a valid UTF-16 00038 surrogate pair. 00039 00040 This error is masked if 0 != (4 & m_error_mask). 00041 If the error is masked, then the surrogate pair is 00042 decoded, the value of the resulting unicode code point 00043 is used, and parsing continues. 00044 00045 8: An overlong UTF-8 encoding sequence was encountered and 00046 the value of the overlong UTF-8 sequence was a valid unicode 00047 code point. 00048 00049 This error is masked if 0 != (8 & m_error_mask). 00050 If the error is masked, then the unicode code point is 00051 used and parsing continues. 00052 00053 16: An illegal UTF-8 encoding sequence occurred or an invalid 00054 unicode code point value resulted from decoding a 00055 UTF-8 sequence.
00056 00057 This error is masked if 0 != (16 & m_error_mask). 00058 If the error is masked and the value of m_error_code_point is 00059 a valid unicode code point, then m_error_code_point is used 00060 and parsing continues. 00061 */ 00062 unsigned int m_error_status; 00063 00064 /* 00065 If 0 != (m_error_mask & 4), then type 4 errors are masked. 00066 If 0 != (m_error_mask & 8), then type 8 errors are masked. 00067 If 0 != (m_error_mask & 16) and m_error_code_point is a valid unicode 00068 code point value, then type 16 errors are masked. 00069 */ 00070 unsigned int m_error_mask; 00071 00072 /* 00073 Unicode code point value to use when masking type 16 errors. 00074 If 0 == (m_error_mask & 16), then this value is ignored. 00075 0xFFFD is a popular choice for the m_error_code_point value. 00076 */ 00077 ON__UINT32 m_error_code_point; 00078 }; 00079 00080 00081 /* 00082 Description: 00083 Test a value to determine if it is a valid unicode code point value. 00084 Parameters: 00085 u - [in] value to test 00086 Returns: 00087 true: u is a valid unicode code point 00088 false: u is not a valid unicode code point 00089 Remarks: 00090 Valid unicode code points are 00091 (0 <= u && u <= 0xD7FF) || (0xE000 <= u && u <= 0x10FFFF) 00092 */ 00093 ON_DECL 00094 int ON_IsValidUnicodeCodePoint( ON__UINT32 u ); 00095 00096 /* 00097 Description: 00098 Convert an integer to its UTF-8 form. 00099 Parameters: 00100 u - [in] 00101 Integer in the CPU's native byte order that can be 00102 converted to UTF-8 form. 00103 Valid values are in the interval [0,2147483647]. 00104 sUTF8 - [out] 00105 sUTF8 is a buffer of 6 ON__UINT8 elements and the UTF-8 form 00106 is returned in sUTF8[]. The returned value specifies how 00107 many elements of sUTF8[] are set. 00108 Returns: 00109 0: u is too large (>=2^31) to be encoded as a UTF-8 string. 00110 No changes are made to the sUTF8[] values. 00111 1: the UTF-8 form of u is 1 byte returned in sUTF8[0].
00112 2: the UTF-8 form of u is 2 bytes returned in sUTF8[0],sUTF8[1]. 00113 3: the UTF-8 form of u is 3 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2]. 00114 4: the UTF-8 form of u is 4 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3]. 00115 5: the UTF-8 form of u is 5 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4]. 00116 6: the UTF-8 form of u is 6 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4],sUTF8[5]. 00117 For return values requiring less than 6 bytes, no changes 00118 are made to the unused bytes in sUTF8[]. 00119 Remarks: 00120 Any integer in the range 0 to 2^31 - 1 can be encoded as a UTF-8 string. 00121 When a unicode string is being encoded take steps to ensure that 00122 u is a valid unicode code point value. The function ON_IsValidUnicodeCodePoint() 00123 can be used to determine if u is a valid unicode code point value. 00124 */ 00125 ON_DECL 00126 int ON_EncodeUTF8( ON__UINT32 u, ON__UINT8 sUTF8[6] ); 00127 00128 /* 00129 Description: 00130 Decode a UTF-8 encoded string to get a single unicode code point. 00131 Parameters: 00132 sUTF8 - [in] 00133 UTF-8 string to convert. 00134 00135 sUTF8_count - [in] 00136 number of ON__UINT8 elements in sUTF8[]. 00137 00138 e - [in/out] 00139 If e is null, errors are not masked and parsing is performed 00140 to the point where the first error occurs. 00141 If e is not null, all errors are reported by setting the appropriate 00142 e->m_error_status bits and errors are handled as described in the 00143 definition of the ON_UnicodeErrorParameters struct. 00144 00145 unicode_code_point - [out] 00146 The unicode_code_point pointer must not be null. 00147 If a nonzero value is returned, then *unicode_code_point is 00148 a valid unicode code point value. 00149 Returns: 00150 Number of elements of sUTF8 that were parsed. 00151 0 indicates failure.
00152 */ 00153 ON_DECL 00154 int ON_DecodeUTF8( 00155 const ON__UINT8* sUTF8, 00156 int sUTF8_count, 00157 struct ON_UnicodeErrorParameters* e, 00158 ON__UINT32* unicode_code_point 00159 ); 00160 00161 /* 00162 Description: 00163 Convert a 4 byte unicode code point value to its UTF-16 form. 00164 Parameters: 00165 unicode_code_point - [in] 00166 4 byte unicode code point value in the CPU's native byte order. 00167 Valid values are in the interval [0,0xD7FF] or the 00168 interval [0xE000,0x10FFFF]. 00169 sUTF16 - [out] 00170 sUTF16 is a buffer of 2 ON__UINT16 elements. If the UTF-16 form 00171 is a single value, it is returned in sUTF16[0]. If the UTF-16 form 00172 is a surrogate pair, the first code unit (high surrogate) 00173 is returned in sUTF16[0] and the second unit (low surrogate) is 00174 returned in sUTF16[1]. The returned values are in 00175 the CPU's native byte order. 00176 Returns: 00177 0: unicode_code_point is not a valid Unicode code point. No changes are 00178 made to the sUTF16[] values. 00179 1: unicode_code_point is a valid Unicode code point with a UTF-16 form 00180 consisting of the single value returned in sUTF16[0]. 00181 2: unicode_code_point is a valid Unicode code point with a UTF-16 form 00182 consisting of a surrogate pair returned in sUTF16[0] and sUTF16[1]. 00183 */ 00184 ON_DECL 00185 int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] ); 00186 00187 /* 00188 Description: 00189 Decode a UTF-16 string to get a single unicode code point. 00190 Parameters: 00191 sUTF16 - [in] 00192 UTF-16 string to convert. 00193 00194 sUTF16_count - [in] 00195 number of ON__UINT16 elements in sUTF16[]. 00196 00197 e - [in/out] 00198 If e is null, errors are not masked and parsing is performed 00199 to the point where the first error occurs. 00200 If e is not null, all errors are reported by setting the appropriate 00201 e->m_error_status bits and errors are handled as described in the 00202 definition of the ON_UnicodeErrorParameters struct.
00203 00204 unicode_code_point - [out] 00205 The unicode_code_point pointer must not be null. 00206 If a nonzero value is returned, then *unicode_code_point is 00207 a valid unicode code point value in the CPU's native byte order. 00208 Returns: 00209 Number of elements of sUTF16 that were parsed. 00210 0 indicates failure. 00211 */ 00212 ON_DECL 00213 int ON_DecodeUTF16( 00214 const ON__UINT16* sUTF16, 00215 int sUTF16_count, 00216 struct ON_UnicodeErrorParameters* e, 00217 ON__UINT32* unicode_code_point 00218 ); 00219 00220 /* 00221 Description: 00222 Decode a UTF-16 encoded string whose elements have byte order 00223 opposite the native CPU's to get a single unicode code point. 00224 Parameters: 00225 sUTF16 - [in] 00226 UTF-16 string to convert with byte order opposite the 00227 CPU's native byte order. 00228 00229 sUTF16_count - [in] 00230 number of ON__UINT16 elements in sUTF16[]. 00231 00232 e - [in/out] 00233 If e is null, errors are not masked and parsing is performed 00234 to the point where the first error occurs. 00235 If e is not null, all errors are reported by setting the appropriate 00236 e->m_error_status bits and errors are handled as described in the 00237 definition of the ON_UnicodeErrorParameters struct. 00238 00239 unicode_code_point - [out] 00240 The unicode_code_point pointer must not be null. 00241 If a nonzero value is returned, then *unicode_code_point is 00242 a valid unicode code point value in the CPU's native byte order. 00243 Returns: 00244 Number of elements of sUTF16 that were parsed. 00245 0 indicates failure. 00246 */ 00247 ON_DECL 00248 int ON_DecodeSwapByteUTF16( 00249 const ON__UINT16* sUTF16, 00250 int sUTF16_count, 00251 struct ON_UnicodeErrorParameters* e, 00252 ON__UINT32* unicode_code_point 00253 ); 00254 00255 /* 00256 Description: 00257 Convert a unicode string from a UTF-8 encoded ON__UINT8 array 00258 into a UTF-16 encoded ON__UINT16 array. 00259 00260 Parameters: 00261 sUTF8 - [in] 00262 UTF-8 string to convert.
00263 00264 sUTF8_count - [in] 00265 If sUTF8_count >= 0, then it specifies the number of 00266 ON__UINT8 elements in sUTF8[] to convert. 00267 00268 If sUTF8_count == -1, then sUTF8 must be a null terminated 00269 string and all the elements up to the first null element are 00270 converted. 00271 00272 sUTF16 - [out] 00273 If sUTF16 is not null and sUTF16_count > 0, then the UTF-16 00274 encoded string is returned in this buffer. If there is room 00275 for the null terminator, the converted string will be null 00276 terminated. The null terminator is never included in the count 00277 returned by this function. The converted string is in the 00278 CPU's native byte order. No byte order mark is prepended. 00279 00280 sUTF16_count - [in] 00281 If sUTF16_count > 0, then it specifies the number of available 00282 ON__UINT16 elements in the sUTF16[] buffer. 00283 00284 If sUTF16_count == 0, then the sUTF16 parameter is ignored. 00285 00286 error_status - [out] 00287 If error_status is not null, then bits of *error_status are 00288 set to indicate the success or failure of the conversion. 00289 When the error_mask parameter is used to mask some 00290 conversion errors, multiple bits may be set. 00291 0: Successful conversion with no errors. 00292 1: Invalid input parameters. This error cannot be masked. 00293 2: The sUTF16 output buffer was not large enough to hold 00294 the converted string. This error cannot be masked. 00295 4: The values of two UTF-8 encoding sequences formed a valid 00296 UTF-16 surrogate pair. This error can be masked. If the 00297 error is masked, then the surrogate pair is added 00298 to the UTF-16 output string and parsing continues. 00299 8: An overlong UTF-8 encoding sequence was encountered. 00300 The value of the overlong sequence was a valid unicode 00301 code point. This error can be masked. If the error is masked, 00302 then the unicode code point is encoded and added to the 00303 UTF-16 output string and parsing continues.
00304 16: An illegal UTF-8 encoding sequence occurred or an invalid 00305 unicode code point value resulted from decoding a 00306 UTF-8 sequence. This error can be masked. If the error is 00307 masked and error_code_point is a valid unicode code point, 00308 then its UTF-16 encoding is added to the UTF-16 output 00309 string and parsing continues. 00310 00311 error_mask - [in] 00312 If 0 != (error_mask & 4), then type 4 errors are masked. 00313 If 0 != (error_mask & 8), then type 8 errors are masked. 00314 If 0 != (error_mask & 16) and error_code_point is a valid unicode 00315 code point value, then type 16 errors are masked. 00316 00317 error_code_point - [in] 00318 Unicode code point value to use when masking type 16 errors. 00319 If 0 == (error_mask & 16), then this parameter is ignored. 00320 0xFFFD is a popular choice for the error_code_point value. 00321 00322 sNextUTF8 - [out] 00323 If sNextUTF8 is not null, then *sNextUTF8 points to the first 00324 element in the input sUTF8[] buffer that was not converted. 00325 00326 If an error occurs and is not masked, then *sNextUTF8 points to 00327 the element of sUTF8[] where the conversion failed. If no errors 00328 occur or all errors are masked, then *sNextUTF8 points to 00329 sUTF8 + sUTF8_count. 00330 00331 Returns: 00332 If sUTF16_count > 0, the return value is the number of ON__UINT16 00333 elements written to sUTF16[]. When the return value < sUTF16_count, 00334 a null terminator is written to sUTF16[return value]. 00335 00336 If sUTF16_count == 0, the return value is the minimum number of 00337 ON__UINT16 elements that are needed to hold the converted string. 00338 The return value does not include room for a null terminator. 00339 Increment the return value by one if you want to have an element 00340 to use for a null terminator.
00341 */ 00342 ON_DECL 00343 int ON_ConvertUTF8ToUTF16( 00344 const ON__UINT8* sUTF8, 00345 int sUTF8_count, 00346 ON__UINT16* sUTF16, 00347 int sUTF16_count, 00348 unsigned int* error_status, 00349 unsigned int error_mask, 00350 ON__UINT32 error_code_point, 00351 const ON__UINT8** sNextUTF8 00352 ); 00353 00354 /* 00355 Description: 00356 Convert a unicode string from a UTF-8 encoded ON__UINT8 array 00357 into a UTF-32 encoded ON__UINT32 array. 00358 00359 Parameters: 00360 sUTF8 - [in] 00361 UTF-8 string to convert. 00362 00363 sUTF8_count - [in] 00364 If sUTF8_count >= 0, then it specifies the number of 00365 ON__UINT8 elements in sUTF8[] to convert. 00366 00367 If sUTF8_count == -1, then sUTF8 must be a null terminated 00368 string and all the elements up to the first null element are 00369 converted. 00370 00371 sUTF32 - [out] 00372 If sUTF32 is not null and sUTF32_count > 0, then the UTF-32 00373 encoded string is returned in this buffer. If there is room 00374 for the null terminator, the converted string will be null 00375 terminated. The null terminator is never included in the count 00376 returned by this function. The converted string is in the 00377 CPU's native byte order. No byte order mark is prepended. 00378 00379 sUTF32_count - [in] 00380 If sUTF32_count > 0, then it specifies the number of available 00381 ON__UINT32 elements in the sUTF32[] buffer. 00382 00383 If sUTF32_count == 0, then the sUTF32 parameter is ignored. 00384 00385 error_status - [out] 00386 If error_status is not null, then bits of *error_status are 00387 set to indicate the success or failure of the conversion. 00388 When the error_mask parameter is used to mask some 00389 conversion errors, multiple bits may be set. 00390 0: Successful conversion with no errors. 00391 1: Invalid input parameters. This error cannot be masked. 00392 2: The sUTF32 output buffer was not large enough to hold 00393 the converted string. This error cannot be masked.
00394 4: The values of two UTF-8 encoding sequences formed a valid 00395 UTF-16 surrogate pair. This error can be masked. If the 00396 error is masked, then the surrogate pair is decoded, 00397 the code point value is added to the UTF-32 output 00398 string and parsing continues. 00399 8: An overlong UTF-8 encoding sequence was encountered. 00400 The value of the overlong sequence was a valid unicode 00401 code point. This error can be masked. If the error is masked, 00402 then the unicode code point is added to the UTF-32 00403 output string and parsing continues. 00404 16: An illegal UTF-8 encoding sequence occurred or an invalid 00405 unicode code point value resulted from decoding a 00406 UTF-8 sequence. This error can be masked. If the error is 00407 masked and error_code_point is a valid unicode code point, 00408 then its value is added to the UTF-32 output string and 00409 parsing continues. 00410 00411 error_mask - [in] 00412 If 0 != (error_mask & 4), then type 4 errors are masked. 00413 If 0 != (error_mask & 8), then type 8 errors are masked. 00414 If 0 != (error_mask & 16) and error_code_point is a valid unicode 00415 code point value, then type 16 errors are masked. 00416 00417 error_code_point - [in] 00418 Unicode code point value to use when masking type 16 errors. 00419 If 0 == (error_mask & 16), then this parameter is ignored. 00420 0xFFFD is a popular choice for the error_code_point value. 00421 00422 sNextUTF8 - [out] 00423 If sNextUTF8 is not null, then *sNextUTF8 points to the first 00424 element in the input sUTF8[] buffer that was not converted. 00425 00426 If an error occurs and is not masked, then *sNextUTF8 points to 00427 the element of sUTF8[] where the conversion failed. If no errors 00428 occur or all errors are masked, then *sNextUTF8 points to 00429 sUTF8 + sUTF8_count. 00430 00431 Returns: 00432 If sUTF32_count > 0, the return value is the number of ON__UINT32 00433 elements written to sUTF32[].
When the return value < sUTF32_count, 00434 a null terminator is written to sUTF32[return value]. 00435 00436 If sUTF32_count == 0, the return value is the minimum number of 00437 ON__UINT32 elements that are needed to hold the converted string. 00438 The return value does not include room for a null terminator. 00439 Increment the return value by one if you want to have an element 00440 to use for a null terminator. 00441 */ 00442 ON_DECL 00443 int ON_ConvertUTF8ToUTF32( 00444 const ON__UINT8* sUTF8, 00445 int sUTF8_count, 00446 ON__UINT32* sUTF32, 00447 int sUTF32_count, 00448 unsigned int* error_status, 00449 unsigned int error_mask, 00450 ON__UINT32 error_code_point, 00451 const ON__UINT8** sNextUTF8 00452 ); 00453 00454 /* 00455 Description: 00456 Convert a unicode string from a UTF-16 encoded ON__UINT16 array 00457 into a UTF-8 encoded ON__UINT8 array. 00458 00459 Parameters: 00460 bTestByteOrder - [in] 00461 If bTestByteOrder is true and the first element of sUTF16[] 00462 is 0xFEFF, then this element is ignored. 00463 00464 If bTestByteOrder is true and the first element of sUTF16[] 00465 is 0xFFFE, then this element is ignored and the subsequent 00466 elements of sUTF16[] have their bytes swapped before the 00467 conversion is calculated. 00468 00469 In all other cases the first element of sUTF16[] is 00470 converted and no byte swapping is performed. 00471 00472 sUTF16 - [in] 00473 UTF-16 string to convert. 00474 00475 If bTestByteOrder is true and the first element of sUTF16[] 00476 is 0xFEFF, then this element is skipped and it is assumed 00477 that sUTF16[] is in the CPU's native byte order. 00478 00479 If bTestByteOrder is true and the first element of sUTF16[] 00480 is 0xFFFE, then this element is skipped and it is assumed 00481 that sUTF16[] is not in the CPU's native byte order and bytes 00482 are swapped before characters are converted.
00483 00484 If bTestByteOrder is false or the first character of sUTF16[] 00485 is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match 00486 the CPU's byte order. 00487 00488 sUTF16_count - [in] 00489 If sUTF16_count >= 0, then it specifies the number of 00490 ON__UINT16 elements in sUTF16[] to convert. 00491 00492 If sUTF16_count == -1, then sUTF16 must be a null terminated 00493 string and all the elements up to the first null element are 00494 converted. 00495 00496 sUTF8 - [out] 00497 If sUTF8 is not null and sUTF8_count > 0, then the UTF-8 00498 encoded string is returned in this buffer. If there is room 00499 for the null terminator, the converted string will be null 00500 terminated. The null terminator is never included in the count 00501 returned by this function. The converted string is in the 00502 CPU's native byte order. No byte order mark is prepended. 00503 00504 sUTF8_count - [in] 00505 If sUTF8_count > 0, then it specifies the number of available 00506 ON__UINT8 elements in the sUTF8[] buffer. 00507 00508 If sUTF8_count == 0, then the sUTF8 parameter is ignored. 00509 00510 error_status - [out] 00511 If error_status is not null, then bits of *error_status are 00512 set to indicate the success or failure of the conversion. 00513 When the error_mask parameter is used to mask some 00514 conversion errors, multiple bits may be set. 00515 0: Successful conversion with no errors. 00516 1: Invalid input parameters. This error cannot be masked. 00517 2: The sUTF8 output buffer was not large enough to hold 00518 the converted string. This error cannot be masked. 00519 16: An illegal UTF-16 encoding sequence occurred or an invalid 00520 unicode code point value resulted from decoding a 00521 UTF-16 sequence. This error can be masked. If the error is 00522 masked and error_code_point is a valid unicode code point, 00523 then its UTF-8 encoding is added to the UTF-8 output 00524 string and parsing continues.
00525 00526 error_mask - [in] 00527 If 0 != (error_mask & 16) and error_code_point is a valid unicode 00528 code point value, then type 16 errors are masked. 00529 00530 error_code_point - [in] 00531 Unicode code point value to use when masking type 16 errors. 00532 If 0 == (error_mask & 16), then this parameter is ignored. 00533 0xFFFD is a popular choice for the error_code_point value. 00534 00535 sNextUTF16 - [out] 00536 If sNextUTF16 is not null, then *sNextUTF16 points to the first 00537 element in the input sUTF16[] buffer that was not converted. 00538 00539 If an error occurs and is not masked, then *sNextUTF16 points to 00540 the element of sUTF16[] where the conversion failed. If no errors 00541 occur or all errors are masked, then *sNextUTF16 points to 00542 sUTF16 + sUTF16_count. 00543 Returns: 00544 If sUTF8_count > 0, the return value is the number of ON__UINT8 00545 elements written to sUTF8[]. When the return value < sUTF8_count, 00546 a null terminator is written to sUTF8[return value]. 00547 00548 If sUTF8_count == 0, the return value is the minimum number of 00549 ON__UINT8 elements that are needed to hold the converted string. 00550 The return value does not include room for a null terminator. 00551 Increment the return value by one if you want to have an element 00552 to use for a null terminator. 00553 */ 00554 ON_DECL 00555 int ON_ConvertUTF16ToUTF8( 00556 int bTestByteOrder, 00557 const ON__UINT16* sUTF16, 00558 int sUTF16_count, 00559 ON__UINT8* sUTF8, 00560 int sUTF8_count, 00561 unsigned int* error_status, 00562 unsigned int error_mask, 00563 ON__UINT32 error_code_point, 00564 const ON__UINT16** sNextUTF16 00565 ); 00566 00567 /* 00568 Description: 00569 Convert a unicode string from a UTF-16 encoded ON__UINT16 array 00570 into a UTF-32 encoded ON__UINT32 array. 00571 00572 Parameters: 00573 bTestByteOrder - [in] 00574 If bTestByteOrder is true and the first element of sUTF16[] 00575 is 0xFEFF, then this element is ignored.
00576 00577 If bTestByteOrder is true and the first element of sUTF16[] 00578 is 0xFFFE, then this element is ignored and the subsequent 00579 elements of sUTF16[] have their bytes swapped before the 00580 conversion is calculated. 00581 00582 In all other cases the first element of sUTF16[] is 00583 converted and no byte swapping is performed. 00584 00585 sUTF16 - [in] 00586 UTF-16 string to convert. 00587 00588 If bTestByteOrder is true and the first element of sUTF16[] 00589 is 0xFEFF, then this element is skipped and it is assumed 00590 that sUTF16[] is in the CPU's native byte order. 00591 00592 If bTestByteOrder is true and the first element of sUTF16[] 00593 is 0xFFFE, then this element is skipped and it is assumed 00594 that sUTF16[] is not in the CPU's native byte order and bytes 00595 are swapped before characters are converted. 00596 00597 If bTestByteOrder is false or the first character of sUTF16[] 00598 is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match 00599 the CPU's byte order. 00600 00601 sUTF16_count - [in] 00602 If sUTF16_count >= 0, then it specifies the number of 00603 ON__UINT16 elements in sUTF16[] to convert. 00604 00605 If sUTF16_count == -1, then sUTF16 must be a null terminated 00606 string and all the elements up to the first null element are 00607 converted. 00608 00609 sUTF32 - [out] 00610 If sUTF32 is not null and sUTF32_count > 0, then the UTF-32 00611 encoded string is returned in this buffer. If there is room 00612 for the null terminator, the converted string will be null 00613 terminated. The null terminator is never included in the count 00614 returned by this function. The converted string is in the 00615 CPU's native byte order. No byte order mark is prepended. NOTE(review): the declaration types sUTF32 as unsigned int* while ON_ConvertUTF8ToUTF32() uses ON__UINT32*; confirm the two types are identical on every supported platform. 
00616 00617 sUTF32_count - [in] 00618 If sUTF32_count > 0, then it specifies the number of available 00619 ON__UINT32 elements in the sUTF32[] buffer. 00620 00621 If sUTF32_count == 0, then the sUTF32 parameter is ignored.
00622 00623 error_status - [out] 00624 If error_status is not null, then bits of *error_status are 00625 set to indicate the success or failure of the conversion. 00626 When the error_mask parameter is used to mask some 00627 conversion errors, multiple bits may be set. 00628 0: Successful conversion with no errors. 00629 1: Invalid input parameters. This error cannot be masked. 00630 2: The sUTF32 output buffer was not large enough to hold 00631 the converted string. This error cannot be masked. 00632 16: An illegal UTF-16 encoding sequence occurred or an invalid 00633 unicode code point value resulted from decoding a 00634 UTF-16 sequence. This error can be masked. If the error is 00635 masked and error_code_point is a valid unicode code point, 00636 then its value is added to the UTF-32 output string and 00637 parsing continues. 00638 00639 error_mask - [in] 00640 If 0 != (error_mask & 16) and error_code_point is a valid unicode 00641 code point value, then type 16 errors are masked. 00642 00643 error_code_point - [in] 00644 Unicode code point value to use when masking type 16 errors. 00645 If 0 == (error_mask & 16), then this parameter is ignored. 00646 0xFFFD is a popular choice for the error_code_point value. 00647 00648 sNextUTF16 - [out] 00649 If sNextUTF16 is not null, then *sNextUTF16 points to the first 00650 element in the input sUTF16[] buffer that was not converted. 00651 00652 If an error occurs and is not masked, then *sNextUTF16 points to 00653 the element of sUTF16[] where the conversion failed. If no errors 00654 occur or all errors are masked, then *sNextUTF16 points to 00655 sUTF16 + sUTF16_count. 00656 00657 Returns: 00658 If sUTF32_count > 0, the return value is the number of ON__UINT32 00659 elements written to sUTF32[]. When the return value < sUTF32_count, 00660 a null terminator is written to sUTF32[return value].
00661 00662 If sUTF32_count == 0, the return value is the minimum number of 00663 ON__UINT32 elements that are needed to hold the converted string. 00664 The return value does not include room for a null terminator. 00665 Increment the return value by one if you want to have an element 00666 to use for a null terminator. 00667 */ 00668 ON_DECL 00669 int ON_ConvertUTF16ToUTF32( 00670 int bTestByteOrder, 00671 const ON__UINT16* sUTF16, 00672 int sUTF16_count, 00673 unsigned int* sUTF32, 00674 int sUTF32_count, 00675 unsigned int* error_status, 00676 unsigned int error_mask, 00677 ON__UINT32 error_code_point, 00678 const ON__UINT16** sNextUTF16 00679 ); 00680 00681 /* 00682 Description: 00683 Convert a unicode string from a UTF-32 encoded ON__UINT32 array 00684 into a UTF-8 encoded ON__UINT8 array. 00685 00686 Parameters: 00687 bTestByteOrder - [in] 00688 If bTestByteOrder is true and the first element of sUTF32[] 00689 is 0x0000FEFF, then this element is ignored. 00690 00691 If bTestByteOrder is true and the first element of sUTF32[] 00692 is 0xFFFE0000, then this element is ignored and the subsequent 00693 elements of sUTF32[] have their bytes swapped before the 00694 conversion is calculated. 00695 00696 In all other cases the first element of sUTF32[] is 00697 converted and no byte swapping is performed. 00698 00699 sUTF32 - [in] 00700 UTF-32 string to convert. 00701 00702 If bTestByteOrder is true and the first element of sUTF32[] 00703 is 0x0000FEFF, then this element is skipped and it is assumed 00704 that sUTF32[] is in the CPU's native byte order. 00705 00706 If bTestByteOrder is true and the first element of sUTF32[] 00707 is 0xFFFE0000, then this element is skipped and it is assumed 00708 that sUTF32[] is not in the CPU's native byte order and bytes 00709 are swapped before characters are converted.
00710 00711 If bTestByteOrder is false or the first character of sUTF32[] 00712 is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string 00713 must match the CPU's byte order. 00714 00715 sUTF32_count - [in] 00716 If sUTF32_count >= 0, then it specifies the number of 00717 ON__UINT32 elements in sUTF32[] to convert. 00718 00719 If sUTF32_count == -1, then sUTF32 must be a null terminated 00720 string and all the elements up to the first null element are 00721 converted. 00722 00723 sUTF8 - [out] 00724 If sUTF8 is not null and sUTF8_count > 0, then the UTF-8 00725 encoded string is returned in this buffer. If there is room 00726 for the null terminator, the converted string will be null 00727 terminated. The null terminator is never included in the count 00728 returned by this function. The converted string is in the 00729 CPU's native byte order. No byte order mark is prepended. 00730 00731 sUTF8_count - [in] 00732 If sUTF8_count > 0, then it specifies the number of available 00733 ON__UINT8 elements in the sUTF8[] buffer. 00734 00735 If sUTF8_count == 0, then the sUTF8 parameter is ignored. 00736 00737 error_status - [out] 00738 If error_status is not null, then bits of *error_status are 00739 set to indicate the success or failure of the conversion. 00740 When the error_mask parameter is used to mask some 00741 conversion errors, multiple bits may be set. 00742 0: Successful conversion with no errors. 00743 1: Invalid input parameters. This error cannot be masked. 00744 2: The sUTF8 output buffer was not large enough to hold 00745 the converted string. This error cannot be masked. 00746 4: The values of two UTF-32 elements form a valid 00747 UTF-16 surrogate pair. This error can be masked. If the 00748 error is masked, then the surrogate pair is converted 00749 to a valid unicode code point, its UTF-8 encoding is 00750 added to the UTF-8 output string and parsing continues. 00751 16: An invalid unicode code point occurred in sUTF32[].
00752 This error can be masked. If the error is masked and 00753 error_code_point is a valid unicode code point, 00754 then its UTF-8 encoding is added to the UTF-8 output 00755 string and parsing continues. 00756 00757 error_mask - [in] 00758 If 0 != (error_mask & 4), then type 4 errors are masked. 00759 If 0 != (error_mask & 16) and error_code_point is a valid unicode 00760 code point value, then type 16 errors are masked. 00761 00762 error_code_point - [in] 00763 Unicode code point value to use when masking type 16 errors. 00764 If 0 == (error_mask & 16), then this parameter is ignored. 00765 0xFFFD is a popular choice for the error_code_point value. 00766 00767 sNextUTF32 - [out] 00768 If sNextUTF32 is not null, then *sNextUTF32 points to the first 00769 element in the input sUTF32[] buffer that was not converted. 00770 00771 If an error occurs and is not masked, then *sNextUTF32 points to 00772 the element of sUTF32[] where the conversion failed. If no errors 00773 occur or all errors are masked, then *sNextUTF32 points to 00774 sUTF32 + sUTF32_count. 00775 00776 Returns: 00777 If sUTF8_count > 0, the return value is the number of ON__UINT8 00778 elements written to sUTF8[]. When the return value < sUTF8_count, 00779 a null terminator is written to sUTF8[return value]. 00780 00781 If sUTF8_count == 0, the return value is the minimum number of 00782 ON__UINT8 elements that are needed to hold the converted string. 00783 The return value does not include room for a null terminator. 00784 Increment the return value by one if you want to have an element 00785 to use for a null terminator.
00786 */ 00787 ON_DECL 00788 int ON_ConvertUTF32ToUTF8( 00789 int bTestByteOrder, 00790 const ON__UINT32* sUTF32, 00791 int sUTF32_count, 00792 ON__UINT8* sUTF8, 00793 int sUTF8_count, 00794 unsigned int* error_status, 00795 unsigned int error_mask, 00796 ON__UINT32 error_code_point, 00797 const ON__UINT32** sNextUTF32 00798 ); 00799 00800 /* 00801 Description: 00802 Convert a unicode string from a UTF-32 encoded ON__UINT32 array 00803 into a UTF-16 encoded ON__UINT16 array. 00804 00805 Parameters: 00806 bTestByteOrder - [in] 00807 If bTestByteOrder is true and the first element of sUTF32[] 00808 is 0x0000FEFF, then this element is ignored. 00809 00810 If bTestByteOrder is true and the first element of sUTF32[] 00811 is 0xFFFE0000, then this element is ignored and the subsequent 00812 elements of sUTF32[] have their bytes swapped before the 00813 conversion is calculated. 00814 00815 In all other cases the first element of sUTF32[] is 00816 converted and no byte swapping is performed. 00817 00818 sUTF32 - [in] 00819 UTF-32 string to convert. 00820 00821 If bTestByteOrder is true and the first element of sUTF32[] 00822 is 0x0000FEFF, then this element is skipped and it is assumed 00823 that sUTF32[] is in the CPU's native byte order. 00824 00825 If bTestByteOrder is true and the first element of sUTF32[] 00826 is 0xFFFE0000, then this element is skipped and it is assumed 00827 that sUTF32[] is not in the CPU's native byte order and bytes 00828 are swapped before characters are converted. 00829 00830 If bTestByteOrder is false or the first character of sUTF32[] 00831 is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string 00832 must match the CPU's byte order. 00833 00834 sUTF32_count - [in] 00835 If sUTF32_count >= 0, then it specifies the number of 00836 ON__UINT32 elements in sUTF32[] to convert. 00837 00838 If sUTF32_count == -1, then sUTF32 must be a null terminated 00839 string and all the elements up to the first null element are 00840 converted.
00841 00842 sUTF16 - [out] 00843 If sUTF16 is not null and sUTF16_count > 0, then the UTF-16 00844 encoded string is returned in this buffer. If there is room 00845 for the null terminator, the converted string will be null 00846 terminated. The null terminator is never included in the count 00847 returned by this function. The converted string is in the 00848 CPU's native byte order. No byte order mark is prepended. 00849 00850 sUTF16_count - [in] 00851 If sUTF16_count > 0, then it specifies the number of available 00852 ON__UINT16 elements in the sUTF16[] buffer. 00853 00854 If sUTF16_count == 0, then the sUTF16 parameter is ignored. 00855 00856 error_status - [out] 00857 If error_status is not null, then bits of *error_status are 00858 set to indicate the success or failure of the conversion. 00859 When the error_mask parameter is used to mask some 00860 conversion errors, multiple bits may be set. 00861 0: Successful conversion with no errors. 00862 1: Invalid input parameters. This error cannot be masked. 00863 2: The sUTF16 output buffer was not large enough to hold 00864 the converted string. This error cannot be masked. 00865 4: The values of two UTF-32 elements form a valid 00866 UTF-16 surrogate pair. This error can be masked. If the 00867 error is masked, then the surrogate pair is added to 00868 the UTF-16 output string and parsing continues. 00869 16: An invalid unicode code point occurred in sUTF32[]. 00870 This error can be masked. If the error is masked and 00871 error_code_point is a valid unicode code point, 00872 then its UTF-16 encoding is added to the UTF-16 output 00873 string and parsing continues. 00874 00875 error_mask - [in] 00876 If 0 != (error_mask & 4), then type 4 errors are masked. 00877 If 0 != (error_mask & 16) and error_code_point is a valid unicode 00878 code point value, then type 16 errors are masked. 00879 00880 error_code_point - [in] 00881 Unicode code point value to use when masking type 16 errors.
00882 If 0 == (error_mask & 16), then this parameter is ignored. 00883 0xFFFD is a popular choice for the error_code_point value. 00884 00885 sNextUTF32 - [out] 00886 If sNextUTF32 is not null, then *sNextUTF32 points to the first 00887 element in the input sUTF32[] buffer that was not converted. 00888 00889 If an error occurs and is not masked, then the element that 00890 *sNextUTF32 points to will be an illegal unicode code point value. 00891 00892 If an error does not occur, then (*sNextUTF32 - sUTF32) 00893 is the number of elements converted. 00894 00895 Returns: 00896 If sUTF16_count > 0, the return value is the number of ON__UINT16 00897 elements written to sUTF16[]. When the return value < sUTF16_count, 00898 a null terminator is written to sUTF16[return value]. 00899 00900 If sUTF16_count == 0, the return value is the minimum number of 00901 ON__UINT16 elements that are needed to hold the converted string. 00902 The return value does not include room for a null terminator. 00903 Increment the return value by one if you want to have an element 00904 to use for a null terminator. 00905 */ 00906 ON_DECL 00907 int ON_ConvertUTF32ToUTF16( 00908 int bTestByteOrder, 00909 const ON__UINT32* sUTF32, 00910 int sUTF32_count, 00911 ON__UINT16* sUTF16, 00912 int sUTF16_count, 00913 unsigned int* error_status, 00914 unsigned int error_mask, 00915 ON__UINT32 error_code_point, 00916 const ON__UINT32** sNextUTF32 00917 ); 00918 00919 /* 00920 Description: 00921 Convert a wchar_t string using the native platform's most common 00922 encoding into a unicode string encoded as a UTF-8 char array. 00923 00924 If 2 = sizeof(wchar_t), then the wchar_t array is assumed to be 00925 a UTF-16 encoded string. This is the case with current versions 00926 of Microsoft Windows. 00927 00928 If 4 = sizeof(wchar_t), then the wchar_t array is assumed to be 00929 a UTF-32 encoded string. This is the case with current versions 00930 of Apple OSX.
00931 00932 Parameters: 00933 bTestByteOrder - [in] 00934 If bTestByteOrder is true and the first element of sWideChar[] 00935 is 0xFEFF, then this element is ignored. 00936 00937 If bTestByteOrder is true and the first element of sWideChar[] 00938 is 0xFFFE, then this element is ignored and the subsequent 00939 elements of sWideChar[] have their bytes swapped before the 00940 conversion is calculated. 00941 00942 In all other cases the first element of sWideChar[] is 00943 converted and no byte swapping is performed. 00944 00945 sWideChar - [in] 00946 wchar_t string to convert. 00947 00948 If bTestByteOrder is true and the first element of sWideChar[] 00949 is 0xFEFF, then this element is skipped and it is assumed 00950 that sWideChar[] is in the CPU's native byte order. 00951 00952 If bTestByteOrder is true and the first element of sWideChar[] 00953 is 0xFFFE, then this element is skipped and it is assumed 00954 that sWideChar[] is not in the CPU's native byte order and bytes 00955 are swapped before characters are converted. 00956 00957 If bTestByteOrder is false or the first character of sWideChar[] 00958 is neither 0xFEFF nor 0xFFFE, then the sWideChar string must match 00959 the CPU's byte order. 00960 00961 sWideChar_count - [in] 00962 If sWideChar_count >= 0, then it specifies the number of 00963 wchar_t elements in sWideChar[] to convert. 00964 00965 If sWideChar_count == -1, then sWideChar must be a null terminated 00966 string and all the elements up to the first null element are 00967 converted. 00968 00969 sUTF8 - [out] 00970 If sUTF8 is not null and sUTF8_count > 0, then the UTF-8 00971 encoded string is returned in this buffer. If there is room 00972 for the null terminator, the converted string will be null 00973 terminated. The null terminator is never included in the count 00974 returned by this function. The converted string is in the 00975 CPU's native byte order. No byte order mark is prepended.
00976 00977 sUTF8_count - [in] 00978 If sUTF8_count > 0, then it specifies the number of available 00979 ON__UINT8 elements in the sUTF8[] buffer. 00980 00981 If sUTF8_count == 0, then the sUTF8 parameter is ignored. 00982 00983 error_status - [out] 00984 If error_status is not null, then bits of *error_status are 00985 set to indicate the success or failure of the conversion. 00986 When the error_mask parameter is used to mask some 00987 conversion errors, multiple bits may be set. 00988 0: Successful conversion with no errors. 00989 1: Invalid input parameters. This error cannot be masked. 00990 2: The sUTF8 output buffer was not large enough to hold 00991 the converted string. This error cannot be masked. 00992 16: An illegal wchar_t encoding sequence occurred or an invalid 00993 unicode code point value resulted from decoding a 00994 wchar_t sequence. This error can be masked. If the error is 00995 masked and error_code_point is a valid unicode code point, 00996 then its UTF-8 encoding is added to the UTF-8 output 00997 string and parsing continues. 00998 00999 error_mask - [in] 01000 If 0 != (error_mask & 16) and error_code_point is a valid unicode 01001 code point value, then type 16 errors are masked. 01002 01003 error_code_point - [in] 01004 Unicode code point value to use when masking type 16 errors. 01005 If 0 == (error_mask & 16), then this parameter is ignored. 01006 0xFFFD is a popular choice for the error_code_point value. 01007 01008 sNextWideChar - [out] 01009 If sNextWideChar is not null, then *sNextWideChar points to the first 01010 element in the input sWideChar[] buffer that was not converted. 01011 01012 If an error occurs and is not masked, then *sNextWideChar points to 01013 the element of sWideChar[] where the conversion failed. If no errors 01014 occur or all errors are masked, then *sNextWideChar points to 01015 sWideChar + sWideChar_count.
01016 Returns: 01017 If sUTF8_count > 0, the return value is the number of ON__UINT8 01018 elements written to sUTF8[]. When the return value < sUTF8_count, 01019 a null terminator is written to sUTF8[return value]. 01020 01021 If sUTF8_count == 0, the return value is the minimum number of 01022 ON__UINT8 elements that are needed to hold the converted string. 01023 The return value does not include room for a null terminator. 01024 Increment the return value by one if you want to have an element 01025 to use for a null terminator. 01026 */ 01027 ON_DECL 01028 int ON_ConvertWideCharToUTF8( 01029 int bTestByteOrder, 01030 const wchar_t* sWideChar, 01031 int sWideChar_count, 01032 char* sUTF8, 01033 int sUTF8_count, 01034 unsigned int* error_status, 01035 unsigned int error_mask, 01036 ON__UINT32 error_code_point, 01037 const wchar_t** sNextWideChar 01038 ); 01039 01040 /* 01041 Description: 01042 Convert a UTF-8 encoded char string to wchar_t string using 01043 the native platform's most common encoding. 01044 01045 If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the 01046 output string. This is the case with current versions of 01047 Microsoft Windows. 01048 01049 If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the 01050 output string. This is the case with current versions of 01051 Apple OSX. 01052 01053 Parameters: 01054 sUTF8 - [in] 01055 UTF-8 string to convert. 01056 01057 sUTF8_count - [in] 01058 If sUTF8_count >= 0, then it specifies the number of 01059 ON__UINT8 elements in sUTF8[] to convert. 01060 01061 If sUTF8_count == -1, then sUTF8 must be a null terminated 01062 string and all the elements up to the first null element are 01063 converted. 01064 01065 sWideChar - [out] 01066 If sWideChar is not null and sWideChar_count > 0, then the 01067 output string is returned in this buffer. If there is room 01068 for the null terminator, the converted string will be null 01069 terminated.
The null terminator is never included in the count 01070 returned by this function. The converted string is in the 01071 CPU's native byte order. No byte order mark is prepended. 01072 01073 sWideChar_count - [in] 01074 If sWideChar_count > 0, then it specifies the number of available 01075 wchar_t elements in the sWideChar[] buffer. 01076 01077 If sWideChar_count == 0, then the sWideChar parameter is ignored. 01078 01079 error_status - [out] 01080 If error_status is not null, then bits of *error_status are 01081 set to indicate the success or failure of the conversion. 01082 When the error_mask parameter is used to mask some 01083 conversion errors, multiple bits may be set. 01084 0: Successful conversion with no errors. 01085 1: Invalid input parameters. This error cannot be masked. 01086 2: The sWideChar output buffer was not large enough to hold 01087 the converted string. This error cannot be masked. 01088 4: The values of two UTF-8 encoding sequences formed a valid 01089 UTF-16 surrogate pair. This error can be masked. If the 01090 error is masked, then the surrogate pair is added 01091 to the UTF-16 output string and parsing continues. 01092 8: An overlong UTF-8 encoding sequence was encountered. 01093 The value of the overlong sequence was a valid unicode 01094 code point. This error can be masked. If the error is masked, 01095 then the unicode code point is encoded and added to the 01096 UTF-16 output string and parsing continues. 01097 16: An illegal UTF-8 encoding sequence occurred or an invalid 01098 unicode code point value resulted from decoding a 01099 UTF-8 sequence. This error can be masked. If the error is 01100 masked and error_code_point is a valid unicode code point, 01101 then its encoding is added to the output string and parsing 01102 continues. 01103 01104 error_mask - [in] 01105 If 0 != (error_mask & 4), then type 4 errors are masked. 01106 If 0 != (error_mask & 8), then type 8 errors are masked.
01107 If 0 != (error_mask & 16) and error_code_point is a valid unicode 01108 code point value, then type 16 errors are masked. 01109 01110 error_code_point - [in] 01111 Unicode code point value to use when masking type 16 errors. 01112 If 0 == (error_mask & 16), then this parameter is ignored. 01113 0xFFFD is a popular choice for the error_code_point value. 01114 01115 sNextUTF8 - [out] 01116 If sNextUTF8 is not null, then *sNextUTF8 points to the first 01117 element in the input sUTF8[] buffer that was not converted. 01118 01119 If an error occurs and is not masked, then *sNextUTF8 points to 01120 the element of sUTF8[] where the conversion failed. If no errors 01121 occur or all errors are masked, then *sNextUTF8 points to 01122 sUTF8 + sUTF8_count. 01123 01124 Returns: 01125 If sWideChar_count > 0, the return value is the number of wchar_t 01126 elements written to sWideChar[]. When the return value < sWideChar_count, 01127 a null terminator is written to sWideChar[return value]. 01128 01129 If sWideChar_count == 0, the return value is the minimum number of 01130 wchar_t elements that are needed to hold the converted string. 01131 The return value does not include room for a null terminator. 01132 Increment the return value by one if you want to have an element 01133 to use for a null terminator. 01134 */ 01135 ON_DECL 01136 int ON_ConvertUTF8ToWideChar( 01137 const char* sUTF8, 01138 int sUTF8_count, 01139 wchar_t* sWideChar, 01140 int sWideChar_count, 01141 unsigned int* error_status, 01142 unsigned int error_mask, 01143 ON__UINT32 error_code_point, 01144 const char** sNextUTF8 01145 ); 01146 01147 ON_END_EXTERNC 01148 01149 #endif