pcl: opennurbs_unicode.h Source File

Go to the documentation of this file.
00001 /* $NoKeywords: $ */
00002 /*
00003 //
00004 // Copyright (c) 1993-2012 Robert McNeel & Associates. All rights reserved.
00005 // OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert
00006 // McNeel & Associates.
00007 //
00008 // THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
00009 // ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF
00010 // MERCHANTABILITY ARE HEREBY DISCLAIMED.
00011 //                              
00012 // For complete openNURBS copyright information see <http://www.opennurbs.org>.
00013 //
00015 */
00016 
00017 #if !defined(OPENNURBS_UNICODE_INC_)
00018 #define OPENNURBS_UNICODE_INC_
00019 
00020 ON_BEGIN_EXTERNC
00021 
00022 struct ON_UnicodeErrorParameters
00023 {
00024   /*
00025   If an error occurs, then bits of m_error_status are
00026   set to indicate what type of error occurred.
00027 
00028   Error types:
00029     1: The input parameters were invalid.
00030        This error cannot be masked.
00031 
00032     2: The output buffer was not large enough to hold the converted
00033        string. As much conversion as possible is performed in this
00034        case and the error cannot be masked.
00035 
00036     4: When parsing a UTF-8 or UTF-32 string, the values of two
00037        consecutive encoding sequences formed a valid UTF-16
00038        surrogate pair.
00039 
00040        This error is masked if 0 != (4 & m_error_mask).
00041        If the error is masked, then the surrogate pair is
00042        decoded, the value of the resulting Unicode code point
00043        is used, and parsing continues.
00044 
00045     8: An overlong UTF-8 encoding sequence was encountered and
00046        the value of the overlong UTF-8 sequence was a valid Unicode
00047        code point.
00048 
00049        This error is masked if 0 != (8 & m_error_mask).
00050        If the error is masked, then the Unicode code point is
00051        used and parsing continues.
00052 
00053    16: An illegal UTF-8 encoding sequence occurred or an invalid
00054        Unicode code point value resulted from decoding a
00055        UTF-8 sequence.
00056 
00057        This error is masked if 0 != (16 & m_error_mask).
00058        If the error is masked and the value of m_error_code_point is
00059        a valid Unicode code point, then m_error_code_point is used
00060        and parsing continues.
00061   */
00062   unsigned int m_error_status;
00063 
00064   /*
00065   If 0 != (m_error_mask & 4), then type 4 errors are masked.
00066   If 0 != (m_error_mask & 8), then type 8 errors are masked.
00067   If 0 != (m_error_mask & 16) and m_error_code_point is a valid Unicode
00068   code point value, then type 16 errors are masked.
00069   */
00070   unsigned int m_error_mask;
00071 
00072   /*
00073   Unicode code point value to use when masking type 16 errors.
00074   If 0 == (m_error_mask & 16), then this field is ignored.
00075   0xFFFD is a popular choice for the m_error_code_point value.
00076   */
00077   ON__UINT32 m_error_code_point;
00078 };
00079 
00080 
00081 /*
00082 Description:
00083   Test a value to determine if it is a valid Unicode code point value.
00084 Parameters:
00085   u - [in] value to test
00086 Returns:
00087   true: u is a valid Unicode code point
00088   false: u is not a valid Unicode code point
00089 Remarks:
00090   Valid Unicode code points are the values of u that satisfy
00091   (0 <= u && u <= 0xD7FF) || (0xE000 <= u && u <= 0x10FFFF)
00092 */
00093 ON_DECL
00094 int ON_IsValidUnicodeCodePoint( ON__UINT32 u );
00095 
00096 /*
00097 Description:
00098   Convert an integer to its UTF-8 form.
00099 Parameters:
00100   u - [in]
00101     Integer in the CPU's native byte order that can be
00102     converted to UTF-8 form.
00103     Valid values are in the interval [0,2147483647].
00104   sUTF8 - [out]
00105     sUTF8 is a buffer of 6 ON__UINT8 elements and the UTF-8 form
00106     is returned in sUTF8[]. The returned value specifies how
00107     many elements of sUTF8[] are set.
00108 Returns:
00109   0: u is too large (>=2^31) to be encoded as a UTF-8 string.
00110      No changes are made to the sUTF8[] values.
00111   1: the UTF-8 form of u is 1 byte returned in sUTF8[0].
00112   2: the UTF-8 form of u is 2 bytes returned in sUTF8[0],sUTF8[1].
00113   3: the UTF-8 form of u is 3 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2].
00114   4: the UTF-8 form of u is 4 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3].
00115   5: the UTF-8 form of u is 5 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4].
00116   6: the UTF-8 form of u is 6 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4],sUTF8[5].
00117   For return values requiring fewer than 6 bytes, no changes
00118   are made to the unused bytes in sUTF8[].
00119 Remarks:
00120   Any integer in the range 0 to 2^31 - 1 can be encoded as a UTF-8 string.
00121   When a Unicode string is being encoded, take steps to ensure that
00122   u is a valid Unicode code point value.  The function ON_IsValidUnicodeCodePoint()
00123   can be used to determine if u is a valid Unicode code point value.
00124 */
00125 ON_DECL
00126 int ON_EncodeUTF8( ON__UINT32 u, ON__UINT8 sUTF8[6] );
00127 
00128 /*
00129 Description:
00130   Decode a UTF-8 encoded string to get a single Unicode code point.
00131 Parameters:
00132   sUTF8 - [in]
00133     UTF-8 string to convert.
00134 
00135   sUTF8_count - [in]
00136     Number of ON__UINT8 elements in sUTF8[].
00137 
00138   e - [in/out]
00139     If e is null, errors are not masked and parsing is performed
00140     to the point where the first error occurs.
00141     If e is not null, all errors are reported by setting the appropriate
00142     e->m_error_status bits and errors are handled as described in the
00143     definition of the ON_UnicodeErrorParameters struct.
00144 
00145   unicode_code_point - [out]
00146     The unicode_code_point pointer must not be null.
00147     If a nonzero value is returned, then *unicode_code_point is
00148     a valid Unicode code point value.
00149 Returns:
00150   Number of elements of sUTF8[] that were parsed.
00151   0 indicates failure.
00152 */
00153 ON_DECL
00154 int ON_DecodeUTF8(
00155     const ON__UINT8* sUTF8,
00156     int sUTF8_count,
00157     struct ON_UnicodeErrorParameters* e,
00158     ON__UINT32* unicode_code_point
00159     );
00160 
00161 /*
00162 Description:
00163   Convert a 4 byte Unicode code point value to its UTF-16 form.
00164 Parameters:
00165   unicode_code_point - [in]
00166     4 byte Unicode code point value in the CPU's native byte order.
00167     Valid values are in the interval [0,0xD7FF] or the
00168     interval [0xE000,0x10FFFF].
00169   sUTF16 - [out]
00170     sUTF16 is a buffer of 2 ON__UINT16 elements. If the UTF-16 form
00171     is a single value, it is returned in sUTF16[0]. If the UTF-16
00172     form is a surrogate pair, the first code unit (high surrogate)
00173     is returned in sUTF16[0] and the second unit (low surrogate) is
00174     returned in sUTF16[1].  The returned values are in
00175     the CPU's native byte order.
00176 Returns:
00177   0: unicode_code_point is not a valid Unicode code point. No changes
00178      are made to the sUTF16[] values.
00179   1: unicode_code_point is a valid Unicode code point with a UTF-16 form
00180      consisting of the single value returned in sUTF16[0].
00181   2: unicode_code_point is a valid Unicode code point with a UTF-16 form
00182      consisting of a surrogate pair returned in sUTF16[0] and sUTF16[1].
00183 */
00184 ON_DECL
00185 int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] );
00186 
00187 /*
00188 Description:
00189   Decode a UTF-16 encoded string, whose elements are in the CPU's
00190   native byte order, to get a single Unicode code point.
00191   sUTF16 - [in]
00192     UTF-16 string to convert.
00193 
00194   sUTF16_count - [in]
00195     Number of ON__UINT16 elements in sUTF16[].
00196 
00197   e - [in/out]
00198     If e is null, errors are not masked and parsing is performed
00199     to the point where the first error occurs.
00200     If e is not null, all errors are reported by setting the appropriate
00201     e->m_error_status bits and errors are handled as described in the
00202     definition of the ON_UnicodeErrorParameters struct.
00203 
00204   unicode_code_point - [out]
00205     The unicode_code_point pointer must not be null.
00206     If a nonzero value is returned, then *unicode_code_point is
00207     a valid Unicode code point value in the CPU's native byte order.
00208 Returns:
00209   Number of elements of sUTF16[] that were parsed.
00210   0 indicates failure.
00211 */
00212 ON_DECL
00213 int ON_DecodeUTF16(
00214     const ON__UINT16* sUTF16,
00215     int sUTF16_count,
00216     struct ON_UnicodeErrorParameters* e,
00217     ON__UINT32* unicode_code_point
00218     );
00219 
00220 /*
00221 Description:
00222   Decode a UTF-16 encoded string whose elements have byte order
00223   opposite the native CPU's to get a single Unicode code point.
00224 Parameters:
00225   sUTF16 - [in]
00226     UTF-16 string to convert with byte order opposite the
00227     CPU's native byte order.
00228 
00229   sUTF16_count - [in]
00230     Number of ON__UINT16 elements in sUTF16[].
00231 
00232   e - [in/out]
00233     If e is null, errors are not masked and parsing is performed
00234     to the point where the first error occurs.
00235     If e is not null, all errors are reported by setting the appropriate
00236     e->m_error_status bits and errors are handled as described in the
00237     definition of the ON_UnicodeErrorParameters struct.
00238 
00239   unicode_code_point - [out]
00240     The unicode_code_point pointer must not be null.
00241     If a nonzero value is returned, then *unicode_code_point is
00242     a valid Unicode code point value in the CPU's native byte order.
00243 Returns:
00244   Number of elements of sUTF16[] that were parsed.
00245   0 indicates failure.
00246 */
00247 ON_DECL
00248 int ON_DecodeSwapByteUTF16(
00249     const ON__UINT16* sUTF16,
00250     int sUTF16_count,
00251     struct ON_UnicodeErrorParameters* e,
00252     ON__UINT32* unicode_code_point
00253     );
00254 
00255 /*
00256 Description:
00257   Convert a Unicode string from a UTF-8 encoded ON__UINT8 array
00258   into a UTF-16 encoded ON__UINT16 array.
00259 
00260 Parameters:
00261   sUTF8 - [in]
00262     UTF-8 string to convert.
00263 
00264   sUTF8_count - [in]
00265     If sUTF8_count >= 0, then it specifies the number of
00266     ON__UINT8 elements in sUTF8[] to convert.
00267 
00268     If sUTF8_count == -1, then sUTF8 must be a null terminated
00269     string and all the elements up to the first null element are
00270     converted.
00271 
00272   sUTF16 - [out]
00273     If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
00274     encoded string is returned in this buffer. If there is room
00275     for the null terminator, the converted string will be null
00276     terminated. The null terminator is never included in the count
00277     returned by this function. The converted string is in the
00278     CPU's native byte order. No byte order mark is prepended.
00279 
00280   sUTF16_count - [in]
00281     If sUTF16_count > 0, then it specifies the number of available
00282     ON__UINT16 elements in the sUTF16[] buffer.
00283 
00284     If sUTF16_count == 0, then the sUTF16 parameter is ignored.
00285 
00286   error_status - [out]
00287     If error_status is not null, then bits of *error_status are
00288     set to indicate the success or failure of the conversion.
00289     When the error_mask parameter is used to mask some
00290     conversion errors, multiple bits may be set.
00291        0: Successful conversion with no errors.
00292        1: Invalid input parameters. This error cannot be masked.
00293        2: The sUTF16 output buffer was not large enough to hold
00294           the converted string. This error cannot be masked.
00295        4: The values of two UTF-8 encoding sequences formed a valid
00296           UTF-16 surrogate pair. This error can be masked.  If the
00297           error is masked, then the surrogate pair is added
00298           to the UTF-16 output string and parsing continues.
00299        8: An overlong UTF-8 encoding sequence was encountered.
00300           The value of the overlong sequence was a valid Unicode
00301           code point. This error can be masked. If the error is masked,
00302           then the Unicode code point is encoded and added to the
00303           UTF-16 output string and parsing continues.
00304       16: An illegal UTF-8 encoding sequence occurred or an invalid
00305           Unicode code point value resulted from decoding a
00306           UTF-8 sequence. This error can be masked. If the error is
00307           masked and error_code_point is a valid Unicode code point,
00308           then its UTF-16 encoding is added to the UTF-16 output
00309           string and parsing continues.
00310 
00311   error_mask - [in]
00312     If 0 != (error_mask & 4), then type 4 errors are masked.
00313     If 0 != (error_mask & 8), then type 8 errors are masked.
00314     If 0 != (error_mask & 16) and error_code_point is a valid Unicode
00315     code point value, then type 16 errors are masked.
00316 
00317   error_code_point - [in]
00318     Unicode code point value to use when masking type 16 errors.
00319     If 0 == (error_mask & 16), then this parameter is ignored.
00320     0xFFFD is a popular choice for the error_code_point value.
00321 
00322   sNextUTF8 - [out]
00323     If sNextUTF8 is not null, then *sNextUTF8 points to the first
00324     element in the input sUTF8[] buffer that was not converted.
00325 
00326     If an error occurs and is not masked, then *sNextUTF8 points to
00327     the element of sUTF8[] where the conversion failed.  If no errors
00328     occur or all errors are masked, then *sNextUTF8 points to
00329     sUTF8 + sUTF8_count.
00330 
00331 Returns:
00332   If sUTF16_count > 0, the return value is the number of ON__UINT16
00333   elements written to sUTF16[].  When the return value < sUTF16_count,
00334   a null terminator is written to sUTF16[return value].
00335 
00336   If sUTF16_count == 0, the return value is the minimum number of
00337   ON__UINT16 elements that are needed to hold the converted string.
00338   The return value does not include room for a null terminator.
00339   Increment the return value by one if you want to have an element
00340   to use for a null terminator.
00341 */
00342 ON_DECL
00343 int ON_ConvertUTF8ToUTF16(
00344     const ON__UINT8* sUTF8,
00345     int sUTF8_count,
00346     ON__UINT16* sUTF16,
00347     int sUTF16_count,
00348     unsigned int* error_status,
00349     unsigned int error_mask,
00350     ON__UINT32 error_code_point,
00351     const ON__UINT8** sNextUTF8
00352     );
00353 
00354 /*
00355 Description:
00356   Convert a Unicode string from a UTF-8 encoded ON__UINT8 array
00357   into a UTF-32 encoded ON__UINT32 array.
00358 
00359 Parameters:
00360   sUTF8 - [in]
00361     UTF-8 string to convert.
00362 
00363   sUTF8_count - [in]
00364     If sUTF8_count >= 0, then it specifies the number of
00365     ON__UINT8 elements in sUTF8[] to convert.
00366 
00367     If sUTF8_count == -1, then sUTF8 must be a null terminated
00368     string and all the elements up to the first null element are
00369     converted.
00370 
00371   sUTF32 - [out]
00372     If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
00373     encoded string is returned in this buffer. If there is room
00374     for the null terminator, the converted string will be null
00375     terminated. The null terminator is never included in the count
00376     returned by this function. The converted string is in the
00377     CPU's native byte order. No byte order mark is prepended.
00378 
00379   sUTF32_count - [in]
00380     If sUTF32_count > 0, then it specifies the number of available
00381     ON__UINT32 elements in the sUTF32[] buffer.
00382 
00383     If sUTF32_count == 0, then the sUTF32 parameter is ignored.
00384 
00385   error_status - [out]
00386     If error_status is not null, then bits of *error_status are
00387     set to indicate the success or failure of the conversion.
00388     When the error_mask parameter is used to mask some
00389     conversion errors, multiple bits may be set.
00390        0: Successful conversion with no errors.
00391        1: Invalid input parameters. This error cannot be masked.
00392        2: The sUTF32 output buffer was not large enough to hold
00393           the converted string. This error cannot be masked.
00394        4: The values of two UTF-8 encoding sequences formed a valid
00395           UTF-16 surrogate pair. This error can be masked.  If the
00396           error is masked, then the surrogate pair is decoded,
00397           the code point value is added to the UTF-32 output
00398           string and parsing continues.
00399        8: An overlong UTF-8 encoding sequence was encountered.
00400           The value of the overlong sequence was a valid Unicode
00401           code point. This error can be masked. If the error is masked,
00402           then the Unicode code point is added to the UTF-32
00403           output string and parsing continues.
00404       16: An illegal UTF-8 encoding sequence occurred or an invalid
00405           Unicode code point value resulted from decoding a
00406           UTF-8 sequence. This error can be masked. If the error is
00407           masked and error_code_point is a valid Unicode code point,
00408           then its value is added to the UTF-32 output string and
00409           parsing continues.
00410 
00411   error_mask - [in]
00412     If 0 != (error_mask & 4), then type 4 errors are masked.
00413     If 0 != (error_mask & 8), then type 8 errors are masked.
00414     If 0 != (error_mask & 16) and error_code_point is a valid Unicode
00415     code point value, then type 16 errors are masked.
00416 
00417   error_code_point - [in]
00418     Unicode code point value to use when masking type 16 errors.
00419     If 0 == (error_mask & 16), then this parameter is ignored.
00420     0xFFFD is a popular choice for the error_code_point value.
00421 
00422   sNextUTF8 - [out]
00423     If sNextUTF8 is not null, then *sNextUTF8 points to the first
00424     element in the input sUTF8[] buffer that was not converted.
00425 
00426     If an error occurs and is not masked, then *sNextUTF8 points to
00427     the element of sUTF8[] where the conversion failed.  If no errors
00428     occur or all errors are masked, then *sNextUTF8 points to
00429     sUTF8 + sUTF8_count.
00430 
00431 Returns:
00432   If sUTF32_count > 0, the return value is the number of ON__UINT32
00433   elements written to sUTF32[].  When the return value < sUTF32_count,
00434   a null terminator is written to sUTF32[return value].
00435 
00436   If sUTF32_count == 0, the return value is the minimum number of
00437   ON__UINT32 elements that are needed to hold the converted string.
00438   The return value does not include room for a null terminator.
00439   Increment the return value by one if you want to have an element
00440   to use for a null terminator.
00441 */
00442 ON_DECL
00443 int ON_ConvertUTF8ToUTF32(
00444     const ON__UINT8* sUTF8,
00445     int sUTF8_count,
00446     ON__UINT32* sUTF32,
00447     int sUTF32_count,
00448     unsigned int* error_status,
00449     unsigned int error_mask,
00450     ON__UINT32 error_code_point,
00451     const ON__UINT8** sNextUTF8
00452     );
00453 
00454 /*
00455 Description:
00456   Convert a Unicode string from a UTF-16 encoded ON__UINT16 array
00457   into a UTF-8 encoded ON__UINT8 array.
00458 
00459 Parameters:
00460   bTestByteOrder - [in]
00461     If bTestByteOrder is true and the first element of sUTF16[]
00462     is 0xFEFF, then this element is ignored.
00463 
00464     If bTestByteOrder is true and the first element of sUTF16[]
00465     is 0xFFFE, then this element is ignored and the subsequent
00466     elements of sUTF16[] have their bytes swapped before the
00467     conversion is calculated.
00468 
00469     In all other cases the first element of sUTF16[] is
00470     converted and no byte swapping is performed.
00471 
00472   sUTF16 - [in]
00473     UTF-16 string to convert.
00474 
00475     If bTestByteOrder is true and the first element of sUTF16[]
00476     is 0xFEFF, then this element is skipped and it is assumed
00477     that sUTF16[] is in the CPU's native byte order.
00478 
00479     If bTestByteOrder is true and the first element of sUTF16[]
00480     is 0xFFFE, then this element is skipped and it is assumed
00481     that sUTF16[] is not in the CPU's native byte order and bytes
00482     are swapped before characters are converted.
00483 
00484     If bTestByteOrder is false or the first character of sUTF16[]
00485     is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
00486     the CPU's byte order.
00487 
00488   sUTF16_count - [in]
00489     If sUTF16_count >= 0, then it specifies the number of
00490     ON__UINT16 elements in sUTF16[] to convert.
00491 
00492     If sUTF16_count == -1, then sUTF16 must be a null terminated
00493     string and all the elements up to the first null element are
00494     converted.
00495 
00496   sUTF8 - [out]
00497     If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
00498     encoded string is returned in this buffer. If there is room
00499     for the null terminator, the converted string will be null
00500     terminated. The null terminator is never included in the count
00501     returned by this function. The converted string is in the
00502     CPU's native byte order. No byte order mark is prepended.
00503 
00504   sUTF8_count - [in]
00505     If sUTF8_count > 0, then it specifies the number of available
00506     ON__UINT8 elements in the sUTF8[] buffer.
00507 
00508     If sUTF8_count == 0, then the sUTF8 parameter is ignored.
00509 
00510   error_status - [out]
00511     If error_status is not null, then bits of *error_status are
00512     set to indicate the success or failure of the conversion.
00513     When the error_mask parameter is used to mask some
00514     conversion errors, multiple bits may be set.
00515        0: Successful conversion with no errors.
00516        1: Invalid input parameters. This error cannot be masked.
00517        2: The sUTF8 output buffer was not large enough to hold
00518           the converted string. This error cannot be masked.
00519       16: An illegal UTF-16 encoding sequence occurred or an invalid
00520           Unicode code point value resulted from decoding a
00521           UTF-16 sequence. This error can be masked. If the error is
00522           masked and error_code_point is a valid Unicode code point,
00523           then its UTF-8 encoding is added to the UTF-8 output
00524           string and parsing continues.
00525 
00526   error_mask - [in]
00527     If 0 != (error_mask & 16) and error_code_point is a valid Unicode
00528     code point value, then type 16 errors are masked.
00529 
00530   error_code_point - [in]
00531     Unicode code point value to use when masking type 16 errors.
00532     If 0 == (error_mask & 16), then this parameter is ignored.
00533     0xFFFD is a popular choice for the error_code_point value.
00534 
00535   sNextUTF16 - [out]
00536     If sNextUTF16 is not null, then *sNextUTF16 points to the first
00537     element in the input sUTF16[] buffer that was not converted.
00538 
00539     If an error occurs and is not masked, then *sNextUTF16 points to
00540     the element of sUTF16[] where the conversion failed.  If no errors
00541     occur or all errors are masked, then *sNextUTF16 points to
00542     sUTF16 + sUTF16_count.
00543 Returns:
00544   If sUTF8_count > 0, the return value is the number of ON__UINT8
00545   elements written to sUTF8[].  When the return value < sUTF8_count,
00546   a null terminator is written to sUTF8[return value].
00547 
00548   If sUTF8_count == 0, the return value is the minimum number of
00549   ON__UINT8 elements that are needed to hold the converted string.
00550   The return value does not include room for a null terminator.
00551   Increment the return value by one if you want to have an element
00552   to use for a null terminator.
00553 */
00554 ON_DECL
00555 int ON_ConvertUTF16ToUTF8(
00556     int bTestByteOrder,
00557     const ON__UINT16* sUTF16,
00558     int sUTF16_count,
00559     ON__UINT8* sUTF8,
00560     int sUTF8_count,
00561     unsigned int* error_status,
00562     unsigned int error_mask,
00563     ON__UINT32 error_code_point,
00564     const ON__UINT16** sNextUTF16
00565     );
00566 
00567 /*
00568 Description:
00569   Convert a Unicode string from a UTF-16 encoded ON__UINT16 array
00570   into a UTF-32 encoded ON__UINT32 array.
00571 
00572 Parameters:
00573   bTestByteOrder - [in]
00574     If bTestByteOrder is true and the first element of sUTF16[]
00575     is 0xFEFF, then this element is ignored.
00576 
00577     If bTestByteOrder is true and the first element of sUTF16[]
00578     is 0xFFFE, then this element is ignored and the subsequent
00579     elements of sUTF16[] have their bytes swapped before the
00580     conversion is calculated.
00581 
00582     In all other cases the first element of sUTF16[] is
00583     converted and no byte swapping is performed.
00584 
00585   sUTF16 - [in]
00586     UTF-16 string to convert.
00587 
00588     If bTestByteOrder is true and the first element of sUTF16[]
00589     is 0xFEFF, then this element is skipped and it is assumed
00590     that sUTF16[] is in the CPU's native byte order.
00591 
00592     If bTestByteOrder is true and the first element of sUTF16[]
00593     is 0xFFFE, then this element is skipped and it is assumed
00594     that sUTF16[] is not in the CPU's native byte order and bytes
00595     are swapped before characters are converted.
00596 
00597     If bTestByteOrder is false or the first character of sUTF16[]
00598     is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
00599     the CPU's byte order.
00600 
00601   sUTF16_count - [in]
00602     If sUTF16_count >= 0, then it specifies the number of
00603     ON__UINT16 elements in sUTF16[] to convert.
00604 
00605     If sUTF16_count == -1, then sUTF16 must be a null terminated
00606     string and all the elements up to the first null element are
00607     converted.
00608 
00609   sUTF32 - [out]
00610     If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
00611     encoded string is returned in this buffer. If there is room
00612     for the null terminator, the converted string will be null
00613     terminated. The null terminator is never included in the count
00614     returned by this function. The converted string is in the
00615     CPU's native byte order. No byte order mark is prepended.
00616 
00617   sUTF32_count - [in]
00618     If sUTF32_count > 0, then it specifies the number of available
00619     ON__UINT32 elements in the sUTF32[] buffer.
00620 
00621     If sUTF32_count == 0, then the sUTF32 parameter is ignored.
00622 
00623   error_status - [out]
00624     If error_status is not null, then bits of *error_status are
00625     set to indicate the success or failure of the conversion.
00626     When the error_mask parameter is used to mask some
00627     conversion errors, multiple bits may be set.
00628        0: Successful conversion with no errors.
00629        1: Invalid input parameters. This error cannot be masked.
00630        2: The sUTF32 output buffer was not large enough to hold
00631           the converted string. This error cannot be masked.
00632       16: An illegal UTF-16 encoding sequence occurred or an invalid
00633           Unicode code point value resulted from decoding a
00634           UTF-16 sequence. This error can be masked. If the error is
00635           masked and error_code_point is a valid Unicode code point,
00636           then its value is added to the UTF-32 output string and
00637           parsing continues.
00638 
00639   error_mask - [in]
00640     If 0 != (error_mask & 16) and error_code_point is a valid Unicode
00641     code point value, then type 16 errors are masked.
00642 
00643   error_code_point - [in]
00644     Unicode code point value to use when masking type 16 errors.
00645     If 0 == (error_mask & 16), then this parameter is ignored.
00646     0xFFFD is a popular choice for the error_code_point value.
00647 
00648   sNextUTF16 - [out]
00649     If sNextUTF16 is not null, then *sNextUTF16 points to the first
00650     element in the input sUTF16[] buffer that was not converted.
00651 
00652     If an error occurs and is not masked, then *sNextUTF16 points to
00653     the element of sUTF16[] where the conversion failed.  If no errors
00654     occur or all errors are masked, then *sNextUTF16 points to
00655     sUTF16 + sUTF16_count.
00656 
00657 Returns:
00658   If sUTF32_count > 0, the return value is the number of ON__UINT32
00659   elements written to sUTF32[].  When the return value < sUTF32_count,
00660   a null terminator is written to sUTF32[return value].
00661 
00662   If sUTF32_count == 0, the return value is the minimum number of
00663   ON__UINT32 elements that are needed to hold the converted string.
00664   The return value does not include room for a null terminator.
00665   Increment the return value by one if you want to have an element
00666   to use for a null terminator.
00667 */
00668 ON_DECL
00669 int ON_ConvertUTF16ToUTF32(
00670     int bTestByteOrder,
00671     const ON__UINT16* sUTF16,
00672     int sUTF16_count,
00673     unsigned int* sUTF32,
00674     int sUTF32_count,
00675     unsigned int* error_status,
00676     unsigned int error_mask,
00677     ON__UINT32 error_code_point,
00678     const ON__UINT16** sNextUTF16
00679     );
00680 
00681 /*
00682 Description:
00683   Convert a unicode string from a UTF-32 encoded ON__UINT32 array
00684   into a UTF-8 encoded ON__UINT8 array.
00685 
00686 Parameters:
00687   bTestByteOrder - [in]
00688     If bTestByteOrder is true and the first element of sUTF32[]
00689     is 0x0000FEFF, then this element is ignored.
00690 
00691     If bTestByteOrder is true and the first element of sUTF32[]
00692     is 0xFFFE0000, then this element is ignored and the subsequent
00693     elements of sUTF32[] have their bytes swapped before the 
00694     conversion is calculated.
00695 
00696     In all other cases the first element of sUTF32[] is 
00697     converted and no byte swapping is performed.
00698 
00699   sUTF32 - [in]
00700     UTF-32 string to convert.  
00701     
00702     If bTestByteOrder is true and the first element of sUTF32[]
00703     is 0x0000FEFF, then this element is skipped and it is assumed 
00704     that sUTF32[] is in the CPU's native byte order.
00705     
00706     If bTestByteOrder is true and the first element of sUTF32[]
00707     is 0xFFFE0000, then this element is skipped and it is assumed 
00708     that sUTF32[] is not in the CPU's native byte order and bytes
00709     are swapped before characters are converted.
00710 
00711     If bTestByteOrder is false or the first character of sUTF32[]
00712     is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string 
00713     must match the CPU's byte order.
00714 
00715   sUTF32_count - [in]
00716     If sUTF32_count >= 0, then it specifies the number of
00717     ON__UINT32 elements in sUTF32[] to convert.
00718 
00719     If sUTF32_count == -1, then sUTF32 must be a null terminated
00720     string and all the elements up to the first null element are
00721     converted.
00722     
00723   sUTF8 - [out]
00724     If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
00725     encoded string is returned in this buffer. If there is room
00726     for the null terminator, the converted string will be null
00727     terminated. The null terminator is never included in the count 
00728     returned by this function. The converted string is in the 
00729     CPU's native byte order. No byte order mark is prepended.
00730 
00731   sUTF8_count - [in]
00732     If sUTF8_count > 0, then it specifies the number of available
00733     ON__UINT8 elements in the sUTF8[] buffer.
00734     
00735     If sUTF8_count == 0, then the sUTF8 parameter is ignored.
00736 
00737   error_status - [out]
00738     If error_status is not null, then bits of *error_status are
00739     set to indicate the success or failure of the conversion.  
00740     When the error_mask parameter is used to mask some
00741     conversion errors, multiple bits may be set.
00742        0: Successful conversion with no errors.
00743        1: Invalid input parameters. This error cannot be masked.
00744        2: The sUTF8 output buffer was not large enough to hold 
00745           the converted string. This error cannot be masked.
00746        4: The values of two UTF-32 elements form a valid
00747           UTF-16 surrogate pair. This error can be masked. If the
00748           error is masked, then the surrogate pair is converted
00749           to a valid unicode code point, its UTF-8 encoding is
00750           added to the UTF-8 output string and parsing continues.
00751       16: An invalid unicode code point occurred in sUTF32[].
00752           This error can be masked. If the error is masked and
00753           error_code_point is a valid unicode code point,
00754           then its UTF-8 encoding is added to the UTF-8 output
00755           string and parsing continues.
00756 
00757   error_mask - [in]
00758     If 0 != (error_mask & 4), then type 4 errors are masked.
00759     If 0 != (error_mask & 16) and error_code_point is a valid unicode
00760     code point value, then type 16 errors are masked.
00761 
00762   error_code_point - [in]
00763     Unicode code point value to use when masking type 16 errors.
00764     If 0 == (error_mask & 16), then this parameter is ignored.
00765     0xFFFD is a popular choice for the error_code_point value.
00766 
00767   sNextUTF32 - [out]
00768     If sNextUTF32 is not null, then *sNextUTF32 points to the first
00769     element in the input sUTF32[] buffer that was not converted. 
00770 
00771     If an error occurs and is not masked, then *sNextUTF32 points to
00772     the element of sUTF32[] where the conversion failed.  If no errors
00773     occur or all errors are masked, then *sNextUTF32 points to
00774     sUTF32 + sUTF32_count.
00775 
00776 Returns:
00777   If sUTF8_count > 0, the return value is the number of ON__UINT8
00778   elements written to sUTF8[].  When the return value < sUTF8_count,
00779   a null terminator is written to sUTF8[return value].
00780 
00781   If sUTF8_count == 0, the return value is the minimum number of
00782   ON__UINT8 elements that are needed to hold the converted string.
00783   The return value does not include room for a null terminator.  
00784   Increment the return value by one if you want to have an element
00785   to use for a null terminator.
00786 */
00787 ON_DECL
00788 int ON_ConvertUTF32ToUTF8(
00789     int bTestByteOrder,
00790     const ON__UINT32* sUTF32,
00791     int sUTF32_count,
00792     ON__UINT8* sUTF8,
00793     int sUTF8_count,
00794     unsigned int* error_status,
00795     unsigned int error_mask,
00796     ON__UINT32 error_code_point,
00797     const ON__UINT32** sNextUTF32
00798     );
00799 
00800 /*
00801 Description:
00802   Convert a unicode string from a UTF-32 encoded ON__UINT32 array
00803   into a UTF-16 encoded ON__UINT16 array.
00804 
00805 Parameters:
00806   bTestByteOrder - [in]
00807     If bTestByteOrder is true and the first element of sUTF32[]
00808     is 0x0000FEFF, then this element is ignored.
00809 
00810     If bTestByteOrder is true and the first element of sUTF32[]
00811     is 0xFFFE0000, then this element is ignored and the subsequent
00812     elements of sUTF32[] have their bytes swapped before the 
00813     conversion is calculated.
00814 
00815     In all other cases the first element of sUTF32[] is 
00816     converted and no byte swapping is performed.
00817 
00818   sUTF32 - [in]
00819     UTF-32 string to convert.  
00820     
00821     If bTestByteOrder is true and the first element of sUTF32[]
00822     is 0x0000FEFF, then this element is skipped and it is assumed 
00823     that sUTF32[] is in the CPU's native byte order.
00824     
00825     If bTestByteOrder is true and the first element of sUTF32[]
00826     is 0xFFFE0000, then this element is skipped and it is assumed 
00827     that sUTF32[] is not in the CPU's native byte order and bytes
00828     are swapped before characters are converted.
00829 
00830     If bTestByteOrder is false or the first character of sUTF32[]
00831     is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string 
00832     must match the CPU's byte order.
00833 
00834   sUTF32_count - [in]
00835     If sUTF32_count >= 0, then it specifies the number of
00836     ON__UINT32 elements in sUTF32[] to convert.
00837 
00838     If sUTF32_count == -1, then sUTF32 must be a null terminated
00839     string and all the elements up to the first null element are
00840     converted.
00841 
00842   sUTF16 - [out]
00843     If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
00844     encoded string is returned in this buffer. If there is room
00845     for the null terminator, the converted string will be null
00846     terminated. The null terminator is never included in the count 
00847     returned by this function. The converted string is in the 
00848     CPU's native byte order. No byte order mark is prepended.
00849 
00850   sUTF16_count - [in]
00851     If sUTF16_count > 0, then it specifies the number of available
00852     ON__UINT16 elements in the sUTF16[] buffer.
00853     
00854     If sUTF16_count == 0, then the sUTF16 parameter is ignored.
00855 
00856   error_status - [out]
00857     If error_status is not null, then bits of *error_status are
00858     set to indicate the success or failure of the conversion.  
00859     When the error_mask parameter is used to mask some
00860     conversion errors, multiple bits may be set.
00861        0: Successful conversion with no errors.
00862        1: Invalid input parameters. This error cannot be masked.
00863        2: The sUTF16 output buffer was not large enough to hold 
00864           the converted string. This error cannot be masked.
00865        4: The values of two UTF-32 elements form a valid
00866           UTF-16 surrogate pair. This error can be masked. If the
00867           error is masked, then the surrogate pair is added to
00868           the UTF-16 output string and parsing continues.
00869       16: An invalid unicode code point occurred in sUTF32[].
00870           This error can be masked. If the error is masked and
00871           error_code_point is a valid unicode code point,
00872           then its UTF-16 encoding is added to the UTF-16 output
00873           string and parsing continues.
00874 
00875   error_mask - [in]
00876     If 0 != (error_mask & 4), then type 4 errors are masked.
00877     If 0 != (error_mask & 16) and error_code_point is a valid unicode
00878     code point value, then type 16 errors are masked.
00879 
00880   error_code_point - [in]
00881     Unicode code point value to use when masking type 16 errors.
00882     If 0 == (error_mask & 16), then this parameter is ignored.
00883     0xFFFD is a popular choice for the error_code_point value.
00884 
00885   sNextUTF32 - [out]
00886     If sNextUTF32 is not null, then *sNextUTF32 points to the first
00887     element in the input sUTF32[] buffer that was not converted. 
00888 
00889     If an error occurs and is not masked, then the element that
00890     *sNextUTF32 points to is an illegal unicode code point value.
00891 
00892     If an error does not occur, then (*sNextUTF32 - sUTF32) 
00893     is the number of elements converted.
00894 
00895 Returns:
00896   If sUTF16_count > 0, the return value is the number of ON__UINT16
00897   elements written to sUTF16[].  When the return value < sUTF16_count,
00898   a null terminator is written to sUTF16[return value].
00899 
00900   If sUTF16_count == 0, the return value is the minimum number of
00901   ON__UINT16 elements that are needed to hold the converted string.
00902   The return value does not include room for a null terminator.  
00903   Increment the return value by one if you want to have an element
00904   to use for a null terminator.
00905 */
00906 ON_DECL
00907 int ON_ConvertUTF32ToUTF16(
00908     int bTestByteOrder,
00909     const ON__UINT32* sUTF32,
00910     int sUTF32_count,
00911     ON__UINT16* sUTF16,
00912     int sUTF16_count,
00913     unsigned int* error_status,
00914     unsigned int error_mask,
00915     ON__UINT32 error_code_point,
00916     const ON__UINT32** sNextUTF32
00917     );
00918 
00919 /*
00920 Description:
00921   Convert a wchar_t string using the native platform's most common
00922   encoding into a unicode string encoded as a UTF-8 char array.
00923 
00924   If 2 == sizeof(wchar_t), then the wchar_t array is assumed to be
00925   a UTF-16 encoded string. This is the case with current versions
00926   of Microsoft Windows.
00927 
00928   If 4 == sizeof(wchar_t), then the wchar_t array is assumed to be
00929   a UTF-32 encoded string. This is the case with current versions
00930   of Apple OSX.
00931 
00932 Parameters:
00933   bTestByteOrder - [in]
00934     If bTestByteOrder is true and the first element of sWideChar[]
00935     is 0xFEFF, then this element is ignored.
00936 
00937     If bTestByteOrder is true and the first element of sWideChar[]
00938     is 0xFFFE, then this element is ignored and the subsequent
00939     elements of sWideChar[] have their bytes swapped before the 
00940     conversion is calculated.
00941 
00942     In all other cases the first element of sWideChar[] is 
00943     converted and no byte swapping is performed.
00944 
00945   sWideChar - [in]
00946     wchar_t string to convert.  
00947     
00948     If bTestByteOrder is true and the first element of sWideChar[]
00949     is 0xFEFF, then this element is skipped and it is assumed 
00950     that sWideChar[] is in the CPU's native byte order.
00951     
00952     If bTestByteOrder is true and the first element of sWideChar[]
00953     is 0xFFFE, then this element is skipped and it is assumed 
00954     that sWideChar[] is not in the CPU's native byte order and bytes
00955     are swapped before characters are converted.
00956 
00957     If bTestByteOrder is false or the first character of sWideChar[]
00958     is neither 0xFEFF nor 0xFFFE, then the sWideChar string must match
00959     the CPU's byte order.
00960 
00961   sWideChar_count - [in]
00962     If sWideChar_count >= 0, then it specifies the number of
00963     wchar_t elements in sWideChar[] to convert.
00964 
00965     If sWideChar_count == -1, then sWideChar must be a null terminated
00966     string and all the elements up to the first null element are
00967     converted.
00968     
00969   sUTF8 - [out]
00970     If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
00971     encoded string is returned in this buffer. If there is room
00972     for the null terminator, the converted string will be null
00973     terminated. The null terminator is never included in the count 
00974     returned by this function. The converted string is in the 
00975     CPU's native byte order. No byte order mark is prepended.
00976 
00977   sUTF8_count - [in]
00978     If sUTF8_count > 0, then it specifies the number of available
00979     char elements in the sUTF8[] buffer.
00980     
00981     If sUTF8_count == 0, then the sUTF8 parameter is ignored.
00982 
00983   error_status - [out]
00984     If error_status is not null, then bits of *error_status are
00985     set to indicate the success or failure of the conversion.  
00986     When the error_mask parameter is used to mask some
00987     conversion errors, multiple bits may be set.
00988        0: Successful conversion with no errors.
00989        1: Invalid input parameters. This error cannot be masked.
00990        2: The sUTF8 output buffer was not large enough to hold 
00991           the converted string. This error cannot be masked.
00992       16: An illegal wchar_t encoding sequence occurred or an invalid
00993           unicode code point value resulted from decoding a
00994           wchar_t sequence. This error can be masked. If the error is
00995           masked and error_code_point is a valid unicode code point,
00996           then its UTF-8 encoding is added to the UTF-8 output
00997           string and parsing continues.
00998 
00999   error_mask - [in]
01000     If 0 != (error_mask & 16) and error_code_point is a valid unicode
01001     code point value, then type 16 errors are masked.
01002 
01003   error_code_point - [in]
01004     Unicode code point value to use when masking type 16 errors.
01005     If 0 == (error_mask & 16), then this parameter is ignored.
01006     0xFFFD is a popular choice for the error_code_point value.
01007 
01008   sNextWideChar - [out]
01009     If sNextWideChar is not null, then *sNextWideChar points to the first
01010     element in the input sWideChar[] buffer that was not converted. 
01011 
01012     If an error occurs and is not masked, then *sNextWideChar points to
01013     the element of sWideChar[] where the conversion failed.  If no errors
01014     occur or all errors are masked, then *sNextWideChar points to
01015     sWideChar + sWideChar_count.
01016 Returns:
01017   If sUTF8_count > 0, the return value is the number of char
01018   elements written to sUTF8[].  When the return value < sUTF8_count,
01019   a null terminator is written to sUTF8[return value].
01020 
01021   If sUTF8_count == 0, the return value is the minimum number of
01022   char elements that are needed to hold the converted string.
01023   The return value does not include room for a null terminator.  
01024   Increment the return value by one if you want to have an element
01025   to use for a null terminator.
01026 */
01027 ON_DECL
01028 int ON_ConvertWideCharToUTF8(
01029     int bTestByteOrder,
01030     const wchar_t* sWideChar,
01031     int sWideChar_count,
01032     char* sUTF8,
01033     int sUTF8_count,
01034     unsigned int* error_status,
01035     unsigned int error_mask,
01036     ON__UINT32 error_code_point,
01037     const wchar_t** sNextWideChar
01038     );
01039 
01040 /*
01041 Description:
01042   Convert a UTF-8 encoded char string to wchar_t string using
01043   the native platform's most common encoding.
01044 
01045   If 2 == sizeof(wchar_t), then UTF-16 encoding is used for the
01046   output string. This is the case with current versions of
01047   Microsoft Windows.
01048 
01049   If 4 == sizeof(wchar_t), then UTF-32 encoding is used for the
01050   output string. This is the case with current versions of
01051   Apple OSX.
01052 
01053 Parameters:
01054   sUTF8 - [in]
01055     UTF-8 string to convert.
01056 
01057   sUTF8_count - [in]
01058     If sUTF8_count >= 0, then it specifies the number of
01059     char elements in sUTF8[] to convert.
01060 
01061     If sUTF8_count == -1, then sUTF8 must be a null terminated
01062     string and all the elements up to the first null element are
01063     converted.
01064 
01065   sWideChar - [out]
01066     If sWideChar is not null and sWideChar_count > 0, then the
01067     output string is returned in this buffer. If there is room
01068     for the null terminator, the converted string will be null
01069     terminated. The null terminator is never included in the count 
01070     returned by this function. The converted string is in the 
01071     CPU's native byte order. No byte order mark is prepended.
01072 
01073   sWideChar_count - [in]
01074     If sWideChar_count > 0, then it specifies the number of available
01075     wchar_t elements in the sWideChar[] buffer.
01076     
01077     If sWideChar_count == 0, then the sWideChar parameter is ignored.
01078 
01079   error_status - [out]
01080     If error_status is not null, then bits of *error_status are
01081     set to indicate the success or failure of the conversion.  
01082     When the error_mask parameter is used to mask some
01083     conversion errors, multiple bits may be set.
01084        0: Successful conversion with no errors.
01085        1: Invalid input parameters. This error cannot be masked.
01086        2: The sWideChar output buffer was not large enough to hold 
01087           the converted string. This error cannot be masked.
01088        4: The values of two UTF-8 encoding sequences formed a valid
01089           UTF-16 surrogate pair. This error can be masked.  If the
01090           error is masked, then the surrogate pair's code point is
01091           encoded and added to the output string and parsing continues.
01092        8: An overlong UTF-8 encoding sequence was encountered. 
01093           The value of the overlong sequence was a valid unicode
01094           code point. This error can be masked. If the error is masked,
01095           then the unicode code point is encoded and added to the
01096           output string and parsing continues.
01097       16: An illegal UTF-8 encoding sequence occurred or an invalid
01098           unicode code point value resulted from decoding a
01099           UTF-8 sequence. This error can be masked. If the error is
01100           masked and error_code_point is a valid unicode code point,
01101           then its encoding is added to the output string and parsing
01102           continues.
01103 
01104   error_mask - [in]
01105     If 0 != (error_mask & 4), then type 4 errors are masked.
01106     If 0 != (error_mask & 8), then type 8 errors are masked.
01107     If 0 != (error_mask & 16) and error_code_point is a valid unicode
01108     code point value, then type 16 errors are masked.
01109 
01110   error_code_point - [in]
01111     Unicode code point value to use when masking type 16 errors.
01112     If 0 == (error_mask & 16), then this parameter is ignored.
01113     0xFFFD is a popular choice for the error_code_point value.
01114 
01115   sNextUTF8 - [out]
01116     If sNextUTF8 is not null, then *sNextUTF8 points to the first
01117     element in the input sUTF8[] buffer that was not converted. 
01118 
01119     If an error occurs and is not masked, then *sNextUTF8 points to
01120     the element of sUTF8[] where the conversion failed.  If no errors
01121     occur or all errors are masked, then *sNextUTF8 points to
01122     sUTF8 + sUTF8_count.
01123 
01124 Returns:
01125   If sWideChar_count > 0, the return value is the number of wchar_t
01126   elements written to sWideChar[].  When the return value < sWideChar_count,
01127   a null terminator is written to sWideChar[return value].
01128 
01129   If sWideChar_count == 0, the return value is the minimum number of
01130   wchar_t elements that are needed to hold the converted string.
01131   The return value does not include room for a null terminator.  
01132   Increment the return value by one if you want to have an element
01133   to use for a null terminator.
01134 */
01135 ON_DECL
01136 int ON_ConvertUTF8ToWideChar(
01137     const char* sUTF8,
01138     int sUTF8_count,
01139     wchar_t* sWideChar,
01140     int sWideChar_count,
01141     unsigned int* error_status,
01142     unsigned int error_mask,
01143     ON__UINT32 error_code_point,
01144     const char** sNextUTF8
01145     );
01146 
01147 ON_END_EXTERNC
01148 
01149 #endif