00001 /************************************************************************************ 00002 00003 PublicHeader: OVR.h 00004 Filename : OVR_UTF8Util.h 00005 Content : UTF8 Unicode character encoding/decoding support 00006 Created : September 19, 2012 00007 Notes : 00008 00009 Copyright : Copyright 2012 Oculus VR, Inc. All Rights reserved. 00010 00011 Use of this software is subject to the terms of the Oculus license 00012 agreement provided at the time of installation or download, or which 00013 otherwise accompanies this software in either electronic or hard copy form. 00014 00015 ************************************************************************************/ 00016 00017 #ifndef OVR_UTF8Util_h 00018 #define OVR_UTF8Util_h 00019 00020 #include "OVR_Types.h" 00021 00022 namespace OVR { namespace UTF8Util { 00023 00024 //----------------------------------------------------------------------------------- 00025 00026 // *** UTF8 string length and indexing. 00027 00028 // Determines the length of UTF8 string in characters. 00029 // If source length is specified (in bytes), null 0 character is counted properly. 00030 SPInt OVR_STDCALL GetLength(const char* putf8str, SPInt length = -1); 00031 00032 // Gets a decoded UTF8 character at index; you can access up to the index returned 00033 // by GetLength. 0 will be returned for out of bounds access. 00034 UInt32 OVR_STDCALL GetCharAt(SPInt index, const char* putf8str, SPInt length = -1); 00035 00036 // Converts UTF8 character index into byte offset. 00037 // -1 is returned if index was out of bounds. 00038 SPInt OVR_STDCALL GetByteIndex(SPInt index, const char* putf8str, SPInt length = -1); 00039 00040 00041 // *** 16-bit Unicode string Encoding/Decoding routines. 00042 00043 // Determines the number of bytes necessary to encode a string. 00044 // Does not count the terminating 0 (null) character. 00045 SPInt OVR_STDCALL GetEncodeStringSize(const wchar_t* pchar, SPInt length = -1); 00046 00047 // Encodes a unicode (UCS-2 only) string into a buffer. The size of buffer must be at 00048 // least GetEncodeStringSize() + 1. 00049 void OVR_STDCALL EncodeString(char *pbuff, const wchar_t* pchar, SPInt length = -1); 00050 00051 // Decode UTF8 into a wchar_t buffer. Must have GetLength()+1 characters available. 00052 // Characters over 0xFFFF are replaced with 0xFFFD. 00053 // Returns the length of resulting string (number of characters) 00054 UPInt OVR_STDCALL DecodeString(wchar_t *pbuff, const char* putf8str, SPInt bytesLen = -1); 00055 00056 00057 // *** Individual character Encoding/Decoding. 00058 00059 // Determined the number of bytes necessary to encode a UCS character. 00060 int OVR_STDCALL GetEncodeCharSize(UInt32 ucsCharacter); 00061 00062 // Encodes the given UCS character into the given UTF-8 buffer. 00063 // Writes the data starting at buffer[offset], and 00064 // increments offset by the number of bytes written. 00065 // May write up to 6 bytes, so make sure there's room in the buffer 00066 void OVR_STDCALL EncodeChar(char* pbuffer, SPInt* poffset, UInt32 ucsCharacter); 00067 00068 // Return the next Unicode character in the UTF-8 encoded buffer. 00069 // Invalid UTF-8 sequences produce a U+FFFD character as output. 00070 // Advances *utf8_buffer past the character returned. Pointer advance 00071 // occurs even if the terminating 0 character is hit, since that allows 00072 // strings with middle '\0' characters to be supported. 00073 UInt32 OVR_STDCALL DecodeNextChar_Advance0(const char** putf8Buffer); 00074 00075 // Safer version of DecodeNextChar, which doesn't advance pointer if 00076 // null character is hit. 00077 inline UInt32 DecodeNextChar(const char** putf8Buffer) 00078 { 00079 UInt32 ch = DecodeNextChar_Advance0(putf8Buffer); 00080 if (ch == 0) 00081 (*putf8Buffer)--; 00082 return ch; 00083 } 00084 00085 00086 }} // OVR::UTF8Util 00087 00088 #endif