OVR_String.h
Go to the documentation of this file.
00001 /************************************************************************************
00002 
00003 PublicHeader:   OVR.h
00004 Filename    :   OVR_String.h
00005 Content     :   UTF8 string implementation with copy-on-write semantics
00006                 (thread-safe for assignment but not modification).
00007 Created     :   September 19, 2012
00008 Notes       : 
00009 
00010 Copyright   :   Copyright 2012 Oculus VR, Inc. All Rights reserved.
00011 
00012 Use of this software is subject to the terms of the Oculus license
00013 agreement provided at the time of installation or download, or which
00014 otherwise accompanies this software in either electronic or hard copy form.
00015 
00016 ************************************************************************************/
00017 
00018 #ifndef OVR_String_h
00019 #define OVR_String_h
00020 
00021 #include "OVR_Types.h"
00022 #include "OVR_Allocator.h"
00023 #include "OVR_UTF8Util.h"
00024 #include "OVR_Atomic.h"
00025 #include "OVR_Std.h"
00026 #include "OVR_Alg.h"
00027 
00028 namespace OVR {
00029 
00030 // ***** Classes
00031 
00032 class String;
00033 class StringBuffer;
00034 
00035 
00036 //-----------------------------------------------------------------------------------
00037 // ***** String Class 
00038 
00039 // String is UTF8 based string class with copy-on-write implementation
00040 // for assignment.
00041 
00042 class String
00043 {
00044 protected:
00045 
00046     enum FlagConstants
00047     {
00048         //Flag_GetLength      = 0x7FFFFFFF,
00049         // This flag is set if GetLength() == GetSize() for a string.
00050         // Avoid extra scanning is Substring and indexing logic.
00051         Flag_LengthIsSizeShift   = (sizeof(UPInt)*8 - 1)
00052     };
00053 
00054 
00055     // Internal structure to hold string data
00056     struct DataDesc
00057     {
00058         // Number of bytes. Will be the same as the number of chars if the characters
00059         // are ascii, may not be equal to number of chars in case string data is UTF8.
00060         UPInt   Size;       
00061         volatile SInt32 RefCount;
00062         char    Data[1];
00063 
00064         void    AddRef()
00065         {
00066             AtomicOps<SInt32>::ExchangeAdd_NoSync(&RefCount, 1);
00067         }
00068         // Decrement ref count. This needs to be thread-safe, since
00069         // a different thread could have also decremented the ref count.
00070         // For example, if u start off with a ref count = 2. Now if u
00071         // decrement the ref count and check against 0 in different
00072         // statements, a different thread can also decrement the ref count
00073         // in between our decrement and checking against 0 and will find
00074         // the ref count = 0 and delete the object. This will lead to a crash
00075         // when context switches to our thread and we'll be trying to delete
00076         // an already deleted object. Hence decrementing the ref count and
00077         // checking against 0 needs to made an atomic operation.
00078         void    Release()
00079         {
00080             if ((AtomicOps<SInt32>::ExchangeAdd_NoSync(&RefCount, -1) - 1) == 0)
00081                 OVR_FREE(this);
00082         }
00083 
00084         static UPInt GetLengthFlagBit()     { return UPInt(1) << Flag_LengthIsSizeShift; }
00085         UPInt       GetSize() const         { return Size & ~GetLengthFlagBit() ; }
00086         UPInt       GetLengthFlag()  const  { return Size & GetLengthFlagBit(); }
00087         bool        LengthIsSize() const    { return GetLengthFlag() != 0; }
00088     };
00089 
00090     // Heap type of the string is encoded in the lower bits.
00091     enum HeapType
00092     {
00093         HT_Global   = 0,    // Heap is global.
00094         HT_Local    = 1,    // SF::String_loc: Heap is determined based on string's address.
00095         HT_Dynamic  = 2,    // SF::String_temp: Heap is stored as a part of the class.
00096         HT_Mask     = 3
00097     };
00098 
00099     union {
00100         DataDesc* pData;
00101         UPInt     HeapTypeBits;
00102     };
00103     typedef union {
00104         DataDesc* pData;
00105         UPInt     HeapTypeBits;
00106     } DataDescUnion;
00107 
00108     inline HeapType    GetHeapType() const { return (HeapType) (HeapTypeBits & HT_Mask); }
00109 
00110     inline DataDesc*   GetData() const
00111     {
00112         DataDescUnion u;
00113         u.pData    = pData;
00114         u.HeapTypeBits = (u.HeapTypeBits & ~(UPInt)HT_Mask);
00115         return u.pData;
00116     }
00117     
00118     inline void        SetData(DataDesc* pdesc)
00119     {
00120         HeapType ht = GetHeapType();
00121         pData = pdesc;
00122         OVR_ASSERT((HeapTypeBits & HT_Mask) == 0);
00123         HeapTypeBits |= ht;        
00124     }
00125 
00126     
00127     DataDesc*   AllocData(UPInt size, UPInt lengthIsSize);
00128     DataDesc*   AllocDataCopy1(UPInt size, UPInt lengthIsSize,
00129                                const char* pdata, UPInt copySize);
00130     DataDesc*   AllocDataCopy2(UPInt size, UPInt lengthIsSize,
00131                                const char* pdata1, UPInt copySize1,
00132                                const char* pdata2, UPInt copySize2);
00133 
00134     // Special constructor to avoid data initalization when used in derived class.
00135     struct NoConstructor { };
00136     String(const NoConstructor&) { }
00137 
00138 public:
00139 
00140     // For initializing string with dynamic buffer
00141     struct InitStruct
00142     {
00143         virtual ~InitStruct() { }
00144         virtual void InitString(char* pbuffer, UPInt size) const = 0;
00145     };
00146 
00147 
00148     // Constructors / Destructors.
00149     String();
00150     String(const char* data);
00151     String(const char* data1, const char* pdata2, const char* pdata3 = 0);
00152     String(const char* data, UPInt buflen);
00153     String(const String& src);
00154     String(const StringBuffer& src);
00155     String(const InitStruct& src, UPInt size);
00156     explicit String(const wchar_t* data);      
00157 
00158     // Destructor (Captain Obvious guarantees!)
00159     ~String()
00160     {
00161         GetData()->Release();
00162     }
00163 
00164     // Declaration of NullString
00165     static DataDesc NullData;
00166 
00167 
00168     // *** General Functions
00169 
00170     void        Clear();
00171 
00172     // For casting to a pointer to char.
00173     operator const char*() const        { return GetData()->Data; }
00174     // Pointer to raw buffer.
00175     const char* ToCStr() const          { return GetData()->Data; }
00176 
00177     // Returns number of bytes
00178     UPInt       GetSize() const         { return GetData()->GetSize() ; }
00179     // Tells whether or not the string is empty
00180     bool        IsEmpty() const         { return GetSize() == 0; }
00181 
00182     // Returns  number of characters
00183     UPInt       GetLength() const;
00184 
00185     // Returns  character at the specified index
00186     UInt32      GetCharAt(UPInt index) const;
00187     UInt32      GetFirstCharAt(UPInt index, const char** offset) const;
00188     UInt32      GetNextChar(const char** offset) const;
00189 
00190     // Appends a character
00191     void        AppendChar(UInt32 ch);
00192 
00193     // Append a string
00194     void        AppendString(const wchar_t* pstr, SPInt len = -1);
00195     void        AppendString(const char* putf8str, SPInt utf8StrSz = -1);
00196 
00197     // Assigned a string with dynamic data (copied through initializer).
00198     void        AssignString(const InitStruct& src, UPInt size);
00199     // Assigns string with known size.
00200     void        AssignString(const char* putf8str, UPInt size);
00201 
00202     //  Resize the string to the new size
00203 //  void        Resize(UPInt _size);
00204 
00205     // Removes the character at posAt
00206     void        Remove(UPInt posAt, SPInt len = 1);
00207 
00208     // Returns a String that's a substring of this.
00209     //  -start is the index of the first UTF8 character you want to include.
00210     //  -end is the index one past the last UTF8 character you want to include.
00211     String   Substring(UPInt start, UPInt end) const;
00212 
00213     // Case-conversion
00214     String   ToUpper() const;
00215     String   ToLower() const;
00216 
00217     // Inserts substr at posAt
00218     String&    Insert (const char* substr, UPInt posAt, SPInt len = -1);
00219 
00220     // Inserts character at posAt
00221     UPInt       InsertCharAt(UInt32 c, UPInt posAt);
00222 
00223     // Inserts substr at posAt, which is an index of a character (not byte).
00224     // Of size is specified, it is in bytes.
00225 //  String&    Insert(const UInt32* substr, UPInt posAt, SPInt size = -1);
00226 
00227     // Get Byte index of the character at position = index
00228     UPInt       GetByteIndex(UPInt index) const { return (UPInt)UTF8Util::GetByteIndex(index, GetData()->Data); }
00229 
00230     // Utility: case-insensitive string compare.  stricmp() & strnicmp() are not
00231     // ANSI or POSIX, do not seem to appear in Linux.
00232     static int OVR_STDCALL   CompareNoCase(const char* a, const char* b);
00233     static int OVR_STDCALL   CompareNoCase(const char* a, const char* b, SPInt len);
00234 
00235     // Hash function, case-insensitive
00236     static UPInt OVR_STDCALL BernsteinHashFunctionCIS(const void* pdataIn, UPInt size, UPInt seed = 5381);
00237 
00238     // Hash function, case-sensitive
00239     static UPInt OVR_STDCALL BernsteinHashFunction(const void* pdataIn, UPInt size, UPInt seed = 5381);
00240 
00241 
00242     // ***** File path parsing helper functions.
00243     // Implemented in OVR_String_FilePath.cpp.
00244 
00245     // Absolute paths can star with:
00246     //  - protocols:        'file://', 'http://'
00247     //  - windows drive:    'c:\'
00248     //  - UNC share name:   '\\share'
00249     //  - unix root         '/'
00250     static bool HasAbsolutePath(const char* path);
00251     static bool HasExtension(const char* path);
00252     static bool HasProtocol(const char* path);
00253 
00254     bool    HasAbsolutePath() const { return HasAbsolutePath(ToCStr()); }
00255     bool    HasExtension() const    { return HasExtension(ToCStr()); }
00256     bool    HasProtocol() const     { return HasProtocol(ToCStr()); }
00257 
00258     String  GetProtocol() const;    // Returns protocol, if any, with trailing '://'.
00259     String  GetPath() const;        // Returns path with trailing '/'.
00260     String  GetFilename() const;    // Returns filename, including extension.
00261     String  GetExtension() const;   // Returns extension with a dot.
00262 
00263     void    StripProtocol();        // Strips front protocol, if any, from the string.
00264     void    StripExtension();       // Strips off trailing extension.
00265     
00266 
00267     // Operators
00268     // Assignment
00269     void        operator =  (const char* str);
00270     void        operator =  (const wchar_t* str);
00271     void        operator =  (const String& src);
00272     void        operator =  (const StringBuffer& src);
00273 
00274     // Addition
00275     void        operator += (const String& src);
00276     void        operator += (const char* psrc)       { AppendString(psrc); }
00277     void        operator += (const wchar_t* psrc)    { AppendString(psrc); }
00278     void        operator += (char  ch)               { AppendChar(ch); }
00279     String      operator +  (const char* str) const;
00280     String      operator +  (const String& src)  const;
00281 
00282     // Comparison
00283     bool        operator == (const String& str) const
00284     {
00285         return (OVR_strcmp(GetData()->Data, str.GetData()->Data)== 0);
00286     }
00287 
00288     bool        operator != (const String& str) const
00289     {
00290         return !operator == (str);
00291     }
00292 
00293     bool        operator == (const char* str) const
00294     {
00295         return OVR_strcmp(GetData()->Data, str) == 0;
00296     }
00297 
00298     bool        operator != (const char* str) const
00299     {
00300         return !operator == (str);
00301     }
00302 
00303     bool        operator <  (const char* pstr) const
00304     {
00305         return OVR_strcmp(GetData()->Data, pstr) < 0;
00306     }
00307 
00308     bool        operator <  (const String& str) const
00309     {
00310         return *this < str.GetData()->Data;
00311     }
00312 
00313     bool        operator >  (const char* pstr) const
00314     {
00315         return OVR_strcmp(GetData()->Data, pstr) > 0;
00316     }
00317 
00318     bool        operator >  (const String& str) const
00319     {
00320         return *this > str.GetData()->Data;
00321     }
00322 
00323     int CompareNoCase(const char* pstr) const
00324     {
00325         return CompareNoCase(GetData()->Data, pstr);
00326     }
00327     int CompareNoCase(const String& str) const
00328     {
00329         return CompareNoCase(GetData()->Data, str.ToCStr());
00330     }
00331 
00332     // Accesses raw bytes
00333     const char&     operator [] (int index) const
00334     {
00335         OVR_ASSERT(index >= 0 && (UPInt)index < GetSize());
00336         return GetData()->Data[index];
00337     }
00338     const char&     operator [] (UPInt index) const
00339     {
00340         OVR_ASSERT(index < GetSize());
00341         return GetData()->Data[index];
00342     }
00343 
00344 
00345     // Case insensitive keys are used to look up insensitive string in hash tables
00346     // for SWF files with version before SWF 7.
00347     struct NoCaseKey
00348     {   
00349         const String* pStr;
00350         NoCaseKey(const String &str) : pStr(&str){};
00351     };
00352 
00353     bool    operator == (const NoCaseKey& strKey) const
00354     {
00355         return (CompareNoCase(ToCStr(), strKey.pStr->ToCStr()) == 0);
00356     }
00357     bool    operator != (const NoCaseKey& strKey) const
00358     {
00359         return !(CompareNoCase(ToCStr(), strKey.pStr->ToCStr()) == 0);
00360     }
00361 
00362     // Hash functor used for strings.
00363     struct HashFunctor
00364     {    
00365         UPInt  operator()(const String& data) const
00366         {
00367             UPInt  size = data.GetSize();
00368             return String::BernsteinHashFunction((const char*)data, size);
00369         }        
00370     };
00371     // Case-insensitive hash functor used for strings. Supports additional
00372     // lookup based on NoCaseKey.
00373     struct NoCaseHashFunctor
00374     {    
00375         UPInt  operator()(const String& data) const
00376         {
00377             UPInt  size = data.GetSize();
00378             return String::BernsteinHashFunctionCIS((const char*)data, size);
00379         }
00380         UPInt  operator()(const NoCaseKey& data) const
00381         {       
00382             UPInt  size = data.pStr->GetSize();
00383             return String::BernsteinHashFunctionCIS((const char*)data.pStr->ToCStr(), size);
00384         }
00385     };
00386 
00387 };
00388 
00389 
00390 //-----------------------------------------------------------------------------------
00391 // ***** String Buffer used for Building Strings
00392 
00393 class StringBuffer
00394 {
00395     char*           pData;
00396     UPInt           Size;
00397     UPInt           BufferSize;
00398     UPInt           GrowSize;    
00399     mutable bool    LengthIsSize;    
00400 
00401 public:
00402 
00403     // Constructors / Destructor.    
00404     StringBuffer();
00405     explicit StringBuffer(UPInt growSize);
00406     StringBuffer(const char* data);
00407     StringBuffer(const char* data, UPInt buflen);
00408     StringBuffer(const String& src);
00409     StringBuffer(const StringBuffer& src);
00410     explicit StringBuffer(const wchar_t* data);
00411     ~StringBuffer();
00412     
00413 
00414     // Modify grow size used for growing/shrinking the buffer.
00415     UPInt       GetGrowSize() const         { return GrowSize; }
00416     void        SetGrowSize(UPInt growSize);
00417     
00418 
00419     // *** General Functions
00420     // Does not release memory, just sets Size to 0
00421     void        Clear();
00422 
00423     // For casting to a pointer to char.
00424     operator const char*() const        { return (pData) ? pData : ""; }
00425     // Pointer to raw buffer.
00426     const char* ToCStr() const          { return (pData) ? pData : ""; }
00427 
00428     // Returns number of bytes.
00429     UPInt       GetSize() const         { return Size ; }
00430     // Tells whether or not the string is empty.
00431     bool        IsEmpty() const         { return GetSize() == 0; }
00432 
00433     // Returns  number of characters
00434     UPInt       GetLength() const;
00435 
00436     // Returns  character at the specified index
00437     UInt32      GetCharAt(UPInt index) const;
00438     UInt32      GetFirstCharAt(UPInt index, const char** offset) const;
00439     UInt32      GetNextChar(const char** offset) const;
00440 
00441 
00442     //  Resize the string to the new size
00443     void        Resize(UPInt _size);
00444     void        Reserve(UPInt _size);
00445 
00446     // Appends a character
00447     void        AppendChar(UInt32 ch);
00448 
00449     // Append a string
00450     void        AppendString(const wchar_t* pstr, SPInt len = -1);
00451     void        AppendString(const char* putf8str, SPInt utf8StrSz = -1);
00452     void        AppendFormat(const char* format, ...);
00453 
00454     // Assigned a string with dynamic data (copied through initializer).
00455     //void        AssignString(const InitStruct& src, UPInt size);
00456 
00457     // Inserts substr at posAt
00458     void        Insert (const char* substr, UPInt posAt, SPInt len = -1);
00459     // Inserts character at posAt
00460     UPInt       InsertCharAt(UInt32 c, UPInt posAt);
00461 
00462     // Assignment
00463     void        operator =  (const char* str);
00464     void        operator =  (const wchar_t* str);
00465     void        operator =  (const String& src);
00466 
00467     // Addition
00468     void        operator += (const String& src)      { AppendString(src.ToCStr(),src.GetSize()); }
00469     void        operator += (const char* psrc)       { AppendString(psrc); }
00470     void        operator += (const wchar_t* psrc)    { AppendString(psrc); }
00471     void        operator += (char  ch)               { AppendChar(ch); }
00472     //String   operator +  (const char* str) const ;
00473     //String   operator +  (const String& src)  const ;
00474 
00475     // Accesses raw bytes
00476     char&       operator [] (int index)
00477     {
00478         OVR_ASSERT(((UPInt)index) < GetSize());
00479         return pData[index];
00480     }
00481     char&       operator [] (UPInt index)
00482     {
00483         OVR_ASSERT(index < GetSize());
00484         return pData[index];
00485     }
00486 
00487     const char&     operator [] (int index) const 
00488     {
00489         OVR_ASSERT(((UPInt)index) < GetSize());
00490         return pData[index];
00491     }
00492     const char&     operator [] (UPInt index) const
00493     {
00494         OVR_ASSERT(index < GetSize());
00495         return pData[index];
00496     }
00497 };
00498 
00499 
00500 //
00501 // Wrapper for string data. The data must have a guaranteed 
00502 // lifespan throughout the usage of the wrapper. Not intended for 
00503 // cached usage. Not thread safe.
00504 //
00505 class StringDataPtr
00506 {
00507 public:
00508     StringDataPtr() : pStr(NULL), Size(0) {}
00509     StringDataPtr(const StringDataPtr& p)
00510         : pStr(p.pStr), Size(p.Size) {}
00511     StringDataPtr(const char* pstr, UPInt sz)
00512         : pStr(pstr), Size(sz) {}
00513     StringDataPtr(const char* pstr)
00514         : pStr(pstr), Size((pstr != NULL) ? OVR_strlen(pstr) : 0) {}
00515     explicit StringDataPtr(const String& str)
00516         : pStr(str.ToCStr()), Size(str.GetSize()) {}
00517     template <typename T, int N> 
00518     StringDataPtr(const T (&v)[N])
00519         : pStr(v), Size(N) {}
00520 
00521 public:
00522     const char* ToCStr() const { return pStr; }
00523     UPInt       GetSize() const { return Size; }
00524     bool        IsEmpty() const { return GetSize() == 0; }
00525 
00526     // value is a prefix of this string
00527     // Character's values are not compared.
00528     bool        IsPrefix(const StringDataPtr& value) const
00529     {
00530         return ToCStr() == value.ToCStr() && GetSize() >= value.GetSize();
00531     }
00532     // value is a suffix of this string
00533     // Character's values are not compared.
00534     bool        IsSuffix(const StringDataPtr& value) const
00535     {
00536         return ToCStr() <= value.ToCStr() && (End()) == (value.End());
00537     }
00538 
00539     // Find first character.
00540     // init_ind - initial index.
00541     SPInt       FindChar(char c, UPInt init_ind = 0) const 
00542     {
00543         for (UPInt i = init_ind; i < GetSize(); ++i)
00544             if (pStr[i] == c)
00545                 return static_cast<SPInt>(i);
00546 
00547         return -1; 
00548     }
00549 
00550     // Find last character.
00551     // init_ind - initial index.
00552     SPInt       FindLastChar(char c, UPInt init_ind = ~0) const 
00553     {
00554         if (init_ind == (UPInt)~0 || init_ind > GetSize())
00555             init_ind = GetSize();
00556         else
00557             ++init_ind;
00558 
00559         for (UPInt i = init_ind; i > 0; --i)
00560             if (pStr[i - 1] == c)
00561                 return static_cast<SPInt>(i - 1);
00562 
00563         return -1; 
00564     }
00565 
00566     // Create new object and trim size bytes from the left.
00567     StringDataPtr  GetTrimLeft(UPInt size) const
00568     {
00569         // Limit trim size to the size of the string.
00570         size = Alg::PMin(GetSize(), size);
00571 
00572         return StringDataPtr(ToCStr() + size, GetSize() - size);
00573     }
00574     // Create new object and trim size bytes from the right.
00575     StringDataPtr  GetTrimRight(UPInt size) const
00576     {
00577         // Limit trim to the size of the string.
00578         size = Alg::PMin(GetSize(), size);
00579 
00580         return StringDataPtr(ToCStr(), GetSize() - size);
00581     }
00582 
00583     // Create new object, which contains next token.
00584     // Useful for parsing.
00585     StringDataPtr GetNextToken(char separator = ':') const
00586     {
00587         UPInt cur_pos = 0;
00588         const char* cur_str = ToCStr();
00589 
00590         for (; cur_pos < GetSize() && cur_str[cur_pos]; ++cur_pos)
00591         {
00592             if (cur_str[cur_pos] == separator)
00593             {
00594                 break;
00595             }
00596         }
00597 
00598         return StringDataPtr(ToCStr(), cur_pos);
00599     }
00600 
00601     // Trim size bytes from the left.
00602     StringDataPtr& TrimLeft(UPInt size)
00603     {
00604         // Limit trim size to the size of the string.
00605         size = Alg::PMin(GetSize(), size);
00606         pStr += size;
00607         Size -= size;
00608 
00609         return *this;
00610     }
00611     // Trim size bytes from the right.
00612     StringDataPtr& TrimRight(UPInt size)
00613     {
00614         // Limit trim to the size of the string.
00615         size = Alg::PMin(GetSize(), size);
00616         Size -= size;
00617 
00618         return *this;
00619     }
00620 
00621     const char* Begin() const { return ToCStr(); }
00622     const char* End() const { return ToCStr() + GetSize(); }
00623 
00624     // Hash functor used string data pointers
00625     struct HashFunctor
00626     {    
00627         UPInt operator()(const StringDataPtr& data) const
00628         {
00629             return String::BernsteinHashFunction(data.ToCStr(), data.GetSize());
00630         }        
00631     };
00632 
00633     bool operator== (const StringDataPtr& data) const 
00634     {
00635         return (OVR_strncmp(pStr, data.pStr, data.Size) == 0);
00636     }
00637 
00638 protected:
00639     const char* pStr;
00640     UPInt       Size;
00641 };
00642 
00643 } // OVR
00644 
00645 #endif


oculus_sdk
Author(s):
autogenerated on Mon Oct 6 2014 03:01:19