Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00029 #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00030
#include <cstddef>
#include <iterator>

#include "core.h"
00032
00033 namespace utf8
00034 {
00035 namespace unchecked
00036 {
00037 template <typename octet_iterator>
00038 octet_iterator append(uint32_t cp, octet_iterator result)
00039 {
00040 if (cp < 0x80)
00041 *(result++) = static_cast<uint8_t>(cp);
00042 else if (cp < 0x800) {
00043 *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
00044 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
00045 }
00046 else if (cp < 0x10000) {
00047 *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
00048 *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
00049 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
00050 }
00051 else {
00052 *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
00053 *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
00054 *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
00055 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
00056 }
00057 return result;
00058 }
00059
00060 template <typename octet_iterator>
00061 uint32_t next(octet_iterator& it)
00062 {
00063 uint32_t cp = utf8::internal::mask8(*it);
00064 typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
00065 switch (length) {
00066 case 1:
00067 break;
00068 case 2:
00069 it++;
00070 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
00071 break;
00072 case 3:
00073 ++it;
00074 cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
00075 ++it;
00076 cp += (*it) & 0x3f;
00077 break;
00078 case 4:
00079 ++it;
00080 cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
00081 ++it;
00082 cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
00083 ++it;
00084 cp += (*it) & 0x3f;
00085 break;
00086 }
00087 ++it;
00088 return cp;
00089 }
00090
00091 template <typename octet_iterator>
00092 uint32_t peek_next(octet_iterator it)
00093 {
00094 return utf8::unchecked::next(it);
00095 }
00096
00097 template <typename octet_iterator>
00098 uint32_t prior(octet_iterator& it)
00099 {
00100 while (utf8::internal::is_trail(*(--it))) ;
00101 octet_iterator temp = it;
00102 return utf8::unchecked::next(temp);
00103 }
00104
00105
00106 template <typename octet_iterator>
00107 inline uint32_t previous(octet_iterator& it)
00108 {
00109 return utf8::unchecked::prior(it);
00110 }
00111
00112 template <typename octet_iterator, typename distance_type>
00113 void advance (octet_iterator& it, distance_type n)
00114 {
00115 for (distance_type i = 0; i < n; ++i)
00116 utf8::unchecked::next(it);
00117 }
00118
00119 template <typename octet_iterator>
00120 typename std::iterator_traits<octet_iterator>::difference_type
00121 distance (octet_iterator first, octet_iterator last)
00122 {
00123 typename std::iterator_traits<octet_iterator>::difference_type dist;
00124 for (dist = 0; first < last; ++dist)
00125 utf8::unchecked::next(first);
00126 return dist;
00127 }
00128
00129 template <typename u16bit_iterator, typename octet_iterator>
00130 octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
00131 {
00132 while (start != end) {
00133 uint32_t cp = utf8::internal::mask16(*start++);
00134
00135 if (utf8::internal::is_lead_surrogate(cp)) {
00136 uint32_t trail_surrogate = utf8::internal::mask16(*start++);
00137 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
00138 }
00139 result = utf8::unchecked::append(cp, result);
00140 }
00141 return result;
00142 }
00143
00144 template <typename u16bit_iterator, typename octet_iterator>
00145 u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
00146 {
00147 while (start < end) {
00148 uint32_t cp = utf8::unchecked::next(start);
00149 if (cp > 0xffff) {
00150 *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
00151 *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
00152 }
00153 else
00154 *result++ = static_cast<uint16_t>(cp);
00155 }
00156 return result;
00157 }
00158
00159 template <typename octet_iterator, typename u32bit_iterator>
00160 octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
00161 {
00162 while (start != end)
00163 result = utf8::unchecked::append(*(start++), result);
00164
00165 return result;
00166 }
00167
00168 template <typename octet_iterator, typename u32bit_iterator>
00169 u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
00170 {
00171 while (start < end)
00172 (*result++) = utf8::unchecked::next(start);
00173
00174 return result;
00175 }
00176
00177
00178 template <typename octet_iterator>
00179 class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
00180 octet_iterator it;
00181 public:
00182 iterator () {}
00183 explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
00184
00185 octet_iterator base () const { return it; }
00186 uint32_t operator * () const
00187 {
00188 octet_iterator temp = it;
00189 return utf8::unchecked::next(temp);
00190 }
00191 bool operator == (const iterator& rhs) const
00192 {
00193 return (it == rhs.it);
00194 }
00195 bool operator != (const iterator& rhs) const
00196 {
00197 return !(operator == (rhs));
00198 }
00199 iterator& operator ++ ()
00200 {
00201 ::std::advance(it, utf8::internal::sequence_length(it));
00202 return *this;
00203 }
00204 iterator operator ++ (int)
00205 {
00206 iterator temp = *this;
00207 ::std::advance(it, utf8::internal::sequence_length(it));
00208 return temp;
00209 }
00210 iterator& operator -- ()
00211 {
00212 utf8::unchecked::prior(it);
00213 return *this;
00214 }
00215 iterator operator -- (int)
00216 {
00217 iterator temp = *this;
00218 utf8::unchecked::prior(it);
00219 return temp;
00220 }
00221 };
00222
00223 }
00224 }
00225
00226
00227 #endif // header guard
00228