core.h
Go to the documentation of this file.
00001 // Copyright 2006 Nemanja Trifunovic
00002 
00003 /*
00004 Permission is hereby granted, free of charge, to any person or organization
00005 obtaining a copy of the software and accompanying documentation covered by
00006 this license (the "Software") to use, reproduce, display, distribute,
00007 execute, and transmit the Software, and to prepare derivative works of the
00008 Software, and to permit third-parties to whom the Software is furnished to
00009 do so, all subject to the following:
00010 
00011 The copyright notices in the Software and this entire statement, including
00012 the above license grant, this restriction and the following disclaimer,
00013 must be included in all copies of the Software, in whole or in part, and
00014 all derivative works of the Software, unless such copies or derivative
00015 works are solely in the form of machine-executable object code generated by
00016 a source language processor.
00017 
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00020 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
00021 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
00022 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00024 DEALINGS IN THE SOFTWARE.
00025 */
00026 
00027 
00028 #ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00029 #define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00030 
00031 #include <iterator>
00032 
namespace utf8
{
    // Fixed-width unsigned integer typedefs used throughout the library.
    // They reuse the names found in cstdint / boost/cstdint but are declared
    // inside namespace utf8. You may need to change them to match your system.
    typedef unsigned char   uint8_t;
    typedef unsigned short  uint16_t;
    typedef unsigned int    uint32_t;

// Helper code - not intended to be directly called by the library users. May be changed at any time
namespace internal
{
    // Unicode constants
    // Leading (high) surrogates: 0xd800 - 0xdbff
    // Trailing (low) surrogates: 0xdc00 - 0xdfff
    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
    // The subtraction is carried out in int; the cast makes the narrowing explicit.
    const uint16_t LEAD_OFFSET         = static_cast<uint16_t>(LEAD_SURROGATE_MIN - (0x10000 >> 10));
    // Evaluated in unsigned arithmetic, so the value intentionally wraps modulo 2^32.
    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;

    // Maximum valid value for a Unicode code point
    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;

    // Returns the low 8 bits of oc as an unsigned octet. This keeps the rest of
    // the code independent of whether the iterated character type is signed.
    template<typename octet_type>
    inline uint8_t mask8(octet_type oc)
    {
        return static_cast<uint8_t>(0xff & oc);
    }

    // Returns the low 16 bits of oc as an unsigned 16-bit value.
    template<typename u16_type>
    inline uint16_t mask16(u16_type oc)
    {
        return static_cast<uint16_t>(0xffff & oc);
    }

    // True when oc is a UTF-8 continuation octet, i.e. its top two bits are 10.
    template<typename octet_type>
    inline bool is_trail(octet_type oc)
    {
        return ((utf8::internal::mask8(oc) >> 6) == 0x2);
    }

    // True when cp lies in [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
    template <typename u16>
    inline bool is_lead_surrogate(u16 cp)
    {
        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
    }

    // True when cp lies in [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
    template <typename u16>
    inline bool is_trail_surrogate(u16 cp)
    {
        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
    }

    // True when cp is any surrogate (lead or trail): [0xd800, 0xdfff].
    template <typename u16>
    inline bool is_surrogate(u16 cp)
    {
        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
    }

    // True when cp does not exceed CODE_POINT_MAX and is not a surrogate.
    template <typename u32>
    inline bool is_code_point_valid(u32 cp)
    {
        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
    }

    // Inspects the octet that lead_it points at and returns the total length
    // (1 to 4) of the sequence it introduces, or 0 when the octet is not a
    // recognised lead octet. lead_it is dereferenced unconditionally, so the
    // caller must make sure it is dereferenceable.
    template <typename octet_iterator>
    inline typename std::iterator_traits<octet_iterator>::difference_type
    sequence_length(octet_iterator lead_it)
    {
        uint8_t lead = utf8::internal::mask8(*lead_it);
        if (lead < 0x80)
            return 1;
        else if ((lead >> 5) == 0x6)
            return 2;
        else if ((lead >> 4) == 0xe)
            return 3;
        else if ((lead >> 3) == 0x1e)
            return 4;
        else
            return 0;
    }

    // True when cp was decoded from a sequence of `length` octets although a
    // shorter sequence would have been enough to represent it.
    template <typename octet_difference_type>
    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
    {
        if (cp < 0x80) {
            if (length != 1)
                return true;
        }
        else if (cp < 0x800) {
            if (length != 2)
                return true;
        }
        else if (cp < 0x10000) {
            if (length != 3)
                return true;
        }

        return false;
    }

    // Status codes returned by the decoding and validation helpers below.
    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};

    // Advances it by one position and checks what it now refers to:
    //   NOT_ENOUGH_ROOM     - the end of the range was reached
    //   INCOMPLETE_SEQUENCE - the new octet is not a continuation octet
    //   UTF8_OK             - the new octet is a continuation octet
    template <typename octet_iterator>
    utf_error increase_safely(octet_iterator& it, octet_iterator end)
    {
        if (++it == end)
            return NOT_ENOUGH_ROOM;

        if (!utf8::internal::is_trail(*it))
            return INCOMPLETE_SEQUENCE;

        return UTF8_OK;
    }

    // Steps to the next octet and leaves the enclosing function with the error
    // code if that step fails. Only used by the get_sequence_x functions below.
    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = utf8::internal::increase_safely(IT, END); if (ret != UTF8_OK) return ret;}

    // get_sequence_x functions decode a UTF-8 sequence of length x.
    // On success `it` is left on the LAST octet of the sequence (not one past
    // it) and code_point holds the decoded value. No overlong or code point
    // range checks are performed here; validate_next does those.

    template <typename octet_iterator>
    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 5 payload bits from the lead octet, 6 from the trail octet
        code_point = ((code_point << 6) & 0x7ff) + (utf8::internal::mask8(*it) & 0x3f);

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 4 payload bits from the lead octet, 6 from the first trail octet
        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += utf8::internal::mask8(*it) & 0x3f;

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
           return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 3 payload bits from the lead octet, 6 from the first trail octet
        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += utf8::internal::mask8(*it) & 0x3f;

        return UTF8_OK;
    }

    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR

    // Decodes and validates the sequence that starts at it.
    //
    // On success returns UTF8_OK, stores the decoded value in code_point and
    // moves it one past the last octet of the sequence. On failure returns the
    // error code, leaves code_point untouched and leaves it at its original
    // position.
    //
    // An empty range (it == end) yields NOT_ENOUGH_ROOM without dereferencing
    // the iterator.
    template <typename octet_iterator>
    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        // sequence_length() dereferences the iterator, so the empty range has
        // to be rejected before it is called.
        if (it == end)
            return NOT_ENOUGH_ROOM;

        // Save the original value of it so we can go back in case of failure
        // Of course, it does not make much sense with i.e. stream iterators
        octet_iterator original_it = it;

        uint32_t cp = 0;
        // Determine the sequence length based on the lead octet
        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
        const octet_difference_type length = utf8::internal::sequence_length(it);

        // Get trail octets and calculate the code point
        utf_error err = UTF8_OK;
        switch (length) {
            case 0:
                return INVALID_LEAD;
            case 1:
                err = utf8::internal::get_sequence_1(it, end, cp);
                break;
            case 2:
                err = utf8::internal::get_sequence_2(it, end, cp);
                break;
            case 3:
                err = utf8::internal::get_sequence_3(it, end, cp);
                break;
            case 4:
                err = utf8::internal::get_sequence_4(it, end, cp);
                break;
        }

        if (err == UTF8_OK) {
            // Decoding succeeded. Now, security checks...
            if (utf8::internal::is_code_point_valid(cp)) {
                if (!utf8::internal::is_overlong_sequence(cp, length)){
                    // Passed! Return here.
                    code_point = cp;
                    // get_sequence_x left it on the last octet; step past it
                    ++it;
                    return UTF8_OK;
                }
                else
                    err = OVERLONG_SEQUENCE;
            }
            else
                err = INVALID_CODE_POINT;
        }

        // Failure branch - restore the original value of the iterator
        it = original_it;
        return err;
    }

    // Same as the three-argument overload, for callers that only need the
    // status and not the decoded code point.
    template <typename octet_iterator>
    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
        uint32_t ignored = 0;
        return utf8::internal::validate_next(it, end, ignored);
    }

} // namespace internal


    // Byte order mark
    const uint8_t bom[] = {0xef, 0xbb, 0xbf};

    // Returns an iterator to the first octet of the first sequence in
    // [start, end) that fails validation, or end when every sequence is valid.
    template <typename octet_iterator>
    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
    {
        octet_iterator result = start;
        while (result != end) {
            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
            if (err_code != internal::UTF8_OK)
                return result;
        }
        return result;
    }

    // True when [start, end) contains only valid sequences (an empty range is valid).
    template <typename octet_iterator>
    inline bool is_valid(octet_iterator start, octet_iterator end)
    {
        return (utf8::find_invalid(start, end) == end);
    }

    // True when [it, end) begins with the three octets of the byte order mark.
    // Each octet is read only after checking that the end has not been reached.
    template <typename octet_iterator>
    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
    {
        return (
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
            ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
           );
    }

    //Deprecated in release 2.3
    // Compares up to three octets starting at it with the byte order mark.
    // There is no end iterator and therefore no bounds check: the caller must
    // guarantee the octets are readable. Prefer starts_with_bom.
    template <typename octet_iterator>
    inline bool is_bom (octet_iterator it)
    {
        return (
            (utf8::internal::mask8(*it++)) == bom[0] &&
            (utf8::internal::mask8(*it++)) == bom[1] &&
            (utf8::internal::mask8(*it))   == bom[2]
           );
    }
} // namespace utf8
00326 
00327 #endif // header guard
00328 
00329 


denso_controller
Author(s): Ryohei Ueda
autogenerated on Thu Jun 6 2019 20:15:19