checked.h
Go to the documentation of this file.
00001 // Copyright 2006 Nemanja Trifunovic
00002 
00003 /*
00004 Permission is hereby granted, free of charge, to any person or organization
00005 obtaining a copy of the software and accompanying documentation covered by
00006 this license (the "Software") to use, reproduce, display, distribute,
00007 execute, and transmit the Software, and to prepare derivative works of the
00008 Software, and to permit third-parties to whom the Software is furnished to
00009 do so, all subject to the following:
00010 
00011 The copyright notices in the Software and this entire statement, including
00012 the above license grant, this restriction and the following disclaimer,
00013 must be included in all copies of the Software, in whole or in part, and
00014 all derivative works of the Software, unless such copies or derivative
00015 works are solely in the form of machine-executable object code generated by
00016 a source language processor.
00017 
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00020 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
00021 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
00022 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00024 DEALINGS IN THE SOFTWARE.
00025 */
00026 
00027 
00028 #ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00029 #define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00030 
00031 #include "core.h"
00032 #include <stdexcept>
00033 
00034 namespace utf8
00035 {
// Base for the exceptions that may be thrown from the library.
// It adds nothing to ::std::exception; it only gives the library's
// exception types a common ancestor of their own.
class exception : public ::std::exception
{
};
00039 
00040     // Exceptions that may be thrown from the library functions.
00041     class invalid_code_point : public exception {
00042         uint32_t cp;
00043     public:
00044         invalid_code_point(uint32_t cp) : cp(cp) {}
00045         virtual const char* what() const throw() { return "Invalid code point"; }
00046         uint32_t code_point() const {return cp;}
00047     };
00048 
00049     class invalid_utf8 : public exception {
00050         uint8_t u8;
00051     public:
00052         invalid_utf8 (uint8_t u) : u8(u) {}
00053         virtual const char* what() const throw() { return "Invalid UTF-8"; }
00054         uint8_t utf8_octet() const {return u8;}
00055     };
00056 
00057     class invalid_utf16 : public exception {
00058         uint16_t u16;
00059     public:
00060         invalid_utf16 (uint16_t u) : u16(u) {}
00061         virtual const char* what() const throw() { return "Invalid UTF-16"; }
00062         uint16_t utf16_word() const {return u16;}
00063     };
00064 
00065     class not_enough_room : public exception {
00066     public:
00067         virtual const char* what() const throw() { return "Not enough space"; }
00068     };
00069 
00071 
00072     template <typename octet_iterator>
00073     octet_iterator append(uint32_t cp, octet_iterator result)
00074     {
00075         if (!utf8::internal::is_code_point_valid(cp))
00076             throw invalid_code_point(cp);
00077 
00078         if (cp < 0x80)                        // one octet
00079             *(result++) = static_cast<uint8_t>(cp);
00080         else if (cp < 0x800) {                // two octets
00081             *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
00082             *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
00083         }
00084         else if (cp < 0x10000) {              // three octets
00085             *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
00086             *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
00087             *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
00088         }
00089         else {                                // four octets
00090             *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
00091             *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
00092             *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
00093             *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
00094         }
00095         return result;
00096     }
00097 
00098     template <typename octet_iterator, typename output_iterator>
00099     output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
00100     {
00101         while (start != end) {
00102             octet_iterator sequence_start = start;
00103             internal::utf_error err_code = utf8::internal::validate_next(start, end);
00104             switch (err_code) {
00105                 case internal::UTF8_OK :
00106                     for (octet_iterator it = sequence_start; it != start; ++it)
00107                         *out++ = *it;
00108                     break;
00109                 case internal::NOT_ENOUGH_ROOM:
00110                     throw not_enough_room();
00111                 case internal::INVALID_LEAD:
00112                     out = utf8::append (replacement, out);
00113                     ++start;
00114                     break;
00115                 case internal::INCOMPLETE_SEQUENCE:
00116                 case internal::OVERLONG_SEQUENCE:
00117                 case internal::INVALID_CODE_POINT:
00118                     out = utf8::append (replacement, out);
00119                     ++start;
00120                     // just one replacement mark for the sequence
00121                     while (start != end && utf8::internal::is_trail(*start))
00122                         ++start;
00123                     break;
00124             }
00125         }
00126         return out;
00127     }
00128 
00129     template <typename octet_iterator, typename output_iterator>
00130     inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
00131     {
00132         static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
00133         return utf8::replace_invalid(start, end, out, replacement_marker);
00134     }
00135 
00136     template <typename octet_iterator>
00137     uint32_t next(octet_iterator& it, octet_iterator end)
00138     {
00139         uint32_t cp = 0;
00140         internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
00141         switch (err_code) {
00142             case internal::UTF8_OK :
00143                 break;
00144             case internal::NOT_ENOUGH_ROOM :
00145                 throw not_enough_room();
00146             case internal::INVALID_LEAD :
00147             case internal::INCOMPLETE_SEQUENCE :
00148             case internal::OVERLONG_SEQUENCE :
00149                 throw invalid_utf8(*it);
00150             case internal::INVALID_CODE_POINT :
00151                 throw invalid_code_point(cp);
00152         }
00153         return cp;
00154     }
00155 
00156     template <typename octet_iterator>
00157     uint32_t peek_next(octet_iterator it, octet_iterator end)
00158     {
00159         return utf8::next(it, end);
00160     }
00161 
00162     template <typename octet_iterator>
00163     uint32_t prior(octet_iterator& it, octet_iterator start)
00164     {
00165         // can't do much if it == start
00166         if (it == start)
00167             throw not_enough_room();
00168 
00169         octet_iterator end = it;
00170         // Go back until we hit either a lead octet or start
00171         while (utf8::internal::is_trail(*(--it)))
00172             if (it == start)
00173                 throw invalid_utf8(*it); // error - no lead byte in the sequence
00174         return utf8::peek_next(it, end);
00175     }
00176 
00178     template <typename octet_iterator>
00179     uint32_t previous(octet_iterator& it, octet_iterator pass_start)
00180     {
00181         octet_iterator end = it;
00182         while (utf8::internal::is_trail(*(--it)))
00183             if (it == pass_start)
00184                 throw invalid_utf8(*it); // error - no lead byte in the sequence
00185         octet_iterator temp = it;
00186         return utf8::next(temp, end);
00187     }
00188 
00189     template <typename octet_iterator, typename distance_type>
00190     void advance (octet_iterator& it, distance_type n, octet_iterator end)
00191     {
00192         for (distance_type i = 0; i < n; ++i)
00193             utf8::next(it, end);
00194     }
00195 
00196     template <typename octet_iterator>
00197     typename std::iterator_traits<octet_iterator>::difference_type
00198     distance (octet_iterator first, octet_iterator last)
00199     {
00200         typename std::iterator_traits<octet_iterator>::difference_type dist;
00201         for (dist = 0; first < last; ++dist)
00202             utf8::next(first, last);
00203         return dist;
00204     }
00205 
00206     template <typename u16bit_iterator, typename octet_iterator>
00207     octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
00208     {
00209         while (start != end) {
00210             uint32_t cp = utf8::internal::mask16(*start++);
00211             // Take care of surrogate pairs first
00212             if (utf8::internal::is_lead_surrogate(cp)) {
00213                 if (start != end) {
00214                     uint32_t trail_surrogate = utf8::internal::mask16(*start++);
00215                     if (utf8::internal::is_trail_surrogate(trail_surrogate))
00216                         cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
00217                     else
00218                         throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
00219                 }
00220                 else
00221                     throw invalid_utf16(static_cast<uint16_t>(cp));
00222 
00223             }
00224             // Lone trail surrogate
00225             else if (utf8::internal::is_trail_surrogate(cp))
00226                 throw invalid_utf16(static_cast<uint16_t>(cp));
00227 
00228             result = utf8::append(cp, result);
00229         }
00230         return result;
00231     }
00232 
00233     template <typename u16bit_iterator, typename octet_iterator>
00234     u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
00235     {
00236         while (start != end) {
00237             uint32_t cp = utf8::next(start, end);
00238             if (cp > 0xffff) { //make a surrogate pair
00239                 *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
00240                 *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
00241             }
00242             else
00243                 *result++ = static_cast<uint16_t>(cp);
00244         }
00245         return result;
00246     }
00247 
00248     template <typename octet_iterator, typename u32bit_iterator>
00249     octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
00250     {
00251         while (start != end)
00252             result = utf8::append(*(start++), result);
00253 
00254         return result;
00255     }
00256 
00257     template <typename octet_iterator, typename u32bit_iterator>
00258     u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
00259     {
00260         while (start != end)
00261             (*result++) = utf8::next(start, end);
00262 
00263         return result;
00264     }
00265 
00266     // The iterator class
00267     template <typename octet_iterator>
00268     class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
00269       octet_iterator it;
00270       octet_iterator range_start;
00271       octet_iterator range_end;
00272       public:
00273       iterator () {}
00274       explicit iterator (const octet_iterator& octet_it,
00275                          const octet_iterator& range_start,
00276                          const octet_iterator& range_end) :
00277                it(octet_it), range_start(range_start), range_end(range_end)
00278       {
00279           if (it < range_start || it > range_end)
00280               throw std::out_of_range("Invalid utf-8 iterator position");
00281       }
00282       // the default "big three" are OK
00283       octet_iterator base () const { return it; }
00284       uint32_t operator * () const
00285       {
00286           octet_iterator temp = it;
00287           return utf8::next(temp, range_end);
00288       }
00289       bool operator == (const iterator& rhs) const
00290       {
00291           if (range_start != rhs.range_start || range_end != rhs.range_end)
00292               throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
00293           return (it == rhs.it);
00294       }
00295       bool operator != (const iterator& rhs) const
00296       {
00297           return !(operator == (rhs));
00298       }
00299       iterator& operator ++ ()
00300       {
00301           utf8::next(it, range_end);
00302           return *this;
00303       }
00304       iterator operator ++ (int)
00305       {
00306           iterator temp = *this;
00307           utf8::next(it, range_end);
00308           return temp;
00309       }
00310       iterator& operator -- ()
00311       {
00312           utf8::prior(it, range_start);
00313           return *this;
00314       }
00315       iterator operator -- (int)
00316       {
00317           iterator temp = *this;
00318           utf8::prior(it, range_start);
00319           return temp;
00320       }
00321     }; // class iterator
00322 
00323 } // namespace utf8
00324 
00325 #endif //header guard
00326 
00327 


denso_controller
Author(s): Ryohei Ueda
autogenerated on Thu Jun 6 2019 20:15:19