nan_string_bytes.h
Go to the documentation of this file.
00001 // Copyright Joyent, Inc. and other Node contributors.
00002 //
00003 // Permission is hereby granted, free of charge, to any person obtaining a
00004 // copy of this software and associated documentation files (the
00005 // "Software"), to deal in the Software without restriction, including
00006 // without limitation the rights to use, copy, modify, merge, publish,
00007 // distribute, sublicense, and/or sell copies of the Software, and to permit
00008 // persons to whom the Software is furnished to do so, subject to the
00009 // following conditions:
00010 //
00011 // The above copyright notice and this permission notice shall be included
00012 // in all copies or substantial portions of the Software.
00013 //
00014 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00015 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00016 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
00017 // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
00018 // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
00019 // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
00020 // USE OR OTHER DEALINGS IN THE SOFTWARE.
00021 
00022 #ifndef NAN_STRING_BYTES_H_
00023 #define NAN_STRING_BYTES_H_
00024 
00025 // Encodes a raw char* buffer into a v8::Local<v8::String> or Buffer
00026 
00027 namespace imp {
00028 
00029 using v8::Local;
00030 using v8::Object;
00031 using v8::String;
00032 using v8::Value;
00033 
00034 
00036 
// Number of base64 characters (padding included) needed to encode `size`
// bytes: four output characters for every started group of three input bytes.
// The argument is fully parenthesized so that expressions containing
// low-precedence operators expand correctly.  It is evaluated more than once,
// so it must not have side effects.
#define base64_encoded_size(size) \
  ((((size) + 2) - (((size) + 2) % 3)) / 3 * 4)
00038 
00039 
00040 
00042 
// Byte-at-a-time scan: reports whether any of the `len` bytes starting at
// `buf` has its high bit (0x80) set.
static bool contains_non_ascii_slow(const char* buf, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    if (buf[i] & 0x80) return true;
  }
  return false;
}


// Reports whether any of the `len` bytes starting at `src` has its high bit
// (0x80) set.  Inputs shorter than 16 bytes are scanned byte by byte; longer
// inputs are scanned one machine word at a time after a byte-wise prologue
// that brings `src` up to word alignment, followed by a byte-wise epilogue
// for whatever does not fill a whole word.
static bool contains_non_ascii(const char* src, size_t len) {
  if (len < 16) {
    return contains_non_ascii_slow(src, len);
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;

  if (unaligned > 0) {
    // len >= 16 here, so consuming fewer than one word cannot underflow len.
    const unsigned n = bytes_per_word - unaligned;
    if (contains_non_ascii_slow(src, n)) return true;
    src += n;
    len -= n;
  }

  // 0x80 replicated into every byte of a word.  Deriving it from the width of
  // uintptr_t (all-ones / 0xff == 0x0101...01) keeps the upper bytes covered
  // on every 64-bit target, not only the ones an #if happens to list.
  const uintptr_t mask = ~static_cast<uintptr_t>(0) / 0xff * 0x80;

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    if (srcw[i] & mask) return true;
  }

  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    if (contains_non_ascii_slow(src + offset, remainder)) return true;
  }

  return false;
}
00088 
00089 
// Byte-at-a-time copy of `len` bytes from `src` to `dst` with the high bit of
// every byte cleared.
static void force_ascii_slow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}


// Copies `len` bytes from `src` to `dst`, clearing the high bit (0x80) of
// every byte.  Inputs shorter than 16 bytes, and inputs whose source and
// destination do not share the same offset within a machine word, are
// processed byte by byte.  Otherwise a byte-wise prologue aligns both
// pointers, the bulk is processed one word at a time, and a byte-wise
// epilogue handles the tail.
static void force_ascii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    force_ascii_slow(src, dst, len);
    return;
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  // The word loop needs both pointers aligned at the same time, which is only
  // reachable when they start at the same offset within a word.  This also
  // covers an aligned source paired with a misaligned destination.
  if (src_unalign != dst_unalign) {
    force_ascii_slow(src, dst, len);
    return;
  }

  if (src_unalign > 0) {
    const unsigned unalign = bytes_per_word - src_unalign;
    force_ascii_slow(src, dst, unalign);
    src += unalign;
    dst += unalign;
    // Shrink by the number of bytes just consumed (not by the misalignment),
    // otherwise the tail is either overrun or left unconverted.
    len -= unalign;
  }

  // 0x7f replicated into every byte of a word, derived from the width of
  // uintptr_t so that the upper bytes are masked on every 64-bit target.
  const uintptr_t mask = ~(~static_cast<uintptr_t>(0) / 0xff * 0x80);

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    force_ascii_slow(src + offset, dst + offset, remainder);
  }
}
00140 
00141 
00142 static size_t base64_encode(const char* src,
00143                             size_t slen,
00144                             char* dst,
00145                             size_t dlen) {
00146   // We know how much we'll write, just make sure that there's space.
00147   assert(dlen >= base64_encoded_size(slen) &&
00148       "not enough space provided for base64 encode");
00149 
00150   dlen = base64_encoded_size(slen);
00151 
00152   unsigned a;
00153   unsigned b;
00154   unsigned c;
00155   unsigned i;
00156   unsigned k;
00157   unsigned n;
00158 
00159   static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
00160                               "abcdefghijklmnopqrstuvwxyz"
00161                               "0123456789+/";
00162 
00163   i = 0;
00164   k = 0;
00165   n = slen / 3 * 3;
00166 
00167   while (i < n) {
00168     a = src[i + 0] & 0xff;
00169     b = src[i + 1] & 0xff;
00170     c = src[i + 2] & 0xff;
00171 
00172     dst[k + 0] = table[a >> 2];
00173     dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
00174     dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
00175     dst[k + 3] = table[c & 0x3f];
00176 
00177     i += 3;
00178     k += 4;
00179   }
00180 
00181   if (n != slen) {
00182     switch (slen - n) {
00183       case 1:
00184         a = src[i + 0] & 0xff;
00185         dst[k + 0] = table[a >> 2];
00186         dst[k + 1] = table[(a & 3) << 4];
00187         dst[k + 2] = '=';
00188         dst[k + 3] = '=';
00189         break;
00190 
00191       case 2:
00192         a = src[i + 0] & 0xff;
00193         b = src[i + 1] & 0xff;
00194         dst[k + 0] = table[a >> 2];
00195         dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
00196         dst[k + 2] = table[(b & 0x0f) << 2];
00197         dst[k + 3] = '=';
00198         break;
00199     }
00200   }
00201 
00202   return dlen;
00203 }
00204 
00205 
// Writes the lowercase hexadecimal form of the `slen` bytes at `src` into
// `dst`, two characters per input byte.  `dlen` is the capacity of `dst` and
// must be at least slen * 2; this is only checked by an assert.  No
// terminating NUL is written.  Returns the number of characters written,
// which is always slen * 2.
static size_t hex_encode(const char* src, size_t slen, char* dst, size_t dlen) {
  // We know how much we'll write, just make sure that there's space.
  assert(dlen >= slen * 2 &&
      "not enough space provided for hex encode");

  static const char hex[] = "0123456789abcdef";

  dlen = slen * 2;
  // size_t counters: a narrower type would wrap before reaching dlen when the
  // output is larger than that type can represent.
  for (size_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
    const uint8_t val = static_cast<uint8_t>(src[i]);
    dst[k + 0] = hex[val >> 4];
    dst[k + 1] = hex[val & 15];
  }

  return dlen;
}
00221 
00222 
00223 
00224 static Local<Value> Encode(const char* buf,
00225                            size_t buflen,
00226                            enum Encoding encoding) {
00227   assert(buflen <= node::Buffer::kMaxLength);
00228   if (!buflen && encoding != BUFFER)
00229     return New("").ToLocalChecked();
00230 
00231   Local<String> val;
00232   switch (encoding) {
00233     case BUFFER:
00234       return CopyBuffer(buf, buflen).ToLocalChecked();
00235 
00236     case ASCII:
00237       if (contains_non_ascii(buf, buflen)) {
00238         char* out = new char[buflen];
00239         force_ascii(buf, out, buflen);
00240         val = New<String>(out, buflen).ToLocalChecked();
00241         delete[] out;
00242       } else {
00243         val = New<String>(buf, buflen).ToLocalChecked();
00244       }
00245       break;
00246 
00247     case UTF8:
00248       val = New<String>(buf, buflen).ToLocalChecked();
00249       break;
00250 
00251     case BINARY: {
00252       // TODO(isaacs) use ExternalTwoByteString?
00253       const unsigned char *cbuf = reinterpret_cast<const unsigned char*>(buf);
00254       uint16_t * twobytebuf = new uint16_t[buflen];
00255       for (size_t i = 0; i < buflen; i++) {
00256         // XXX is the following line platform independent?
00257         twobytebuf[i] = cbuf[i];
00258       }
00259       val = New<String>(twobytebuf, buflen).ToLocalChecked();
00260       delete[] twobytebuf;
00261       break;
00262     }
00263 
00264     case BASE64: {
00265       size_t dlen = base64_encoded_size(buflen);
00266       char* dst = new char[dlen];
00267 
00268       size_t written = base64_encode(buf, buflen, dst, dlen);
00269       assert(written == dlen);
00270 
00271       val = New<String>(dst, dlen).ToLocalChecked();
00272       delete[] dst;
00273       break;
00274     }
00275 
00276     case UCS2: {
00277       const uint16_t* data = reinterpret_cast<const uint16_t*>(buf);
00278       val = New<String>(data, buflen / 2).ToLocalChecked();
00279       break;
00280     }
00281 
00282     case HEX: {
00283       size_t dlen = buflen * 2;
00284       char* dst = new char[dlen];
00285       size_t written = hex_encode(buf, buflen, dst, dlen);
00286       assert(written == dlen);
00287 
00288       val = New<String>(dst, dlen).ToLocalChecked();
00289       delete[] dst;
00290       break;
00291     }
00292 
00293     default:
00294       assert(0 && "unknown encoding");
00295       break;
00296   }
00297 
00298   return val;
00299 }
00300 
00301 #undef base64_encoded_size
00302 
00303 }  // end of namespace imp
00304 
00305 #endif  // NAN_STRING_BYTES_H_


dji_ronin
Author(s):
autogenerated on Sat Jun 8 2019 20:15:31