00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef NAN_STRING_BYTES_H_
00023 #define NAN_STRING_BYTES_H_
00024
00025
00026
00027 namespace imp {
00028
00029 using v8::Local;
00030 using v8::Object;
00031 using v8::String;
00032 using v8::Value;
00033
00034
00036
// Number of characters produced when base64-encoding `size` input bytes,
// including '=' padding: every started group of 3 input bytes becomes 4
// output characters.
//
// Every use of the argument is parenthesized so that passing an expression
// (e.g. `n << 1` or `a ? b : c`) expands with the intended precedence.
// The argument is still evaluated more than once, so it must be free of
// side effects.
#define base64_encoded_size(size) \
  (((size) + 2 - (((size) + 2) % 3)) / 3 * 4)
00038
00039
00040
00042
// Byte-at-a-time scan: returns true if any of the first `len` bytes of `buf`
// has its high bit (0x80) set.
static bool contains_non_ascii_slow(const char* buf, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    if (buf[i] & 0x80) return true;
  }
  return false;
}


// Returns true if any of the first `len` bytes of `src` has its high bit
// (0x80) set, i.e. is not 7-bit ASCII.
//
// Inputs shorter than 16 bytes are scanned byte by byte. Longer inputs are
// scanned bytewise up to the next word boundary, then one uintptr_t at a
// time, then bytewise again for whatever is left after the last full word.
static bool contains_non_ascii(const char* src, size_t len) {
  if (len < 16) {
    return contains_non_ascii_slow(src, len);
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;

  if (unaligned > 0) {
    // Consume the leading bytes so that the word loop starts on a word
    // boundary. len >= 16 here, so n (< bytes_per_word) never exceeds len.
    const unsigned n = bytes_per_word - unaligned;
    if (contains_non_ascii_slow(src, n)) return true;
    src += n;
    len -= n;
  }

  // 0x80 replicated into every byte of a word. Deriving it from the width of
  // uintptr_t (all-ones / 0xff == 0x0101...01) keeps the check correct on
  // every 64-bit target, not only the ones named by a preprocessor test.
  const uintptr_t mask = ~static_cast<uintptr_t>(0) / 0xff * 0x80;

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    if (srcw[i] & mask) return true;
  }

  // Trailing bytes that do not fill a whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    if (contains_non_ascii_slow(src + offset, remainder)) return true;
  }

  return false;
}
00088
00089
// Byte-at-a-time copy of `len` bytes from `src` to `dst` with the high bit
// of every byte cleared.
static void force_ascii_slow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}


// Copies `len` bytes from `src` to `dst`, clearing the high bit (0x80) of
// every byte so that the output is 7-bit ASCII.
//
// Inputs shorter than 16 bytes, and inputs whose `src` and `dst` do not share
// the same offset within a machine word, are handled byte by byte. Otherwise
// the leading bytes up to the next word boundary are handled bytewise, the
// bulk one uintptr_t at a time, and the tail bytewise again.
static void force_ascii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    force_ascii_slow(src, dst, len);
    return;
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  // Word-wise copying needs both pointers on a word boundary at the same
  // time; that is only reachable when they share the same misalignment.
  // This also covers an aligned src paired with a misaligned dst.
  if (src_unalign != dst_unalign) {
    force_ascii_slow(src, dst, len);
    return;
  }

  if (src_unalign > 0) {
    // Bytes needed to reach the next word boundary. len >= 16 here, so this
    // never exceeds len.
    const unsigned unalign = bytes_per_word - src_unalign;
    force_ascii_slow(src, dst, unalign);
    src += unalign;
    dst += unalign;
    // Subtract exactly what was consumed above; subtracting src_unalign
    // instead would either overrun the buffers or leave the tail unwritten.
    len -= unalign;
  }

  // 0x7f replicated into every byte of a word, derived from the width of
  // uintptr_t so that all bytes are covered on every 64-bit target.
  const uintptr_t mask = ~(~static_cast<uintptr_t>(0) / 0xff * 0x80);

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  // Trailing bytes that do not fill a whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    force_ascii_slow(src + offset, dst + offset, remainder);
  }
}
00140
00141
00142 static size_t base64_encode(const char* src,
00143 size_t slen,
00144 char* dst,
00145 size_t dlen) {
00146
00147 assert(dlen >= base64_encoded_size(slen) &&
00148 "not enough space provided for base64 encode");
00149
00150 dlen = base64_encoded_size(slen);
00151
00152 unsigned a;
00153 unsigned b;
00154 unsigned c;
00155 unsigned i;
00156 unsigned k;
00157 unsigned n;
00158
00159 static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
00160 "abcdefghijklmnopqrstuvwxyz"
00161 "0123456789+/";
00162
00163 i = 0;
00164 k = 0;
00165 n = slen / 3 * 3;
00166
00167 while (i < n) {
00168 a = src[i + 0] & 0xff;
00169 b = src[i + 1] & 0xff;
00170 c = src[i + 2] & 0xff;
00171
00172 dst[k + 0] = table[a >> 2];
00173 dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
00174 dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
00175 dst[k + 3] = table[c & 0x3f];
00176
00177 i += 3;
00178 k += 4;
00179 }
00180
00181 if (n != slen) {
00182 switch (slen - n) {
00183 case 1:
00184 a = src[i + 0] & 0xff;
00185 dst[k + 0] = table[a >> 2];
00186 dst[k + 1] = table[(a & 3) << 4];
00187 dst[k + 2] = '=';
00188 dst[k + 3] = '=';
00189 break;
00190
00191 case 2:
00192 a = src[i + 0] & 0xff;
00193 b = src[i + 1] & 0xff;
00194 dst[k + 0] = table[a >> 2];
00195 dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
00196 dst[k + 2] = table[(b & 0x0f) << 2];
00197 dst[k + 3] = '=';
00198 break;
00199 }
00200 }
00201
00202 return dlen;
00203 }
00204
00205
// Writes the lowercase hexadecimal representation of `slen` bytes from `src`
// into `dst`, two characters per input byte.
//
// `dlen` is the capacity of `dst` and must be at least slen * 2; this is
// checked with assert only. No terminating NUL is written. Returns the
// number of characters written (slen * 2).
static size_t hex_encode(const char* src, size_t slen, char* dst, size_t dlen) {
  assert(dlen >= slen * 2 &&
         "not enough space provided for hex encode");

  static const char hex[] = "0123456789abcdef";

  dlen = slen * 2;
  // size_t counters: a 32-bit counter would wrap before reaching a bound of
  // 2^32 or more and the loop would never terminate.
  for (size_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
    const uint8_t val = static_cast<uint8_t>(src[i]);
    dst[k + 0] = hex[val >> 4];
    dst[k + 1] = hex[val & 15];
  }

  return dlen;
}
00221
00222
00223
00224 static Local<Value> Encode(const char* buf,
00225 size_t buflen,
00226 enum Encoding encoding) {
00227 assert(buflen <= node::Buffer::kMaxLength);
00228 if (!buflen && encoding != BUFFER)
00229 return New("").ToLocalChecked();
00230
00231 Local<String> val;
00232 switch (encoding) {
00233 case BUFFER:
00234 return CopyBuffer(buf, buflen).ToLocalChecked();
00235
00236 case ASCII:
00237 if (contains_non_ascii(buf, buflen)) {
00238 char* out = new char[buflen];
00239 force_ascii(buf, out, buflen);
00240 val = New<String>(out, buflen).ToLocalChecked();
00241 delete[] out;
00242 } else {
00243 val = New<String>(buf, buflen).ToLocalChecked();
00244 }
00245 break;
00246
00247 case UTF8:
00248 val = New<String>(buf, buflen).ToLocalChecked();
00249 break;
00250
00251 case BINARY: {
00252
00253 const unsigned char *cbuf = reinterpret_cast<const unsigned char*>(buf);
00254 uint16_t * twobytebuf = new uint16_t[buflen];
00255 for (size_t i = 0; i < buflen; i++) {
00256
00257 twobytebuf[i] = cbuf[i];
00258 }
00259 val = New<String>(twobytebuf, buflen).ToLocalChecked();
00260 delete[] twobytebuf;
00261 break;
00262 }
00263
00264 case BASE64: {
00265 size_t dlen = base64_encoded_size(buflen);
00266 char* dst = new char[dlen];
00267
00268 size_t written = base64_encode(buf, buflen, dst, dlen);
00269 assert(written == dlen);
00270
00271 val = New<String>(dst, dlen).ToLocalChecked();
00272 delete[] dst;
00273 break;
00274 }
00275
00276 case UCS2: {
00277 const uint16_t* data = reinterpret_cast<const uint16_t*>(buf);
00278 val = New<String>(data, buflen / 2).ToLocalChecked();
00279 break;
00280 }
00281
00282 case HEX: {
00283 size_t dlen = buflen * 2;
00284 char* dst = new char[dlen];
00285 size_t written = hex_encode(buf, buflen, dst, dlen);
00286 assert(written == dlen);
00287
00288 val = New<String>(dst, dlen).ToLocalChecked();
00289 delete[] dst;
00290 break;
00291 }
00292
00293 default:
00294 assert(0 && "unknown encoding");
00295 break;
00296 }
00297
00298 return val;
00299 }
00300
00301 #undef base64_encoded_size
00302
00303 }
00304
00305 #endif // NAN_STRING_BYTES_H_