00001
00008 #include <v8.h>
00009 #include <node.h>
00010 #include <node_version.h>
00011 #include <node_buffer.h>
00012 #include <node_object_wrap.h>
00013 #include <stdlib.h>
00014 #include <wchar.h>
00015 #include <stdio.h>
00016 #include "nan.h"
00017
00018 using namespace v8;
00019 using namespace node;
00020
/*
 * Unicode code point limits used while validating decoded characters.
 * Each body is fully parenthesized so the macro expands to a single primary
 * expression regardless of the surrounding operators (e.g. `sizeof MACRO`).
 */
#define UNI_SUR_HIGH_START   (static_cast<uint32_t>(0xD800))      /* first UTF-16 surrogate code point */
#define UNI_SUR_LOW_END      (static_cast<uint32_t>(0xDFFF))      /* last UTF-16 surrogate code point */
#define UNI_REPLACEMENT_CHAR (static_cast<uint32_t>(0x0000FFFD))  /* U+FFFD REPLACEMENT CHARACTER */
#define UNI_MAX_LEGAL_UTF32  (static_cast<uint32_t>(0x0010FFFF))  /* highest valid code point */
00025
/*
 * Number of bytes that follow a given first byte in a UTF-8 sequence,
 * indexed by the value of that first byte. Bytes 0x00-0xBF map to 0,
 * 0xC0-0xDF to 1, 0xE0-0xEF to 2, 0xF0-0xF7 to 3, 0xF8-0xFB to 4 and
 * 0xFC-0xFF to 5.
 */
static const uint8_t trailingBytesForUTF8[256] = {
    /* 0x00-0x0F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x10-0x1F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x20-0x2F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x30-0x3F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x40-0x4F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x50-0x5F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x60-0x6F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x70-0x7F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x80-0x8F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x90-0x9F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xA0-0xAF */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xB0-0xBF */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xC0-0xCF */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xD0-0xDF */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xE0-0xEF */ 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
    /* 0xF0-0xFF */ 3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5
};
00036
/*
 * Constants subtracted from the raw accumulated byte values of a sequence,
 * indexed by the number of trailing bytes. Each entry equals the marker bits
 * of the lead byte and of every continuation byte (0x80), shifted to the
 * position they occupy after accumulation, truncated to 32 bits.
 */
static const uint32_t offsetsFromUTF8[6] = {
    0x00000000,  /* 0 trailing bytes: nothing to strip */
    0x00003080,  /* 1: (0xC0 << 6) + 0x80 */
    0x000E2080,  /* 2: (0xE0 << 12) + (0x80 << 6) + 0x80 */
    0x03C82080,  /* 3: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
    0xFA082080,  /* 4: five-byte form */
    0x82082080   /* 5: six-byte form */
};
00041
/**
 * Checks whether the `length` bytes starting at `source` form one
 * well-formed UTF-8 sequence.
 *
 * @param source  pointer to the first (lead) byte; `length` bytes are read.
 * @param length  total sequence length in bytes, lead byte included.
 *                Anything outside 1..4 is rejected.
 * @return 1 if the sequence is well-formed, 0 otherwise.
 *
 * The caller is expected to pass the length implied by the lead byte; this
 * function does not cross-check the two.
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
    if (length < 1 || length > 4) return 0;

    const uint8_t lead = source[0];

    /* Third and fourth bytes, when present, must be plain continuation bytes. */
    for (int pos = length - 1; pos >= 2; --pos) {
        const uint8_t cont = source[pos];
        if (cont < 0x80 || cont > 0xBF) return 0;
    }

    if (length >= 2) {
        const uint8_t second = source[1];

        /* The second byte must always be a continuation byte. Checking the
         * lower bound here matters for lead bytes 0xED and 0xF4, whose
         * special cases below only tighten the upper bound. */
        if (second < 0x80 || second > 0xBF) return 0;

        /* Lead bytes whose second byte has a narrower legal range. */
        switch (lead) {
            case 0xE0: if (second < 0xA0) return 0; break;  /* overlong 3-byte form */
            case 0xED: if (second > 0x9F) return 0; break;  /* would encode U+D800..U+DFFF */
            case 0xF0: if (second < 0x90) return 0; break;  /* overlong 4-byte form */
            case 0xF4: if (second > 0x8F) return 0; break;  /* would exceed U+10FFFF */
            default: break;
        }
    }

    /* 0x80..0xBF are continuation bytes, 0xC0/0xC1 only start overlong forms. */
    if (lead >= 0x80 && lead < 0xC2) return 0;
    /* Lead bytes above 0xF4 would encode values beyond U+10FFFF. */
    if (lead > 0xF4) return 0;
    return 1;
}
00069
00070 int is_valid_utf8 (size_t len, char *value)
00071 {
00072
00073 for (unsigned int i = 0; i < len; i++) {
00074 uint32_t ch = 0;
00075 uint8_t extrabytes = trailingBytesForUTF8[(uint8_t) value[i]];
00076
00077 if (extrabytes + i >= len)
00078 return 0;
00079
00080 if (isLegalUTF8 ((uint8_t *) (value + i), extrabytes + 1) == 0) return 0;
00081
00082 switch (extrabytes) {
00083 case 5 : ch += (uint8_t) value[i++]; ch <<= 6;
00084 case 4 : ch += (uint8_t) value[i++]; ch <<= 6;
00085 case 3 : ch += (uint8_t) value[i++]; ch <<= 6;
00086 case 2 : ch += (uint8_t) value[i++]; ch <<= 6;
00087 case 1 : ch += (uint8_t) value[i++]; ch <<= 6;
00088 case 0 : ch += (uint8_t) value[i];
00089 }
00090
00091 ch -= offsetsFromUTF8[extrabytes];
00092
00093 if (ch <= UNI_MAX_LEGAL_UTF32) {
00094 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
00095 return 0;
00096 } else {
00097 return 0;
00098 }
00099 }
00100
00101 return 1;
00102 }
00103
00104 class Validation : public ObjectWrap
00105 {
00106 public:
00107
00108 static void Initialize(v8::Handle<v8::Object> target)
00109 {
00110 Nan::HandleScope scope;
00111 Local<FunctionTemplate> t = Nan::New<FunctionTemplate>(New);
00112 t->InstanceTemplate()->SetInternalFieldCount(1);
00113 Nan::SetMethod(t, "isValidUTF8", Validation::IsValidUTF8);
00114 Nan::Set(target, Nan::New<String>("Validation").ToLocalChecked(), t->GetFunction());
00115 }
00116
00117 protected:
00118
00119 static NAN_METHOD(New)
00120 {
00121 Nan::HandleScope scope;
00122 Validation* validation = new Validation();
00123 validation->Wrap(info.This());
00124 info.GetReturnValue().Set(info.This());
00125 }
00126
00127 static NAN_METHOD(IsValidUTF8)
00128 {
00129 Nan::HandleScope scope;
00130 if (!Buffer::HasInstance(info[0])) {
00131 return Nan::ThrowTypeError("First argument needs to be a buffer");
00132 }
00133 Local<Object> buffer_obj = info[0]->ToObject();
00134 char *buffer_data = Buffer::Data(buffer_obj);
00135 size_t buffer_length = Buffer::Length(buffer_obj);
00136 info.GetReturnValue().Set(is_valid_utf8(buffer_length, buffer_data) == 1 ? Nan::True() : Nan::False());
00137 }
00138 };
// Module entry point. For Node versions older than 0.10.0 the function is
// declared with C linkage; on 0.10.0 and later it keeps C++ linkage.
#if !NODE_VERSION_AT_LEAST(0,10,0)
extern "C"
#endif
void init (Handle<Object> target)
{
    Nan::HandleScope scope;
    // Installs the Validation constructor on the module's exports object.
    Validation::Initialize(target);
}

// Registers `init` as the initializer of the native module named "validation".
NODE_MODULE(validation, init)