validation.cc
Go to the documentation of this file.
00001 
00008 #include <v8.h>
00009 #include <node.h>
00010 #include <node_version.h>
00011 #include <node_buffer.h>
00012 #include <node_object_wrap.h>
00013 #include <stdlib.h>
00014 #include <wchar.h>
00015 #include <stdio.h>
00016 #include "nan.h"
00017 
00018 using namespace v8;
00019 using namespace node;
00020 
/*
 * Unicode code point boundaries used when checking decoded values.
 *
 * Every replacement list is wrapped in parentheses so that the cast stays
 * attached to its literal regardless of the expression the macro is expanded
 * into (for example `sizeof UNI_MAX_LEGAL_UTF32`).
 */
#define UNI_SUR_HIGH_START   ((uint32_t) 0xD800)      /* first code point of the surrogate range */
#define UNI_SUR_LOW_END      ((uint32_t) 0xDFFF)      /* last code point of the surrogate range */
#define UNI_REPLACEMENT_CHAR ((uint32_t) 0x0000FFFD)  /* U+FFFD */
#define UNI_MAX_LEGAL_UTF32  ((uint32_t) 0x0010FFFF)  /* largest code point accepted by is_valid_utf8 */
00025 
/*
 * Lookup table indexed by a byte value, giving the number of bytes that are
 * expected to follow it when that byte starts a UTF-8 sequence.
 *
 * One row per block of 16 byte values; the label is the first value of the row.
 * Values 0x80-0xBF are listed as 0 here, and 0xF8-0xFF still carry the legacy
 * 4/5 counts; the table itself performs no validation.
 */
static const uint8_t trailingBytesForUTF8[256] = {
  /* 0x00 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x10 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x20 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x30 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x40 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x50 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x60 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x70 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x80 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x90 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xA0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xB0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0xC0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  /* 0xD0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  /* 0xE0 */ 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
  /* 0xF0 */ 3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5
};
00036 
/*
 * Correction constants indexed by the number of trailing bytes.
 *
 * is_valid_utf8 sums the raw bytes of a sequence (shifting left by 6 between
 * bytes) and then subtracts the matching entry, which removes the marker bits
 * of the lead byte and of every continuation byte in one step.
 */
static const uint32_t offsetsFromUTF8[6] = {
  0x00000000,  /* 0 trailing bytes: nothing to remove                        */
  0x00003080,  /* 1: (0xC0 << 6) + 0x80                                      */
  0x000E2080,  /* 2: (0xE0 << 12) + (0x80 << 6) + 0x80                       */
  0x03C82080,  /* 3: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80        */
  0xFA082080,  /* 4: legacy five-byte form                                   */
  0x82082080   /* 5: legacy six-byte form                                    */
};
00041 
/*
 * Checks whether the `length` bytes starting at `source` have the shape of a
 * single well-formed UTF-8 sequence.
 *
 * source  first byte of the candidate sequence; `length` bytes are read.
 * length  total number of bytes in the sequence (lead byte included).
 *
 * Returns 1 when the bytes are acceptable, 0 otherwise. Lengths outside 1..4
 * are always rejected (RFC 3629 makes 5- and 6-byte forms illegal).
 *
 * The second byte is checked against the full 0x80..0xBF continuation range
 * for every lead byte. The range is narrowed further for the lead bytes
 * 0xE0 / 0xF0 (overlong forms), 0xED (surrogates) and 0xF4 (values above
 * U+10FFFF). Checking only the upper bound for 0xED and 0xF4 would let
 * sequences such as ED 41 80 through.
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
  if (length < 1 || length > 4) return 0;

  const uint8_t lead = source[0];

  /* Third and fourth bytes (when present) are plain continuation bytes. */
  for (int idx = length - 1; idx >= 2; --idx) {
    const uint8_t tail = source[idx];
    if (tail < 0x80 || tail > 0xBF) return 0;
  }

  /* The second byte's allowed range depends on the lead byte. */
  if (length >= 2) {
    uint8_t lowest  = 0x80;
    uint8_t highest = 0xBF;
    switch (lead) {
      case 0xE0: lowest  = 0xA0; break;
      case 0xED: highest = 0x9F; break;
      case 0xF0: lowest  = 0x90; break;
      case 0xF4: highest = 0x8F; break;
      default:   break;
    }
    const uint8_t second = source[1];
    if (second < lowest || second > highest) return 0;
  }

  /* 0x80..0xBF are continuation bytes and 0xC0/0xC1 only start overlong forms. */
  if (lead >= 0x80 && lead < 0xC2) return 0;

  /* Lead bytes above 0xF4 would encode values beyond U+10FFFF. */
  if (lead > 0xF4) return 0;

  return 1;
}
00069 
00070 int is_valid_utf8 (size_t len, char *value)
00071 {
00072   /* is the string valid UTF-8? */
00073   for (unsigned int i = 0; i < len; i++) {
00074     uint32_t ch = 0;
00075     uint8_t  extrabytes = trailingBytesForUTF8[(uint8_t) value[i]];
00076 
00077     if (extrabytes + i >= len)
00078       return 0;
00079 
00080     if (isLegalUTF8 ((uint8_t *) (value + i), extrabytes + 1) == 0) return 0;
00081 
00082     switch (extrabytes) {
00083       case 5 : ch += (uint8_t) value[i++]; ch <<= 6;
00084       case 4 : ch += (uint8_t) value[i++]; ch <<= 6;
00085       case 3 : ch += (uint8_t) value[i++]; ch <<= 6;
00086       case 2 : ch += (uint8_t) value[i++]; ch <<= 6;
00087       case 1 : ch += (uint8_t) value[i++]; ch <<= 6;
00088       case 0 : ch += (uint8_t) value[i];
00089     }
00090 
00091     ch -= offsetsFromUTF8[extrabytes];
00092 
00093     if (ch <= UNI_MAX_LEGAL_UTF32) {
00094       if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
00095         return 0;
00096     } else {
00097       return 0;
00098     }
00099   }
00100 
00101   return 1;
00102 }
00103 
00104 class Validation : public ObjectWrap
00105 {
00106 public:
00107 
00108   static void Initialize(v8::Handle<v8::Object> target)
00109   {
00110     Nan::HandleScope scope;
00111     Local<FunctionTemplate> t = Nan::New<FunctionTemplate>(New);
00112     t->InstanceTemplate()->SetInternalFieldCount(1);
00113     Nan::SetMethod(t, "isValidUTF8", Validation::IsValidUTF8);
00114     Nan::Set(target, Nan::New<String>("Validation").ToLocalChecked(), t->GetFunction());
00115   }
00116 
00117 protected:
00118 
00119   static NAN_METHOD(New)
00120   {
00121     Nan::HandleScope scope;
00122     Validation* validation = new Validation();
00123     validation->Wrap(info.This());
00124     info.GetReturnValue().Set(info.This());
00125   }
00126 
00127   static NAN_METHOD(IsValidUTF8)
00128   {
00129     Nan::HandleScope scope;
00130     if (!Buffer::HasInstance(info[0])) {
00131       return Nan::ThrowTypeError("First argument needs to be a buffer");
00132     }
00133     Local<Object> buffer_obj = info[0]->ToObject();
00134     char *buffer_data = Buffer::Data(buffer_obj);
00135     size_t buffer_length = Buffer::Length(buffer_obj);
00136     info.GetReturnValue().Set(is_valid_utf8(buffer_length, buffer_data) == 1 ? Nan::True() : Nan::False());
00137   }
00138 };
00139 #if !NODE_VERSION_AT_LEAST(0,10,0)
00140 extern "C"
00141 #endif
00142 void init (Handle<Object> target)
00143 {
00144   Nan::HandleScope scope;
00145   Validation::Initialize(target);
00146 }
00147 
00148 NODE_MODULE(validation, init)


dji_ronin
Author(s):
autogenerated on Sat Jun 8 2019 20:15:31