00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include "encoding_helpers.h"
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
/*
 * Lookup table indexed by the first byte of a UTF-8 sequence. The entry is
 * the number of bytes expected to follow that first byte, so the total
 * sequence length is the entry plus one.
 *
 * The table performs no validation by itself: bytes 0x80-0xBF map to 0,
 * 0xC0/0xC1 map to 1, and 0xF8-0xFF map to 4 or 5. Rejecting such
 * sequences is left to isLegalUTF8().
 */
static const char trailingBytesForUTF8[256] = {
    /* 0x00-0x0F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x10-0x1F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x20-0x2F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x30-0x3F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x40-0x4F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x50-0x5F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x60-0x6F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x70-0x7F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x80-0x8F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x90-0x9F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xA0-0xAF */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xB0-0xBF */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0xC0-0xCF */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xD0-0xDF */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xE0-0xEF */ 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
    /* 0xF0-0xFF */ 3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5
};
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
/*
 * Check whether the `length` bytes starting at `source` form a single
 * well-formed UTF-8 sequence.
 *
 * source: pointer to the first (lead) byte of the candidate sequence;
 *         `length` bytes are read starting here.
 * length: number of bytes in the candidate sequence. Only 1 through 4 are
 *         accepted; any other value is rejected.
 *
 * Returns 1 when the sequence is legal and 0 otherwise.
 *
 * Note: the function does not check that `length` is the length implied by
 * the lead byte; the caller is expected to derive `length` from the lead
 * byte before calling.
 */
static unsigned char isLegalUTF8(const unsigned char* source, int length) {
    unsigned char a;
    const unsigned char* srcptr = source + length;
    switch (length) {
    default: return 0;
        /* Bytes are examined from last to first; each case falls through
         * to the next one when its byte is acceptable. */
    case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
        /* fallthrough */
    case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
        /* fallthrough */
    case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
        /* `a` now holds the second byte; its valid range depends on the
         * lead byte. No fall-through inside this inner switch. */
        switch (*source) {
            case 0xE0: if (a < 0xA0) return 0; break; /* overlong 3-byte form */
            case 0xED: if (a > 0x9F) return 0; break; /* surrogates U+D800..U+DFFF */
            case 0xF0: if (a < 0x90) return 0; break; /* overlong 4-byte form */
            case 0xF4: if (a > 0x8F) return 0; break; /* above U+10FFFF */
            default:   if (a < 0x80) return 0;
        }
        /* fallthrough */
    case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
        if (*source > 0xF4) return 0;
    }
    return 1;
}
00089
00090 result_t check_string(const unsigned char* string, const int length,
00091 const char check_utf8, const char check_null) {
00092 int position = 0;
00093
00094
00095 int sequence_length = 1;
00096
00097 if (!check_utf8 && !check_null) {
00098 return VALID;
00099 }
00100
00101 while (position < length) {
00102 if (check_null && *(string + position) == 0) {
00103 return HAS_NULL;
00104 }
00105 if (check_utf8) {
00106 sequence_length = trailingBytesForUTF8[*(string + position)] + 1;
00107 if ((position + sequence_length) > length) {
00108 return NOT_UTF_8;
00109 }
00110 if (!isLegalUTF8(string + position, sequence_length)) {
00111 return NOT_UTF_8;
00112 }
00113 }
00114 position += sequence_length;
00115 }
00116
00117 return VALID;
00118 }