utf-8.c
Go to the documentation of this file.
1 /*******************************************************************************
2  * Copyright (c) 2009, 2018 IBM Corp.
3  *
4  * All rights reserved. This program and the accompanying materials
5  * are made available under the terms of the Eclipse Public License v2.0
6  * and Eclipse Distribution License v1.0 which accompany this distribution.
7  *
8  * The Eclipse Public License is available at
9  * https://www.eclipse.org/legal/epl-2.0/
10  * and the Eclipse Distribution License is available at
11  * http://www.eclipse.org/org/documents/edl-v10.php.
12  *
13  * Contributors:
14  * Ian Craggs - initial API and implementation and/or initial documentation
15  *******************************************************************************/
16 
17 
26 #include "utf-8.h"
27 
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "StackTrace.h"
32 
#if !defined(ARRAY_SIZE)
/**
 * Number of elements in a true array.  Must not be applied to a pointer
 * (for example an array function parameter), where it silently yields the
 * wrong answer.
 */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif


/**
 * Table of well-formed UTF-8 byte sequences.
 *
 * Each row describes one family of encodings: the number of bytes in the
 * sequence and, for every byte position, the inclusive range of values that
 * byte may take.  A character is well formed when it matches at least one row
 * of the matching length.  The rows exclude overlong encodings (lead bytes
 * 0xC0/0xC1, 0xE0 followed by < 0xA0, 0xF0 followed by < 0x90), the UTF-16
 * surrogate range (0xED followed by > 0x9F) and code points above U+10FFFF
 * (0xF4 followed by > 0x8F, lead bytes > 0xF4).
 *
 * The bounds are stored as unsigned char so that values above 0x7F keep their
 * numeric meaning regardless of whether plain char is signed on the platform.
 */
static const struct
{
    int len;                 /* number of bytes in the sequence (1 to 4) */
    struct
    {
        unsigned char lower; /* lowest value allowed at this position */
        unsigned char upper; /* highest value allowed at this position */
    } bytes[4];              /* only the first len entries are meaningful */
}
valid_ranges[] =
{
    {1, { {0x00, 0x7F} } },
    {2, { {0xC2, 0xDF}, {0x80, 0xBF} } },
    {3, { {0xE0, 0xE0}, {0xA0, 0xBF}, {0x80, 0xBF} } },
    {3, { {0xE1, 0xEC}, {0x80, 0xBF}, {0x80, 0xBF} } },
    {3, { {0xED, 0xED}, {0x80, 0x9F}, {0x80, 0xBF} } },
    {3, { {0xEE, 0xEF}, {0x80, 0xBF}, {0x80, 0xBF} } },
    {4, { {0xF0, 0xF0}, {0x90, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF} } },
    {4, { {0xF1, 0xF3}, {0x80, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF} } },
    {4, { {0xF4, 0xF4}, {0x80, 0x8F}, {0x80, 0xBF}, {0x80, 0xBF} } },
};


static const char* UTF8_char_validate(int len, const char* data);


/**
 * Validate the single UTF-8 encoded character at the start of a buffer.
 *
 * @param len  number of bytes available in data
 * @param data pointer to the first byte of the character; may be NULL
 * @return pointer to the byte following the character when it is well
 *         formed, or NULL when data is NULL, len is not positive, the buffer
 *         ends before the character is complete, or the bytes do not match
 *         any row of valid_ranges
 */
static const char* UTF8_char_validate(int len, const char* data)
{
    /* Compare as unsigned so bytes >= 0x80 are not seen as negative. */
    const unsigned char *bytes = (const unsigned char *)data;
    int charlen = 2;
    size_t row;

    /* Reject before touching data[0]: there may be nothing to read. */
    if (data == NULL || len <= 0)
        return NULL;

    /* First work out how many bytes the lead byte claims to introduce.
     * Anything with the top bit set that is not 111xxxxx/1111xxxx is treated
     * as a 2-byte lead; invalid leads are then rejected by the range table. */
    if ((bytes[0] & 0x80) == 0)
        charlen = 1;
    else if ((bytes[0] & 0xF0) == 0xF0)
        charlen = 4;
    else if ((bytes[0] & 0xE0) == 0xE0)
        charlen = 3;

    if (charlen > len)
        return NULL; /* not enough bytes left in the buffer */

    for (row = 0; row < ARRAY_SIZE(valid_ranges); ++row)
    {
        int pos;

        if (valid_ranges[row].len != charlen)
            continue;

        /* Every byte must fall inside this row's range for its position. */
        for (pos = 0; pos < charlen; ++pos)
        {
            if (bytes[pos] < valid_ranges[row].bytes[pos].lower ||
                bytes[pos] > valid_ranges[row].bytes[pos].upper)
                break;
        }

        if (pos == charlen)
            return data + charlen; /* matching one row is sufficient */
    }

    return NULL;
}
121 
122 
129 int UTF8_validate(int len, const char* data)
130 {
131  const char* curdata = NULL;
132  int rc = 0;
133 
134  FUNC_ENTRY;
135  if (len == 0 || data == NULL)
136  {
137  rc = 1;
138  goto exit;
139  }
140  curdata = UTF8_char_validate(len, data);
141  while (curdata && (curdata < data + len))
142  curdata = UTF8_char_validate((int)(data + len - curdata), curdata);
143 
144  rc = curdata != NULL;
145 exit:
146  FUNC_EXIT_RC(rc);
147  return rc;
148 }
149 
150 
156 int UTF8_validateString(const char* string)
157 {
158  int rc = 0;
159 
160  FUNC_ENTRY;
161  if (string != NULL)
162  {
163  rc = UTF8_validate((int)strlen(string), string);
164  }
165  FUNC_EXIT_RC(rc);
166  return rc;
167 }
168 
169 
170 
171 #if defined(UNIT_TESTS)
172 #include <stdio.h>
173 
/* One test vector: a byte count and the bytes themselves (not necessarily
 * NUL terminated; unused trailing bytes of data are zero-initialised). */
typedef struct
{
    int len;        /* number of meaningful bytes in data */
    char data[20];  /* raw bytes handed to UTF8_validate() */
} tests;

/* Byte sequences that UTF8_validate() must accept.  The tables are only read
 * by the test driver, so they are private to this file and immutable. */
static const tests valid_strings[] =
{
    {3, "hjk" },                                                  /* plain ASCII */
    {7, {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E} },             /* 'A' U+2262 U+0391 '.' */
    {3, {'f', 0xC9, 0xB1 } },                                     /* 'f' U+0271 */
    {9, {0xED, 0x95, 0x9C, 0xEA, 0xB5, 0xAD, 0xEC, 0x96, 0xB4} }, /* U+D55C U+AD6D U+C5B4 */
    {9, {0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E} }, /* U+65E5 U+672C U+8A9E */
    {4, {0x2F, 0x2E, 0x2E, 0x2F} },                               /* "/../" in ASCII */
    {7, {0xEF, 0xBB, 0xBF, 0xF0, 0xA3, 0x8E, 0xB4} },             /* U+FEFF U+233B4 */
};

/* Byte sequences that UTF8_validate() must reject. */
static const tests invalid_strings[] =
{
    {2, {0xC0, 0x80} },                         /* overlong 2-byte encoding of U+0000 */
    {5, {0x2F, 0xC0, 0xAE, 0x2E, 0x2F} },       /* "/../" with an overlong '.' (0xC0 0xAE) */
    {6, {0xED, 0xA1, 0x8C, 0xED, 0xBE, 0xB4} }, /* encoded surrogates U+D84C U+DFB4 */
    {1, {0xF4} },                               /* 4-byte lead with no continuation bytes */
};
198 
199 int main (int argc, char *argv[])
200 {
201  int i, failed = 0;
202 
203  for (i = 0; i < ARRAY_SIZE(valid_strings); ++i)
204  {
205  if (!UTF8_validate(valid_strings[i].len, valid_strings[i].data))
206  {
207  printf("valid test %d failed\n", i);
208  failed = 1;
209  }
210  else
211  printf("valid test %d passed\n", i);
212  }
213 
214  for (i = 0; i < ARRAY_SIZE(invalid_strings); ++i)
215  {
216  if (UTF8_validate(invalid_strings[i].len, invalid_strings[i].data))
217  {
218  printf("invalid test %d failed\n", i);
219  failed = 1;
220  }
221  else
222  printf("invalid test %d passed\n", i);
223  }
224 
225  if (failed)
226  printf("Failed\n");
227  else
228  printf("Passed\n");
229 
230  //Don't crash on null data
231  UTF8_validateString(NULL);
232  UTF8_validate(1, NULL);
233  UTF8_char_validate(1, NULL);
234 
235  return 0;
236 } /* End of main function*/
237 
238 #endif
239 
int UTF8_validateString(const char *string)
Definition: utf-8.c:156
static const char * UTF8_char_validate(int len, const char *data)
Definition: utf-8.c:76
struct @73 valid_ranges[]
#define FUNC_EXIT_RC(x)
Definition: StackTrace.h:63
int UTF8_validate(int len, const char *data)
Definition: utf-8.c:129
char upper
Definition: utf-8.c:50
#define FUNC_ENTRY
Definition: StackTrace.h:55
#define ARRAY_SIZE(a)
Definition: utf-8.c:37
char lower
Definition: utf-8.c:49
dictionary data
Definition: mqtt_test.py:22
list tests
Definition: MQTTV311.py:899
enum MQTTReasonCodes rc
Definition: test10.c:1112
int main(int argc, char **argv)
Definition: lua.c:619
Definition: format.h:3618
int len
Definition: utf-8.c:46


plotjuggler
Author(s): Davide Faconti
autogenerated on Sun Dec 6 2020 04:02:48