test-idna.c
Go to the documentation of this file.
1 /* Copyright The libuv project and contributors. All rights reserved.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to
5  * deal in the Software without restriction, including without limitation the
6  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7  * sell copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19  * IN THE SOFTWARE.
20  */
21 
22 #include "task.h"
23 #include "../src/idna.c"
24 #include <string.h>
25 
/* Exercises uv__utf8_decode1() on valid sequences of every length (one to
 * four bytes, lowest and highest code point of each) and on input the test
 * expects to be rejected with (unsigned) -1. Every case also checks how far
 * the cursor `p` was advanced by the call.
 */
TEST_IMPL(utf8_decode1) {
  const char* p;
  const char* pe;
  char b[32];
  int i;

  /* The decoder is always given the end of the whole buffer as its limit. */
  pe = b + sizeof(b);

  /* ASCII. */
  p = b;
  snprintf(b, sizeof(b), "%c\x7F", 0x00);
  ASSERT(0 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 1);
  ASSERT(127 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 2);

  /* Two-byte sequences. */
  p = b;
  snprintf(b, sizeof(b), "\xC2\x80\xDF\xBF");
  ASSERT(128 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 2);
  ASSERT(0x7FF == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 4);

  /* Three-byte sequences. */
  p = b;
  snprintf(b, sizeof(b), "\xE0\xA0\x80\xEF\xBF\xBF");
  ASSERT(0x800 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 3);
  ASSERT(0xFFFF == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 6);

  /* Four-byte sequences. */
  p = b;
  snprintf(b, sizeof(b), "\xF0\x90\x80\x80\xF4\x8F\xBF\xBF");
  ASSERT(0x10000 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 4);
  ASSERT(0x10FFFF == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 8);

  /* Four-byte sequences > U+10FFFF; disallowed. */
  p = b;
  snprintf(b, sizeof(b), "\xF4\x90\xC0\xC0\xF7\xBF\xBF\xBF");
  ASSERT((unsigned) -1 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 4);
  ASSERT((unsigned) -1 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 8);

  /* Overlong; disallowed. */
  p = b;
  snprintf(b, sizeof(b), "\xC0\x80\xC1\x80");
  ASSERT((unsigned) -1 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 2);
  ASSERT((unsigned) -1 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 4);

  /* Surrogate pairs; disallowed. */
  p = b;
  snprintf(b, sizeof(b), "\xED\xA0\x80\xED\xA3\xBF");
  ASSERT((unsigned) -1 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 3);
  ASSERT((unsigned) -1 == uv__utf8_decode1(&p, pe));
  ASSERT(p == b + 6);

  /* Simply illegal. */
  p = b;
  snprintf(b, sizeof(b), "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");

  /* Each of the eight bytes is rejected on its own, one byte per call. */
  for (i = 1; i <= 8; i++) {
    ASSERT((unsigned) -1 == uv__utf8_decode1(&p, pe));
    ASSERT(p == b + i);
  }

  return 0;
}
98 
99 /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
100 #ifndef __MVS__
101 
/* Asserts that uv__idna_toascii() returns `err` for `input`.
 *
 * `input` must be one or more string literals: it is pasted between two empty
 * literals, and its length is taken with sizeof (minus the terminating NUL).
 * `err` is parenthesized so that any expression can be passed safely.
 */
#define F(input, err)                                                         \
  do {                                                                        \
    char d[256] = {0};                                                        \
    static const char s[] = "" input "";                                      \
    ASSERT((err) ==                                                           \
           uv__idna_toascii(s, s + sizeof(s) - 1, d, d + sizeof(d)));         \
  } while (0)
108 
/* Asserts that uv__idna_toascii() turns `input` into `expected`.
 *
 * Both arguments must be string literals. The return value is compared with
 * sizeof(expected), i.e. the expected length including the terminating NUL.
 * The output is then fed through the encoder a second time to check that
 * already-encoded input comes out unchanged.
 */
#define T(input, expected)                                                    \
  do {                                                                        \
    long n;                                                                   \
    char d1[256] = {0};                                                       \
    char d2[256] = {0};                                                       \
    static const char s[] = "" input "";                                      \
    n = uv__idna_toascii(s, s + sizeof(s) - 1, d1, d1 + sizeof(d1));          \
    ASSERT(n == (long) sizeof(expected));                                     \
    ASSERT(0 == memcmp(d1, expected, (size_t) n));                            \
    /* Sanity check: encoding twice should not change the output. */         \
    n = uv__idna_toascii(d1, d1 + strlen(d1), d2, d2 + sizeof(d2));           \
    ASSERT(n == (long) sizeof(expected));                                     \
    ASSERT(0 == memcmp(d2, expected, (size_t) n));                            \
    ASSERT(0 == memcmp(d1, d2, sizeof(d2)));                                  \
  } while (0)
124 
125 TEST_IMPL(idna_toascii) {
126  /* Illegal inputs. */
127  F("\xC0\x80\xC1\x80", UV_EINVAL); /* Overlong UTF-8 sequence. */
128  F("\xC0\x80\xC1\x80.com", UV_EINVAL); /* Overlong UTF-8 sequence. */
129  /* No conversion. */
130  T("", "");
131  T(".", ".");
132  T(".com", ".com");
133  T("example", "example");
134  T("example-", "example-");
135  T("straße.de", "xn--strae-oqa.de");
136  /* Test cases adapted from punycode.js. Most are from RFC 3492. */
137  T("foo.bar", "foo.bar");
138  T("mañana.com", "xn--maana-pta.com");
139  T("example.com.", "example.com.");
140  T("bücher.com", "xn--bcher-kva.com");
141  T("café.com", "xn--caf-dma.com");
142  T("café.café.com", "xn--caf-dma.xn--caf-dma.com");
143  T("☃-⌘.com", "xn----dqo34k.com");
144  T("퐀☃-⌘.com", "xn----dqo34kn65z.com");
145  T("💩.la", "xn--ls8h.la");
146  T("mañana.com", "xn--maana-pta.com");
147  T("mañana。com", "xn--maana-pta.com");
148  T("mañana.com", "xn--maana-pta.com");
149  T("mañana。com", "xn--maana-pta.com");
150  T("ü", "xn--tda");
151  T(".ü", ".xn--tda");
152  T("ü.ü", "xn--tda.xn--tda");
153  T("ü.ü.", "xn--tda.xn--tda.");
154  T("üëäö♥", "xn--4can8av2009b");
155  T("Willst du die Blüthe des frühen, die Früchte des späteren Jahres",
156  "xn--Willst du die Blthe des frhen, "
157  "die Frchte des spteren Jahres-x9e96lkal");
158  T("ليهمابتكلموشعربي؟", "xn--egbpdaj6bu4bxfgehfvwxn");
159  T("他们为什么不说中文", "xn--ihqwcrb4cv8a8dqg056pqjye");
160  T("他們爲什麽不說中文", "xn--ihqwctvzc91f659drss3x8bo0yb");
161  T("Pročprostěnemluvíčesky", "xn--Proprostnemluvesky-uyb24dma41a");
162  T("למההםפשוטלאמדבריםעברית", "xn--4dbcagdahymbxekheh6e0a7fei0b");
163  T("यहलोगहिन्दीक्योंनहींबोलसकतेहैं",
164  "xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd");
165  T("なぜみんな日本語を話してくれないのか",
166  "xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa");
167  T("세계의모든사람들이한국어를이해한다면얼마나좋을까",
168  "xn--989aomsvi5e83db1d2a355cv1e0vak1d"
169  "wrv93d5xbh15a0dt30a5jpsd879ccm6fea98c");
170  T("почемужеонинеговорятпорусски", "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l");
171  T("PorquénopuedensimplementehablarenEspañol",
172  "xn--PorqunopuedensimplementehablarenEspaol-fmd56a");
173  T("TạisaohọkhôngthểchỉnóitiếngViệt",
174  "xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g");
175  T("3年B組金八先生", "xn--3B-ww4c5e180e575a65lsy2b");
176  T("安室奈美恵-with-SUPER-MONKEYS",
177  "xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n");
178  T("Hello-Another-Way-それぞれの場所",
179  "xn--Hello-Another-Way--fc4qua05auwb3674vfr0b");
180  T("ひとつ屋根の下2", "xn--2-u9tlzr9756bt3uc0v");
181  T("MajiでKoiする5秒前", "xn--MajiKoi5-783gue6qz075azm5e");
182  T("パフィーdeルンバ", "xn--de-jg4avhby1noc0d");
183  T("そのスピードで", "xn--d9juau41awczczp");
184  T("-> $1.00 <-", "-> $1.00 <-");
185  /* Test cases from https://unicode.org/reports/tr46/ */
186  T("faß.de", "xn--fa-hia.de");
187  T("βόλος.com", "xn--nxasmm1c.com");
188  T("ශ්‍රී.com", "xn--10cl1a0b660p.com");
189  T("نامه‌ای.com", "xn--mgba3gch31f060k.com");
190  return 0;
191 }
192 
/* Both helper macros are local to the test above; drop them together. */
#undef T
#undef F
194 
195 #endif /* __MVS__ */
task.h
uv__utf8_decode1
unsigned uv__utf8_decode1(const char **p, const char *pe)
Definition: idna.c:88
string.h
T
#define T(input, expected)
Definition: test-idna.c:109
ASSERT
#define ASSERT(expr)
Definition: task.h:102
xds_manager.p
p
Definition: xds_manager.py:60
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
F
#define F(input, err)
Definition: test-idna.c:102
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
TEST_IMPL
TEST_IMPL(utf8_decode1)
Definition: test-idna.c:26


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:26