DecodeUtf8Test.java
Go to the documentation of this file.
1 package com.google.protobuf;
2 
6 import java.nio.ByteBuffer;
7 import java.util.ArrayList;
8 import java.util.List;
9 import java.util.logging.Logger;
10 import junit.framework.TestCase;
11 
12 public class DecodeUtf8Test extends TestCase {
13  private static Logger logger = Logger.getLogger(DecodeUtf8Test.class.getName());
14 
15  private static final Processor SAFE_PROCESSOR = new SafeProcessor();
16  private static final Processor UNSAFE_PROCESSOR = new UnsafeProcessor();
17 
18  public void testRoundTripAllValidChars() throws Exception {
19  for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) {
20  if (i < Character.MIN_SURROGATE || i > Character.MAX_SURROGATE) {
21  String str = new String(Character.toChars(i));
23  }
24  }
25  }
26 
27  // Test all 1, 2, 3 invalid byte combinations. Valid ones would have been covered above.
28 
29  public void testOneByte() throws Exception {
30  int valid = 0;
31  for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
32  ByteString bs = ByteString.copyFrom(new byte[] { (byte) i });
33  if (!bs.isValidUtf8()) {
34  assertInvalid(bs.toByteArray());
35  } else {
36  valid++;
37  }
38  }
39  assertEquals(IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
40  }
41 
42  public void testTwoBytes() throws Exception {
43  int valid = 0;
44  for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
45  for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
46  ByteString bs = ByteString.copyFrom(new byte[]{(byte) i, (byte) j});
47  if (!bs.isValidUtf8()) {
48  assertInvalid(bs.toByteArray());
49  } else {
50  valid++;
51  }
52  }
53  }
54  assertEquals(IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT, valid);
55  }
56 
57  public void testThreeBytes() throws Exception {
58  // Travis' OOM killer doesn't like this test
59  if (System.getenv("TRAVIS") == null) {
60  int count = 0;
61  int valid = 0;
62  for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
63  for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
64  for (int k = Byte.MIN_VALUE; k <= Byte.MAX_VALUE; k++) {
65  byte[] bytes = new byte[]{(byte) i, (byte) j, (byte) k};
66  ByteString bs = ByteString.copyFrom(bytes);
67  if (!bs.isValidUtf8()) {
69  } else {
70  valid++;
71  }
72  count++;
73  if (count % 1000000L == 0) {
74  logger.info("Processed " + (count / 1000000L) + " million characters");
75  }
76  }
77  }
78  }
79  assertEquals(IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
80  }
81  }
82 
86  public void testInvalid_4BytesSamples() throws Exception {
87  // Bad trailing bytes
88  assertInvalid(0xF0, 0xA4, 0xAD, 0x7F);
89  assertInvalid(0xF0, 0xA4, 0xAD, 0xC0);
90 
91  // Special cases for byte2
92  assertInvalid(0xF0, 0x8F, 0xAD, 0xA2);
93  assertInvalid(0xF4, 0x90, 0xAD, 0xA2);
94  }
95 
96  public void testRealStrings() throws Exception {
97  // English
98  assertRoundTrips("The quick brown fox jumps over the lazy dog");
99  // German
100  assertRoundTrips("Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens cirkusklovnen");
101  // Japanese
103  "\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c\u308b\u3092");
104  // Hebrew
106  "\u05d3\u05d2 \u05e1\u05e7\u05e8\u05df \u05e9\u05d8 \u05d1\u05d9\u05dd "
107  + "\u05de\u05d0\u05d5\u05db\u05d6\u05d1 \u05d5\u05dc\u05e4\u05ea\u05e2"
108  + " \u05de\u05e6\u05d0 \u05dc\u05d5 \u05d7\u05d1\u05e8\u05d4 "
109  + "\u05d0\u05d9\u05da \u05d4\u05e7\u05dc\u05d9\u05d8\u05d4");
110  // Thai
112  " \u0e08\u0e07\u0e1d\u0e48\u0e32\u0e1f\u0e31\u0e19\u0e1e\u0e31\u0e12"
113  + "\u0e19\u0e32\u0e27\u0e34\u0e0a\u0e32\u0e01\u0e32\u0e23");
114  // Chinese
116  "\u8fd4\u56de\u94fe\u4e2d\u7684\u4e0b\u4e00\u4e2a\u4ee3\u7406\u9879\u9009\u62e9\u5668");
117  // Chinese with 4-byte chars
118  assertRoundTrips("\uD841\uDF0E\uD841\uDF31\uD841\uDF79\uD843\uDC53\uD843\uDC78"
119  + "\uD843\uDC96\uD843\uDCCF\uD843\uDCD5\uD843\uDD15\uD843\uDD7C\uD843\uDD7F"
120  + "\uD843\uDE0E\uD843\uDE0F\uD843\uDE77\uD843\uDE9D\uD843\uDEA2");
121  // Mixed
123  "The quick brown \u3044\u308d\u306f\u306b\u307b\u3078\u8fd4\u56de\u94fe"
124  + "\u4e2d\u7684\u4e0b\u4e00");
125  }
126 
127  public void testOverlong() throws Exception {
128  assertInvalid(0xc0, 0xaf);
129  assertInvalid(0xe0, 0x80, 0xaf);
130  assertInvalid(0xf0, 0x80, 0x80, 0xaf);
131 
132  // Max overlong
133  assertInvalid(0xc1, 0xbf);
134  assertInvalid(0xe0, 0x9f, 0xbf);
135  assertInvalid(0xf0 ,0x8f, 0xbf, 0xbf);
136 
137  // null overlong
138  assertInvalid(0xc0, 0x80);
139  assertInvalid(0xe0, 0x80, 0x80);
140  assertInvalid(0xf0, 0x80, 0x80, 0x80);
141  }
142 
143  public void testIllegalCodepoints() throws Exception {
144  // Single surrogate
145  assertInvalid(0xed, 0xa0, 0x80);
146  assertInvalid(0xed, 0xad, 0xbf);
147  assertInvalid(0xed, 0xae, 0x80);
148  assertInvalid(0xed, 0xaf, 0xbf);
149  assertInvalid(0xed, 0xb0, 0x80);
150  assertInvalid(0xed, 0xbe, 0x80);
151  assertInvalid(0xed, 0xbf, 0xbf);
152 
153  // Paired surrogates
154  assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80);
155  assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf);
156  assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80);
157  assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf);
158  assertInvalid(0xed, 0xae, 0x80, 0xed, 0xb0, 0x80);
159  assertInvalid(0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf);
160  assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80);
161  assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf);
162  }
163 
164  public void testBufferSlice() throws Exception {
165  String str = "The quick brown fox jumps over the lazy dog";
166  assertRoundTrips(str, 10, 4);
167  assertRoundTrips(str, str.length(), 0);
168  }
169 
170  public void testInvalidBufferSlice() throws Exception {
171  byte[] bytes = "The quick brown fox jumps over the lazy dog".getBytes(Internal.UTF_8);
172  assertInvalidSlice(bytes, bytes.length - 3, 4);
173  assertInvalidSlice(bytes, bytes.length, 1);
174  assertInvalidSlice(bytes, bytes.length + 1, 0);
175  assertInvalidSlice(bytes, 0, bytes.length + 1);
176  }
177 
178  private void assertInvalid(int... bytesAsInt) throws Exception {
179  byte[] bytes = new byte[bytesAsInt.length];
180  for (int i = 0; i < bytesAsInt.length; i++) {
181  bytes[i] = (byte) bytesAsInt[i];
182  }
184  }
185 
186  private void assertInvalid(byte[] bytes) throws Exception {
187  try {
188  UNSAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
189  fail();
190  } catch (InvalidProtocolBufferException e) {
191  // Expected.
192  }
193  try {
194  SAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
195  fail();
196  } catch (InvalidProtocolBufferException e) {
197  // Expected.
198  }
199 
200  ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
201  direct.put(bytes);
202  direct.flip();
203  try {
204  UNSAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length);
205  fail();
206  } catch (InvalidProtocolBufferException e) {
207  // Expected.
208  }
209  try {
210  SAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length);
211  fail();
212  } catch (InvalidProtocolBufferException e) {
213  // Expected.
214  }
215 
216  ByteBuffer heap = ByteBuffer.allocate(bytes.length);
217  heap.put(bytes);
218  heap.flip();
219  try {
220  UNSAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length);
221  fail();
222  } catch (InvalidProtocolBufferException e) {
223  // Expected.
224  }
225  try {
226  SAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length);
227  fail();
228  } catch (InvalidProtocolBufferException e) {
229  // Expected.
230  }
231  }
232 
233  private void assertInvalidSlice(byte[] bytes, int index, int size) throws Exception {
234  try {
235  UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size);
236  fail();
237  } catch (ArrayIndexOutOfBoundsException e) {
238  // Expected.
239  }
240  try {
241  SAFE_PROCESSOR.decodeUtf8(bytes, index, size);
242  fail();
243  } catch (ArrayIndexOutOfBoundsException e) {
244  // Expected.
245  }
246 
247  ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
248  direct.put(bytes);
249  direct.flip();
250  try {
251  UNSAFE_PROCESSOR.decodeUtf8(direct, index, size);
252  fail();
253  } catch (ArrayIndexOutOfBoundsException e) {
254  // Expected.
255  }
256  try {
257  SAFE_PROCESSOR.decodeUtf8(direct, index, size);
258  fail();
259  } catch (ArrayIndexOutOfBoundsException e) {
260  // Expected.
261  }
262 
263  ByteBuffer heap = ByteBuffer.allocate(bytes.length);
264  heap.put(bytes);
265  heap.flip();
266  try {
267  UNSAFE_PROCESSOR.decodeUtf8(heap, index, size);
268  fail();
269  } catch (ArrayIndexOutOfBoundsException e) {
270  // Expected.
271  }
272  try {
273  SAFE_PROCESSOR.decodeUtf8(heap, index, size);
274  fail();
275  } catch (ArrayIndexOutOfBoundsException e) {
276  // Expected.
277  }
278  }
279 
280  private void assertRoundTrips(String str) throws Exception {
281  assertRoundTrips(str, 0, -1);
282  }
283 
284  private void assertRoundTrips(String str, int index, int size) throws Exception {
285  byte[] bytes = str.getBytes(Internal.UTF_8);
286  if (size == -1) {
287  size = bytes.length;
288  }
289  assertDecode(new String(bytes, index, size, Internal.UTF_8),
290  UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size));
291  assertDecode(new String(bytes, index, size, Internal.UTF_8),
292  SAFE_PROCESSOR.decodeUtf8(bytes, index, size));
293 
294  ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
295  direct.put(bytes);
296  direct.flip();
297  assertDecode(new String(bytes, index, size, Internal.UTF_8),
298  UNSAFE_PROCESSOR.decodeUtf8(direct, index, size));
299  assertDecode(new String(bytes, index, size, Internal.UTF_8),
300  SAFE_PROCESSOR.decodeUtf8(direct, index, size));
301 
302  ByteBuffer heap = ByteBuffer.allocate(bytes.length);
303  heap.put(bytes);
304  heap.flip();
305  assertDecode(new String(bytes, index, size, Internal.UTF_8),
306  UNSAFE_PROCESSOR.decodeUtf8(heap, index, size));
307  assertDecode(new String(bytes, index, size, Internal.UTF_8),
308  SAFE_PROCESSOR.decodeUtf8(heap, index, size));
309  }
310 
311  private void assertDecode(String expected, String actual) {
312  if (!expected.equals(actual)) {
313  fail("Failure: Expected (" + codepoints(expected) + ") Actual (" + codepoints(actual) + ")");
314  }
315  }
316 
317  private List<String> codepoints(String str) {
318  List<String> codepoints = new ArrayList<String>();
319  for (int i = 0; i < str.length(); i++) {
320  codepoints.add(Long.toHexString(str.charAt(i)));
321  }
322  return codepoints;
323  }
324 
325 }
com.google.protobuf.Utf8.SafeProcessor
Definition: Utf8.java:860
com.google.protobuf.DecodeUtf8Test.testTwoBytes
void testTwoBytes()
Definition: DecodeUtf8Test.java:42
com.google.protobuf.DecodeUtf8Test.assertRoundTrips
void assertRoundTrips(String str, int index, int size)
Definition: DecodeUtf8Test.java:284
com.google.protobuf
Definition: ProtoCaliperBenchmark.java:2
com.google.protobuf.Utf8
Definition: Utf8.java:76
bytes
uint8 bytes[10]
Definition: coded_stream_unittest.cc:153
com.google.protobuf.DecodeUtf8Test.assertInvalid
void assertInvalid(byte[] bytes)
Definition: DecodeUtf8Test.java:186
com.google.protobuf.DecodeUtf8Test.testOverlong
void testOverlong()
Definition: DecodeUtf8Test.java:127
com.google.protobuf.DecodeUtf8Test.assertInvalid
void assertInvalid(int... bytesAsInt)
Definition: DecodeUtf8Test.java:178
update_failure_list.str
str
Definition: update_failure_list.py:41
com.google.protobuf.DecodeUtf8Test.assertDecode
void assertDecode(String expected, String actual)
Definition: DecodeUtf8Test.java:311
com.google.protobuf.DecodeUtf8Test.codepoints
List< String > codepoints(String str)
Definition: DecodeUtf8Test.java:317
com.google.protobuf.Utf8.Processor
Definition: Utf8.java:380
com.google.protobuf.DecodeUtf8Test.testIllegalCodepoints
void testIllegalCodepoints()
Definition: DecodeUtf8Test.java:143
byte
SETUP_TEARDOWN_TESTCONTEXT typedef uint8_t byte
Definition: test_stream.cpp:12
com.google.protobuf.DecodeUtf8Test.testRoundTripAllValidChars
void testRoundTripAllValidChars()
Definition: DecodeUtf8Test.java:18
com.google.protobuf.DecodeUtf8Test.testInvalidBufferSlice
void testInvalidBufferSlice()
Definition: DecodeUtf8Test.java:170
i
int i
Definition: gmock-matchers_test.cc:764
com.google.protobuf.DecodeUtf8Test.testThreeBytes
void testThreeBytes()
Definition: DecodeUtf8Test.java:57
java
com.google.protobuf.DecodeUtf8Test.assertRoundTrips
void assertRoundTrips(String str)
Definition: DecodeUtf8Test.java:280
com.google.protobuf.DecodeUtf8Test.logger
static Logger logger
Definition: DecodeUtf8Test.java:13
size
GLsizeiptr size
Definition: glcorearb.h:2943
com.google.protobuf.DecodeUtf8Test
Definition: DecodeUtf8Test.java:12
com.google.protobuf.DecodeUtf8Test.testRealStrings
void testRealStrings()
Definition: DecodeUtf8Test.java:96
com.google
com
com.google.protobuf.DecodeUtf8Test.testOneByte
void testOneByte()
Definition: DecodeUtf8Test.java:29
com.google.protobuf.Internal
Definition: Internal.java:54
com.google.protobuf.InvalidProtocolBufferException
Definition: InvalidProtocolBufferException.java:41
com.google.protobuf.DecodeUtf8Test.testInvalid_4BytesSamples
void testInvalid_4BytesSamples()
Definition: DecodeUtf8Test.java:86
gmock_test_utils.TestCase
TestCase
Definition: gmock_test_utils.py:97
com.google.protobuf.Utf8.UnsafeProcessor
Definition: Utf8.java:1169
count
GLint GLsizei count
Definition: glcorearb.h:2830
index
GLuint index
Definition: glcorearb.h:3055
com.google.protobuf.DecodeUtf8Test.UNSAFE_PROCESSOR
static final Processor UNSAFE_PROCESSOR
Definition: DecodeUtf8Test.java:16
com.google.protobuf.DecodeUtf8Test.testBufferSlice
void testBufferSlice()
Definition: DecodeUtf8Test.java:164
com.google.protobuf.DecodeUtf8Test.assertInvalidSlice
void assertInvalidSlice(byte[] bytes, int index, int size)
Definition: DecodeUtf8Test.java:233
com.google.protobuf.DecodeUtf8Test.SAFE_PROCESSOR
static final Processor SAFE_PROCESSOR
Definition: DecodeUtf8Test.java:15
com.google.protobuf.ByteString
Definition: ByteString.java:67


libaditof
Author(s):
autogenerated on Wed May 21 2025 02:06:49