IsValidUtf8Test.java
Go to the documentation of this file.
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import static com.google.protobuf.IsValidUtf8TestUtil.DIRECT_NIO_FACTORY;
34 import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT;
35 import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT;
36 import static com.google.protobuf.IsValidUtf8TestUtil.HEAP_NIO_FACTORY;
37 import static com.google.protobuf.IsValidUtf8TestUtil.LITERAL_FACTORY;
38 import static com.google.protobuf.IsValidUtf8TestUtil.testBytes;
39 
42 import junit.framework.TestCase;
43 
56 public class IsValidUtf8Test extends TestCase {
58  public void testIsValidUtf8_1Byte() {
59  testBytes(LITERAL_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
60  testBytes(HEAP_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
61  testBytes(DIRECT_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
62  }
63 
65  public void testIsValidUtf8_2Bytes() {
66  testBytes(LITERAL_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
67  testBytes(HEAP_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
68  testBytes(DIRECT_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
69  }
70 
72  public void testIsValidUtf8_3Bytes() {
73  // Travis' OOM killer doesn't like this test
74  if (System.getenv("TRAVIS") == null) {
75  testBytes(LITERAL_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
76  testBytes(HEAP_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
77  testBytes(DIRECT_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
78  }
79  }
80 
87  // Valid 4 byte.
88  assertValidUtf8(0xF0, 0xA4, 0xAD, 0xA2);
89 
90  // Bad trailing bytes
91  assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0x7F);
92  assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0xC0);
93 
94  // Special cases for byte2
95  assertInvalidUtf8(0xF0, 0x8F, 0xAD, 0xA2);
96  assertInvalidUtf8(0xF4, 0x90, 0xAD, 0xA2);
97  }
98 
100  public void testSomeSequences() {
101  // Empty
102  assertTrue(asBytes("").isValidUtf8());
103 
104  // One-byte characters, including control characters
105  assertTrue(asBytes("\u0000abc\u007f").isValidUtf8());
106 
107  // Two-byte characters
108  assertTrue(asBytes("\u00a2\u00a2").isValidUtf8());
109 
110  // Three-byte characters
111  assertTrue(asBytes("\u020ac\u020ac").isValidUtf8());
112 
113  // Four-byte characters
114  assertTrue(asBytes("\u024B62\u024B62").isValidUtf8());
115 
116  // Mixed string
117  assertTrue(asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62").isValidUtf8());
118 
119  // Not a valid string
120  assertInvalidUtf8(-1, 0, -1, 0);
121  }
122 
123  private byte[] toByteArray(int... bytes) {
124  byte[] realBytes = new byte[bytes.length];
125  for (int i = 0; i < bytes.length; i++) {
126  realBytes[i] = (byte) bytes[i];
127  }
128  return realBytes;
129  }
130 
131  private void assertValidUtf8(ByteStringFactory factory, int[] bytes, boolean not) {
132  byte[] realBytes = toByteArray(bytes);
133  assertTrue(not ^ Utf8.isValidUtf8(realBytes));
134  assertTrue(not ^ Utf8.isValidUtf8(realBytes, 0, bytes.length));
135  ByteString leaf = factory.newByteString(realBytes);
136  ByteString sub = leaf.substring(0, bytes.length);
137  assertTrue(not ^ leaf.isValidUtf8());
138  assertTrue(not ^ sub.isValidUtf8());
139  ByteString[] ropes = {
140  RopeByteString.newInstanceForTest(ByteString.EMPTY, leaf),
141  RopeByteString.newInstanceForTest(ByteString.EMPTY, sub),
142  RopeByteString.newInstanceForTest(leaf, ByteString.EMPTY),
143  RopeByteString.newInstanceForTest(sub, ByteString.EMPTY),
144  RopeByteString.newInstanceForTest(sub, leaf)
145  };
146  for (ByteString rope : ropes) {
147  assertTrue(not ^ rope.isValidUtf8());
148  }
149  }
150 
151  private void assertValidUtf8(int... bytes) {
152  assertValidUtf8(LITERAL_FACTORY, bytes, false);
153  assertValidUtf8(HEAP_NIO_FACTORY, bytes, false);
154  assertValidUtf8(DIRECT_NIO_FACTORY, bytes, false);
155  }
156 
157  private void assertInvalidUtf8(int... bytes) {
158  assertValidUtf8(LITERAL_FACTORY, bytes, true);
159  assertValidUtf8(HEAP_NIO_FACTORY, bytes, true);
160  assertValidUtf8(DIRECT_NIO_FACTORY, bytes, true);
161  }
162 
163  private static ByteString asBytes(String s) {
164  return ByteString.copyFromUtf8(s);
165  }
166 
168  // A sanity check.
169  int actual = 0;
170  for (Shard shard : IsValidUtf8TestUtil.FOUR_BYTE_SHARDS) {
171  actual = (int) (actual + shard.expected);
172  }
173  assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual);
174  }
175 }
s
XmlRpcServer s
com.google.protobuf.IsValidUtf8Test.toByteArray
byte[] toByteArray(int... bytes)
Definition: IsValidUtf8Test.java:123
com.google.protobuf
Definition: ProtoCaliperBenchmark.java:2
com.google.protobuf.ByteString.EMPTY
static final ByteString EMPTY
Definition: ByteString.java:85
com.google.protobuf.IsValidUtf8TestUtil
Definition: IsValidUtf8TestUtil.java:58
bytes
uint8 bytes[10]
Definition: coded_stream_unittest.cc:153
com.google.protobuf.IsValidUtf8TestUtil.Shard
Definition: IsValidUtf8TestUtil.java:167
com.google.protobuf.IsValidUtf8TestUtil.ByteStringFactory
Definition: IsValidUtf8TestUtil.java:63
com.google.protobuf.IsValidUtf8Test.assertValidUtf8
void assertValidUtf8(ByteStringFactory factory, int[] bytes, boolean not)
Definition: IsValidUtf8Test.java:131
com.google.protobuf.IsValidUtf8Test.asBytes
static ByteString asBytes(String s)
Definition: IsValidUtf8Test.java:163
com.google.protobuf.IsValidUtf8Test
Definition: IsValidUtf8Test.java:56
com.google.protobuf.IsValidUtf8Test.assertInvalidUtf8
void assertInvalidUtf8(int... bytes)
Definition: IsValidUtf8Test.java:157
com.google.protobuf.IsValidUtf8Test.testIsValidUtf8_4BytesSamples
void testIsValidUtf8_4BytesSamples()
Definition: IsValidUtf8Test.java:86
byte
SETUP_TEARDOWN_TESTCONTEXT typedef uint8_t byte
Definition: test_stream.cpp:12
com.google.protobuf.IsValidUtf8Test.testIsValidUtf8_3Bytes
void testIsValidUtf8_3Bytes()
Definition: IsValidUtf8Test.java:72
i
int i
Definition: gmock-matchers_test.cc:764
com.google.protobuf.IsValidUtf8Test.testShardsHaveExpectedRoundTrippables
void testShardsHaveExpectedRoundTrippables()
Definition: IsValidUtf8Test.java:167
com.google.protobuf.IsValidUtf8Test.testSomeSequences
void testSomeSequences()
Definition: IsValidUtf8Test.java:100
com.google.protobuf.IsValidUtf8Test.testIsValidUtf8_2Bytes
void testIsValidUtf8_2Bytes()
Definition: IsValidUtf8Test.java:65
com.google.protobuf.IsValidUtf8Test.testIsValidUtf8_1Byte
void testIsValidUtf8_1Byte()
Definition: IsValidUtf8Test.java:58
com.google
com
com.google.protobuf.IsValidUtf8Test.assertValidUtf8
void assertValidUtf8(int... bytes)
Definition: IsValidUtf8Test.java:151
gmock_test_utils.TestCase
TestCase
Definition: gmock_test_utils.py:97
com.google.protobuf.ByteString
Definition: ByteString.java:67


libaditof
Author(s):
autogenerated on Wed May 21 2025 02:06:54