31 package com.google.protobuf;
33 import static java.
lang.Character.MIN_HIGH_SURROGATE;
34 import static java.
lang.Character.MIN_LOW_SURROGATE;
35 import static java.
lang.Character.MIN_SURROGATE;
37 import java.util.Random;
40 final class Utf8Utils {
/** Private no-argument constructor with an empty body. */
41 private Utf8Utils() {}
43 static class MaxCodePoint {
/**
 * Resolves a user-supplied string to an {@code int}.
 *
 * <p>The string is first passed to {@link Integer#decode(String)}, which accepts decimal,
 * hexadecimal ({@code 0x}, {@code 0X}, {@code #}) and octal (leading {@code 0}) literals. The
 * {@code NumberFormatException} handler then compares the whole string, case-insensitively
 * ({@code (?i)}), against groups of symbolic names.
 *
 * <p>NOTE(review): this listing has gaps. The {@code try} that the {@code catch} belongs to,
 * the results of the first three name groups, and the keyword guarding the final
 * {@code throw} are not visible here; confirm against the complete file.
 *
 * @param userFriendly numeric literal or symbolic name to resolve
 * @return the decoded number, or the constant selected by the matching name group
 * @throws IllegalArgumentException with message {@code "Can't decode codepoint " + userFriendly};
 *     presumably raised when no name group matches - TODO confirm
 */
50 private static int decode(String userFriendly) {
// Numeric path: a parseable literal is returned as-is.
52 return Integer.decode(userFriendly);
53 }
// Not a number: fall back to symbolic names. The exception itself is deliberately unused.
catch (NumberFormatException ignored) {
// String.matches anchors the pattern to the entire input, so "ASCII text" does not match.
// NOTE(review): the value produced by this branch is not visible in this excerpt.
54 if (userFriendly.matches(
"(?i)(?:American|English|ASCII)")) {
57 }
// "Western.*European" allows any characters between the two words.
// NOTE(review): the value produced by this branch is not visible in this excerpt.
else if (userFriendly.matches(
"(?i)(?:Danish|Latin|Western.*European)")) {
61 }
// "ISO.?8859" allows at most one arbitrary character between "ISO" and "8859".
// NOTE(review): the value produced by this branch is not visible in this excerpt.
else if (userFriendly.matches(
"(?i)(?:Greek|Cyrillic|European|ISO.?8859)")) {
64 }
else if (userFriendly.matches(
"(?i)(?:Chinese|Han|Asian|BMP)")) {
// 0x10000: the first code point outside the Basic Multilingual Plane.
66 return Character.MIN_SUPPLEMENTARY_CODE_POINT;
67 }
// "supplementary.*" also accepts any suffix after the word.
else if (userFriendly.matches(
"(?i)(?:Cuneiform|rare|exotic|supplementary.*)")) {
// 0x10FFFF: the highest Unicode code point.
69 return Character.MAX_CODE_POINT;
// Report the undecodable input, echoing it in the message.
71 throw new IllegalArgumentException(
"Can't decode codepoint " + userFriendly);
/**
 * Static factory: returns a new {@code MaxCodePoint} built by passing the argument unchanged to
 * the {@code MaxCodePoint(String)} constructor.
 *
 * @param userFriendly string forwarded to the constructor
 * @return a newly constructed instance (no caching is done here)
 */
76 public static MaxCodePoint valueOf(String userFriendly) {
77 return new MaxCodePoint(userFriendly);
/**
 * Creates an instance whose {@code value} is the result of {@code decode(userFriendly)}.
 * Exceptions thrown by {@code decode} are not caught here and propagate to the caller.
 *
 * <p>NOTE(review): the declaration of {@code value} is not visible in this excerpt; it is
 * later passed to {@code Random.nextInt(int)}, so it is presumably an {@code int} - confirm.
 *
 * @param userFriendly numeric literal or symbolic name accepted by {@code decode}
 */
80 public MaxCodePoint(String userFriendly) {
81 value = decode(userFriendly);
/**
 * Constant whose distribution is {@code {53059, 104, 0, 0}}.
 *
 * <p>{@code randomStringsWithDistribution} in this file converts the four entries into
 * cumulative thresholds and selects one of four branches with a random draw, so each entry
 * acts as the relative weight of its branch; the last two weights are zero here.
 * NOTE(review): presumably the entries count 1-, 2-, 3- and 4-byte UTF-8 sequences, and the
 * enclosing type header is not visible in this excerpt - confirm both.
 */
93 GMM_UTF8_DISTRIBUTION {
// A fresh array is allocated on every call; the caller in this file modifies it in place.
95 public int[] getDistribution() {
96 return new int[] {53059, 104, 0, 0};
/**
 * Constant whose distribution is {@code {119458, 74, 2706, 0}}.
 *
 * <p>{@code randomStringsWithDistribution} in this file converts the four entries into
 * cumulative thresholds and selects one of four branches with a random draw, so each entry
 * acts as the relative weight of its branch; the last weight is zero here.
 * NOTE(review): presumably the entries count 1-, 2-, 3- and 4-byte UTF-8 sequences - confirm.
 */
99 GSR_UTF8_DISTRIBUTION {
// A fresh array is allocated on every call; the caller in this file modifies it in place.
101 public int[] getDistribution() {
102 return new int[] {119458, 74, 2706, 0};
/**
 * Returns the weights for this constant. Both implementations visible in this file return a
 * newly allocated four-element array, and {@code randomStringsWithDistribution} reads indices
 * 0 through 3 and modifies the array in place, so implementations should not hand out a shared
 * array.
 *
 * @return the weight array
 */
106 public abstract int[] getDistribution();
/**
 * Fills an array of {@code stringCount} strings, each produced by
 * {@code randomString(rnd, charCount, maxCodePoint)} using one shared generator.
 *
 * <p>The generator is seeded with the constant 99, so the sequence of draws is the same on
 * every call.
 *
 * <p>NOTE(review): the return statement is not visible in this excerpt; presumably the
 * filled {@code strings} array is returned - confirm.
 *
 * @param stringCount number of array slots to fill; a negative value makes the array
 *     allocation throw {@code NegativeArraySizeException}
 * @param charCount forwarded unchanged to {@code randomString}
 * @param maxCodePoint forwarded unchanged to {@code randomString}
 */
117 static String[] randomStrings(
int stringCount,
int charCount, MaxCodePoint maxCodePoint) {
// Fixed seed: output is reproducible across runs.
118 final long seed = 99;
119 final Random rnd =
new Random(seed);
120 String[]
strings =
new String[stringCount];
121 for (
int i = 0;
i < stringCount;
i++) {
// The same Random instance is reused, so each string continues the one sequence.
122 strings[
i] = randomString(rnd, charCount, maxCodePoint);
/**
 * Builds a string from code points drawn with {@code rnd.nextInt(maxCodePoint.value)}, i.e.
 * uniformly from {@code [0, maxCodePoint.value)}; draws for which
 * {@code Utf8Utils.isSurrogate} is true are rejected by the {@code while} condition.
 *
 * <p>The loop runs {@code charCount} times and appends with {@code appendCodePoint}, which
 * writes two {@code char}s for a supplementary code point, so {@code length()} of the result
 * can exceed {@code charCount}.
 *
 * <p>NOTE(review): the declaration of {@code codePoint} and the opener of the loop closed by
 * {@code while (...)} are not visible in this excerpt; presumably a do/while redraw - confirm.
 *
 * @param rnd source of randomness; advanced by every draw, including rejected ones
 * @param charCount number of outer-loop iterations
 * @param maxCodePoint exclusive upper bound holder; {@code Random.nextInt(int)} throws
 *     {@code IllegalArgumentException} if {@code value} is not positive
 * @return the accumulated string
 */
134 static String randomString(Random rnd,
int charCount, MaxCodePoint maxCodePoint) {
135 StringBuilder
sb =
new StringBuilder();
136 for (
int i = 0;
i < charCount;
i++) {
// Candidate code point in [0, maxCodePoint.value).
139 codePoint = rnd.nextInt(maxCodePoint.value);
140 }
// Surrogate values are not valid standalone code points; try again.
while (Utf8Utils.isSurrogate(codePoint));
141 sb.appendCodePoint(codePoint);
143 return sb.toString();
/**
 * Tells whether {@code c} lies in the inclusive range
 * {@code [Character.MIN_HIGH_SURROGATE, Character.MAX_LOW_SURROGATE]}, i.e. U+D800 to U+DFFF.
 * The parameter is an {@code int}, so full code points as well as {@code char} values can be
 * tested.
 *
 * @param c value to test
 * @return {@code true} if {@code c} is within the surrogate range
 */
147 static boolean isSurrogate(
int c) {
148 return Character.MIN_HIGH_SURROGATE <= c && c <= Character.MAX_LOW_SURROGATE;
157 static String[] randomStringsWithDistribution(
158 int stringCount,
int charCount, Utf8Distribution utf8Distribution) {
159 final int[] distribution = utf8Distribution.getDistribution();
160 for (
int i = 0;
i < 3;
i++) {
161 distribution[
i + 1] += distribution[
i];
163 final long seed = 99;
164 final Random rnd =
new Random(seed);
165 String[]
strings =
new String[stringCount];
166 for (
int i = 0;
i < stringCount;
i++) {
167 StringBuilder
sb =
new StringBuilder();
168 for (
int j = 0; j < charCount; j++) {
171 codePoint = rnd.nextInt(distribution[3]);
172 if (codePoint < distribution[0]) {
175 }
else if (codePoint < distribution[1]) {
178 }
else if (codePoint < distribution[2]) {
180 sb.append(MIN_SURROGATE - 1);
183 sb.append(MIN_HIGH_SURROGATE);
184 sb.append(MIN_LOW_SURROGATE);
186 }
while (Utf8Utils.isSurrogate(codePoint));
187 sb.appendCodePoint(codePoint);