Utf8.java
Go to the documentation of this file.
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import static com.google.protobuf.UnsafeUtil.addressOffset;
34 import static com.google.protobuf.UnsafeUtil.hasUnsafeArrayOperations;
35 import static com.google.protobuf.UnsafeUtil.hasUnsafeByteBufferOperations;
36 import static java.lang.Character.MAX_SURROGATE;
37 import static java.lang.Character.MIN_HIGH_SURROGATE;
38 import static java.lang.Character.MIN_LOW_SURROGATE;
39 import static java.lang.Character.MIN_SUPPLEMENTARY_CODE_POINT;
40 import static java.lang.Character.MIN_SURROGATE;
41 import static java.lang.Character.isSurrogatePair;
42 import static java.lang.Character.toCodePoint;
43 
44 import java.nio.ByteBuffer;
45 
75 // TODO(nathanmittler): Copy changes in this class back to Guava
76 final class Utf8 {
77 
83  private static final Processor processor =
84  (UnsafeProcessor.isAvailable() && !Android.isOnAndroidDevice())
85  ? new UnsafeProcessor()
86  : new SafeProcessor();
87 
92  private static final long ASCII_MASK_LONG = 0x8080808080808080L;
93 
99  static final int MAX_BYTES_PER_CHAR = 3;
100 
105  public static final int COMPLETE = 0;
106 
108  public static final int MALFORMED = -1;
109 
119  private static final int UNSAFE_COUNT_ASCII_THRESHOLD = 16;
120 
121  // Other state values include the partial bytes of the incomplete
122  // character to be decoded in the simplest way: we pack the bytes
123  // into the state int in little-endian order. For example:
124  //
125  // int state = byte1 ^ (byte2 << 8) ^ (byte3 << 16);
126  //
127  // Such a state is unpacked thus (note the ~ operation for byte2 to
128  // undo byte1's sign-extension bits):
129  //
130  // int byte1 = (byte) state;
131  // int byte2 = (byte) ~(state >> 8);
132  // int byte3 = (byte) (state >> 16);
133  //
134  // We cannot store a zero byte in the state because it would be
135  // indistinguishable from the absence of a byte. But we don't need
136  // to, because partial bytes must always be negative. When building
137  // a state, we ensure that byte1 is negative and subsequent bytes
138  // are valid trailing bytes.
139 
146  public static boolean isValidUtf8(byte[] bytes) {
147  return processor.isValidUtf8(bytes, 0, bytes.length);
148  }
149 
158  public static boolean isValidUtf8(byte[] bytes, int index, int limit) {
159  return processor.isValidUtf8(bytes, index, limit);
160  }
161 
175  public static int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) {
176  return processor.partialIsValidUtf8(state, bytes, index, limit);
177  }
178 
179  private static int incompleteStateFor(int byte1) {
180  return (byte1 > (byte) 0xF4) ? MALFORMED : byte1;
181  }
182 
183  private static int incompleteStateFor(int byte1, int byte2) {
184  return (byte1 > (byte) 0xF4 || byte2 > (byte) 0xBF) ? MALFORMED : byte1 ^ (byte2 << 8);
185  }
186 
187  private static int incompleteStateFor(int byte1, int byte2, int byte3) {
188  return (byte1 > (byte) 0xF4 || byte2 > (byte) 0xBF || byte3 > (byte) 0xBF)
189  ? MALFORMED
190  : byte1 ^ (byte2 << 8) ^ (byte3 << 16);
191  }
192 
193  private static int incompleteStateFor(byte[] bytes, int index, int limit) {
194  int byte1 = bytes[index - 1];
195  switch (limit - index) {
196  case 0:
197  return incompleteStateFor(byte1);
198  case 1:
199  return incompleteStateFor(byte1, bytes[index]);
200  case 2:
201  return incompleteStateFor(byte1, bytes[index], bytes[index + 1]);
202  default:
203  throw new AssertionError();
204  }
205  }
206 
207  private static int incompleteStateFor(
208  final ByteBuffer buffer, final int byte1, final int index, final int remaining) {
209  switch (remaining) {
210  case 0:
211  return incompleteStateFor(byte1);
212  case 1:
213  return incompleteStateFor(byte1, buffer.get(index));
214  case 2:
215  return incompleteStateFor(byte1, buffer.get(index), buffer.get(index + 1));
216  default:
217  throw new AssertionError();
218  }
219  }
220 
221  // These UTF-8 handling methods are copied from Guava's Utf8 class with a modification to throw
222  // a protocol buffer local exception. This exception is then caught in CodedOutputStream so it can
223  // fallback to more lenient behavior.
224 
225  static class UnpairedSurrogateException extends IllegalArgumentException {
226  UnpairedSurrogateException(int index, int length) {
227  super("Unpaired surrogate at index " + index + " of " + length);
228  }
229  }
230 
239  static int encodedLength(CharSequence sequence) {
240  // Warning to maintainers: this implementation is highly optimized.
241  int utf16Length = sequence.length();
242  int utf8Length = utf16Length;
243  int i = 0;
244 
245  // This loop optimizes for pure ASCII.
246  while (i < utf16Length && sequence.charAt(i) < 0x80) {
247  i++;
248  }
249 
250  // This loop optimizes for chars less than 0x800.
251  for (; i < utf16Length; i++) {
252  char c = sequence.charAt(i);
253  if (c < 0x800) {
254  utf8Length += ((0x7f - c) >>> 31); // branch free!
255  } else {
256  utf8Length += encodedLengthGeneral(sequence, i);
257  break;
258  }
259  }
260 
261  if (utf8Length < utf16Length) {
262  // Necessary and sufficient condition for overflow because of maximum 3x expansion
263  throw new IllegalArgumentException(
264  "UTF-8 length does not fit in int: " + (utf8Length + (1L << 32)));
265  }
266  return utf8Length;
267  }
268 
269  private static int encodedLengthGeneral(CharSequence sequence, int start) {
270  int utf16Length = sequence.length();
271  int utf8Length = 0;
272  for (int i = start; i < utf16Length; i++) {
273  char c = sequence.charAt(i);
274  if (c < 0x800) {
275  utf8Length += (0x7f - c) >>> 31; // branch free!
276  } else {
277  utf8Length += 2;
278  // jdk7+: if (Character.isSurrogate(c)) {
279  if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) {
280  // Check that we have a well-formed surrogate pair.
281  int cp = Character.codePointAt(sequence, i);
282  if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
283  throw new UnpairedSurrogateException(i, utf16Length);
284  }
285  i++;
286  }
287  }
288  }
289  return utf8Length;
290  }
291 
292  static int encode(CharSequence in, byte[] out, int offset, int length) {
293  return processor.encodeUtf8(in, out, offset, length);
294  }
295  // End Guava UTF-8 methods.
296 
306  static boolean isValidUtf8(ByteBuffer buffer) {
307  return processor.isValidUtf8(buffer, buffer.position(), buffer.remaining());
308  }
309 
319  static int partialIsValidUtf8(int state, ByteBuffer buffer, int index, int limit) {
320  return processor.partialIsValidUtf8(state, buffer, index, limit);
321  }
322 
328  static String decodeUtf8(ByteBuffer buffer, int index, int size)
329  throws InvalidProtocolBufferException {
330  return processor.decodeUtf8(buffer, index, size);
331  }
332 
338  static String decodeUtf8(byte[] bytes, int index, int size)
339  throws InvalidProtocolBufferException {
340  return processor.decodeUtf8(bytes, index, size);
341  }
342 
353  static void encodeUtf8(CharSequence in, ByteBuffer out) {
354  processor.encodeUtf8(in, out);
355  }
356 
368  private static int estimateConsecutiveAscii(ByteBuffer buffer, int index, int limit) {
369  int i = index;
370  final int lim = limit - 7;
371  // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
372  // To speed things up further, we're reading longs instead of bytes so we use a mask to
373  // determine if any byte in the current long is non-ASCII.
374  for (; i < lim && (buffer.getLong(i) & ASCII_MASK_LONG) == 0; i += 8) {}
375  return i - index;
376  }
377 
379  // TODO(nathanmittler): Add support for Memory/MemoryBlock on Android.
380  abstract static class Processor {
389  final boolean isValidUtf8(byte[] bytes, int index, int limit) {
390  return partialIsValidUtf8(COMPLETE, bytes, index, limit) == COMPLETE;
391  }
392 
406  abstract int partialIsValidUtf8(int state, byte[] bytes, int index, int limit);
407 
416  final boolean isValidUtf8(ByteBuffer buffer, int index, int limit) {
417  return partialIsValidUtf8(COMPLETE, buffer, index, limit) == COMPLETE;
418  }
419 
426  final int partialIsValidUtf8(
427  final int state, final ByteBuffer buffer, int index, final int limit) {
428  if (buffer.hasArray()) {
429  final int offset = buffer.arrayOffset();
430  return partialIsValidUtf8(state, buffer.array(), offset + index, offset + limit);
431  } else if (buffer.isDirect()) {
432  return partialIsValidUtf8Direct(state, buffer, index, limit);
433  }
434  return partialIsValidUtf8Default(state, buffer, index, limit);
435  }
436 
438  abstract int partialIsValidUtf8Direct(
439  final int state, final ByteBuffer buffer, int index, final int limit);
440 
446  final int partialIsValidUtf8Default(
447  final int state, final ByteBuffer buffer, int index, final int limit) {
448  if (state != COMPLETE) {
449  // The previous decoding operation was incomplete (or malformed).
450  // We look for a well-formed sequence consisting of bytes from
451  // the previous decoding operation (stored in state) together
452  // with bytes from the array slice.
453  //
454  // We expect such "straddler characters" to be rare.
455 
456  if (index >= limit) { // No bytes? No progress.
457  return state;
458  }
459 
460  byte byte1 = (byte) state;
461  // byte1 is never ASCII.
462  if (byte1 < (byte) 0xE0) {
463  // two-byte form
464 
465  // Simultaneously checks for illegal trailing-byte in
466  // leading position and overlong 2-byte form.
467  if (byte1 < (byte) 0xC2
468  // byte2 trailing-byte test
469  || buffer.get(index++) > (byte) 0xBF) {
470  return MALFORMED;
471  }
472  } else if (byte1 < (byte) 0xF0) {
473  // three-byte form
474 
475  // Get byte2 from saved state or array
476  byte byte2 = (byte) ~(state >> 8);
477  if (byte2 == 0) {
478  byte2 = buffer.get(index++);
479  if (index >= limit) {
480  return incompleteStateFor(byte1, byte2);
481  }
482  }
483  if (byte2 > (byte) 0xBF
484  // overlong? 5 most significant bits must not all be zero
485  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
486  // illegal surrogate codepoint?
487  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
488  // byte3 trailing-byte test
489  || buffer.get(index++) > (byte) 0xBF) {
490  return MALFORMED;
491  }
492  } else {
493  // four-byte form
494 
495  // Get byte2 and byte3 from saved state or array
496  byte byte2 = (byte) ~(state >> 8);
497  byte byte3 = 0;
498  if (byte2 == 0) {
499  byte2 = buffer.get(index++);
500  if (index >= limit) {
501  return incompleteStateFor(byte1, byte2);
502  }
503  } else {
504  byte3 = (byte) (state >> 16);
505  }
506  if (byte3 == 0) {
507  byte3 = buffer.get(index++);
508  if (index >= limit) {
509  return incompleteStateFor(byte1, byte2, byte3);
510  }
511  }
512 
513  // If we were called with state == MALFORMED, then byte1 is 0xFF,
514  // which never occurs in well-formed UTF-8, and so we will return
515  // MALFORMED again below.
516 
517  if (byte2 > (byte) 0xBF
518  // Check that 1 <= plane <= 16. Tricky optimized form of:
519  // if (byte1 > (byte) 0xF4 ||
520  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
521  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
522  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
523  // byte3 trailing-byte test
524  || byte3 > (byte) 0xBF
525  // byte4 trailing-byte test
526  || buffer.get(index++) > (byte) 0xBF) {
527  return MALFORMED;
528  }
529  }
530  }
531 
532  // Finish validation for the sequence.
533  return partialIsValidUtf8(buffer, index, limit);
534  }
535 
540  private static int partialIsValidUtf8(final ByteBuffer buffer, int index, final int limit) {
541  index += estimateConsecutiveAscii(buffer, index, limit);
542 
543  for (; ; ) {
544  // Optimize for interior runs of ASCII bytes.
545  // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
546  // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
547  int byte1;
548  do {
549  if (index >= limit) {
550  return COMPLETE;
551  }
552  } while ((byte1 = buffer.get(index++)) >= 0);
553 
554  // If we're here byte1 is not ASCII. Only need to handle 2-4 byte forms.
555  if (byte1 < (byte) 0xE0) {
556  // Two-byte form (110xxxxx 10xxxxxx)
557  if (index >= limit) {
558  // Incomplete sequence
559  return byte1;
560  }
561 
562  // Simultaneously checks for illegal trailing-byte in
563  // leading position and overlong 2-byte form.
564  if (byte1 < (byte) 0xC2 || buffer.get(index) > (byte) 0xBF) {
565  return MALFORMED;
566  }
567  index++;
568  } else if (byte1 < (byte) 0xF0) {
569  // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx)
570  if (index >= limit - 1) {
571  // Incomplete sequence
572  return incompleteStateFor(buffer, byte1, index, limit - index);
573  }
574 
575  final byte byte2 = buffer.get(index++);
576  if (byte2 > (byte) 0xBF
577  // overlong? 5 most significant bits must not all be zero
578  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
579  // check for illegal surrogate codepoints
580  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
581  // byte3 trailing-byte test
582  || buffer.get(index) > (byte) 0xBF) {
583  return MALFORMED;
584  }
585  index++;
586  } else {
587  // Four-byte form (1110xxxx 10xxxxxx 10xxxxxx 10xxxxxx)
588  if (index >= limit - 2) {
589  // Incomplete sequence
590  return incompleteStateFor(buffer, byte1, index, limit - index);
591  }
592 
593  // TODO(nathanmittler): Consider using getInt() to improve performance.
594  final int byte2 = buffer.get(index++);
595  if (byte2 > (byte) 0xBF
596  // Check that 1 <= plane <= 16. Tricky optimized form of:
597  // if (byte1 > (byte) 0xF4 ||
598  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
599  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
600  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
601  // byte3 trailing-byte test
602  || buffer.get(index++) > (byte) 0xBF
603  // byte4 trailing-byte test
604  || buffer.get(index++) > (byte) 0xBF) {
605  return MALFORMED;
606  }
607  }
608  }
609  }
610 
616  abstract String decodeUtf8(byte[] bytes, int index, int size)
617  throws InvalidProtocolBufferException;
618 
624  final String decodeUtf8(ByteBuffer buffer, int index, int size)
625  throws InvalidProtocolBufferException {
626  if (buffer.hasArray()) {
627  final int offset = buffer.arrayOffset();
628  return decodeUtf8(buffer.array(), offset + index, size);
629  } else if (buffer.isDirect()) {
630  return decodeUtf8Direct(buffer, index, size);
631  }
632  return decodeUtf8Default(buffer, index, size);
633  }
634 
636  abstract String decodeUtf8Direct(ByteBuffer buffer, int index, int size)
637  throws InvalidProtocolBufferException;
638 
643  final String decodeUtf8Default(ByteBuffer buffer, int index, int size)
644  throws InvalidProtocolBufferException {
645  // Bitwise OR combines the sign bits so any negative value fails the check.
646  if ((index | size | buffer.limit() - index - size) < 0) {
647  throw new ArrayIndexOutOfBoundsException(
648  String.format("buffer limit=%d, index=%d, limit=%d", buffer.limit(), index, size));
649  }
650 
651  int offset = index;
652  final int limit = offset + size;
653 
654  // The longest possible resulting String is the same as the number of input bytes, when it is
655  // all ASCII. For other cases, this over-allocates and we will truncate in the end.
656  char[] resultArr = new char[size];
657  int resultPos = 0;
658 
659  // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this).
660  // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
661  while (offset < limit) {
662  byte b = buffer.get(offset);
663  if (!DecodeUtil.isOneByte(b)) {
664  break;
665  }
666  offset++;
667  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
668  }
669 
670  while (offset < limit) {
671  byte byte1 = buffer.get(offset++);
672  if (DecodeUtil.isOneByte(byte1)) {
673  DecodeUtil.handleOneByte(byte1, resultArr, resultPos++);
674  // It's common for there to be multiple ASCII characters in a run mixed in, so add an
675  // extra optimized loop to take care of these runs.
676  while (offset < limit) {
677  byte b = buffer.get(offset);
678  if (!DecodeUtil.isOneByte(b)) {
679  break;
680  }
681  offset++;
682  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
683  }
684  } else if (DecodeUtil.isTwoBytes(byte1)) {
685  if (offset >= limit) {
686  throw InvalidProtocolBufferException.invalidUtf8();
687  }
688  DecodeUtil.handleTwoBytes(
689  byte1, /* byte2 */ buffer.get(offset++), resultArr, resultPos++);
690  } else if (DecodeUtil.isThreeBytes(byte1)) {
691  if (offset >= limit - 1) {
692  throw InvalidProtocolBufferException.invalidUtf8();
693  }
694  DecodeUtil.handleThreeBytes(
695  byte1,
696  /* byte2 */ buffer.get(offset++),
697  /* byte3 */ buffer.get(offset++),
698  resultArr,
699  resultPos++);
700  } else {
701  if (offset >= limit - 2) {
702  throw InvalidProtocolBufferException.invalidUtf8();
703  }
704  DecodeUtil.handleFourBytes(
705  byte1,
706  /* byte2 */ buffer.get(offset++),
707  /* byte3 */ buffer.get(offset++),
708  /* byte4 */ buffer.get(offset++),
709  resultArr,
710  resultPos++);
711  // 4-byte case requires two chars.
712  resultPos++;
713  }
714  }
715 
716  return new String(resultArr, 0, resultPos);
717  }
718 
749  abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length);
750 
768  final void encodeUtf8(CharSequence in, ByteBuffer out) {
769  if (out.hasArray()) {
770  final int offset = out.arrayOffset();
771  int endIndex = Utf8.encode(in, out.array(), offset + out.position(), out.remaining());
772  out.position(endIndex - offset);
773  } else if (out.isDirect()) {
774  encodeUtf8Direct(in, out);
775  } else {
776  encodeUtf8Default(in, out);
777  }
778  }
779 
781  abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out);
782 
787  final void encodeUtf8Default(CharSequence in, ByteBuffer out) {
788  final int inLength = in.length();
789  int outIx = out.position();
790  int inIx = 0;
791 
792  // Since ByteBuffer.putXXX() already checks boundaries for us, no need to explicitly check
793  // access. Assume the buffer is big enough and let it handle the out of bounds exception
794  // if it occurs.
795  try {
796  // Designed to take advantage of
797  // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination
798  for (char c; inIx < inLength && (c = in.charAt(inIx)) < 0x80; ++inIx) {
799  out.put(outIx + inIx, (byte) c);
800  }
801  if (inIx == inLength) {
802  // Successfully encoded the entire string.
803  out.position(outIx + inIx);
804  return;
805  }
806 
807  outIx += inIx;
808  for (char c; inIx < inLength; ++inIx, ++outIx) {
809  c = in.charAt(inIx);
810  if (c < 0x80) {
811  // One byte (0xxx xxxx)
812  out.put(outIx, (byte) c);
813  } else if (c < 0x800) {
814  // Two bytes (110x xxxx 10xx xxxx)
815 
816  // Benchmarks show put performs better than putShort here (for HotSpot).
817  out.put(outIx++, (byte) (0xC0 | (c >>> 6)));
818  out.put(outIx, (byte) (0x80 | (0x3F & c)));
819  } else if (c < MIN_SURROGATE || MAX_SURROGATE < c) {
820  // Three bytes (1110 xxxx 10xx xxxx 10xx xxxx)
821  // Maximum single-char code point is 0xFFFF, 16 bits.
822 
823  // Benchmarks show put performs better than putShort here (for HotSpot).
824  out.put(outIx++, (byte) (0xE0 | (c >>> 12)));
825  out.put(outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
826  out.put(outIx, (byte) (0x80 | (0x3F & c)));
827  } else {
828  // Four bytes (1111 xxxx 10xx xxxx 10xx xxxx 10xx xxxx)
829 
830  // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
831  // bytes
832  final char low;
833  if (inIx + 1 == inLength || !isSurrogatePair(c, (low = in.charAt(++inIx)))) {
834  throw new UnpairedSurrogateException(inIx, inLength);
835  }
836  // TODO(nathanmittler): Consider using putInt() to improve performance.
837  int codePoint = toCodePoint(c, low);
838  out.put(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
839  out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
840  out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
841  out.put(outIx, (byte) (0x80 | (0x3F & codePoint)));
842  }
843  }
844 
845  // Successfully encoded the entire string.
846  out.position(outIx);
847  } catch (IndexOutOfBoundsException e) {
848  // TODO(nathanmittler): Consider making the API throw IndexOutOfBoundsException instead.
849 
850  // If we failed in the outer ASCII loop, outIx will not have been updated. In this case,
851  // use inIx to determine the bad write index.
852  int badWriteIndex = out.position() + Math.max(inIx, outIx - out.position() + 1);
853  throw new ArrayIndexOutOfBoundsException(
854  "Failed writing " + in.charAt(inIx) + " at index " + badWriteIndex);
855  }
856  }
857  }
858 
860  static final class SafeProcessor extends Processor {
861  @Override
862  int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) {
863  if (state != COMPLETE) {
864  // The previous decoding operation was incomplete (or malformed).
865  // We look for a well-formed sequence consisting of bytes from
866  // the previous decoding operation (stored in state) together
867  // with bytes from the array slice.
868  //
869  // We expect such "straddler characters" to be rare.
870 
871  if (index >= limit) { // No bytes? No progress.
872  return state;
873  }
874  int byte1 = (byte) state;
875  // byte1 is never ASCII.
876  if (byte1 < (byte) 0xE0) {
877  // two-byte form
878 
879  // Simultaneously checks for illegal trailing-byte in
880  // leading position and overlong 2-byte form.
881  if (byte1 < (byte) 0xC2
882  // byte2 trailing-byte test
883  || bytes[index++] > (byte) 0xBF) {
884  return MALFORMED;
885  }
886  } else if (byte1 < (byte) 0xF0) {
887  // three-byte form
888 
889  // Get byte2 from saved state or array
890  int byte2 = (byte) ~(state >> 8);
891  if (byte2 == 0) {
892  byte2 = bytes[index++];
893  if (index >= limit) {
894  return incompleteStateFor(byte1, byte2);
895  }
896  }
897  if (byte2 > (byte) 0xBF
898  // overlong? 5 most significant bits must not all be zero
899  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
900  // illegal surrogate codepoint?
901  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
902  // byte3 trailing-byte test
903  || bytes[index++] > (byte) 0xBF) {
904  return MALFORMED;
905  }
906  } else {
907  // four-byte form
908 
909  // Get byte2 and byte3 from saved state or array
910  int byte2 = (byte) ~(state >> 8);
911  int byte3 = 0;
912  if (byte2 == 0) {
913  byte2 = bytes[index++];
914  if (index >= limit) {
915  return incompleteStateFor(byte1, byte2);
916  }
917  } else {
918  byte3 = (byte) (state >> 16);
919  }
920  if (byte3 == 0) {
921  byte3 = bytes[index++];
922  if (index >= limit) {
923  return incompleteStateFor(byte1, byte2, byte3);
924  }
925  }
926 
927  // If we were called with state == MALFORMED, then byte1 is 0xFF,
928  // which never occurs in well-formed UTF-8, and so we will return
929  // MALFORMED again below.
930 
931  if (byte2 > (byte) 0xBF
932  // Check that 1 <= plane <= 16. Tricky optimized form of:
933  // if (byte1 > (byte) 0xF4 ||
934  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
935  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
936  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
937  // byte3 trailing-byte test
938  || byte3 > (byte) 0xBF
939  // byte4 trailing-byte test
940  || bytes[index++] > (byte) 0xBF) {
941  return MALFORMED;
942  }
943  }
944  }
945 
946  return partialIsValidUtf8(bytes, index, limit);
947  }
948 
949  @Override
950  int partialIsValidUtf8Direct(int state, ByteBuffer buffer, int index, int limit) {
951  // For safe processing, we have to use the ByteBuffer API.
952  return partialIsValidUtf8Default(state, buffer, index, limit);
953  }
954 
955  @Override
956  String decodeUtf8(byte[] bytes, int index, int size) throws InvalidProtocolBufferException {
957  // Bitwise OR combines the sign bits so any negative value fails the check.
958  if ((index | size | bytes.length - index - size) < 0) {
959  throw new ArrayIndexOutOfBoundsException(
960  String.format("buffer length=%d, index=%d, size=%d", bytes.length, index, size));
961  }
962 
963  int offset = index;
964  final int limit = offset + size;
965 
966  // The longest possible resulting String is the same as the number of input bytes, when it is
967  // all ASCII. For other cases, this over-allocates and we will truncate in the end.
968  char[] resultArr = new char[size];
969  int resultPos = 0;
970 
971  // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this).
972  // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
973  while (offset < limit) {
974  byte b = bytes[offset];
975  if (!DecodeUtil.isOneByte(b)) {
976  break;
977  }
978  offset++;
979  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
980  }
981 
982  while (offset < limit) {
983  byte byte1 = bytes[offset++];
984  if (DecodeUtil.isOneByte(byte1)) {
985  DecodeUtil.handleOneByte(byte1, resultArr, resultPos++);
986  // It's common for there to be multiple ASCII characters in a run mixed in, so add an
987  // extra optimized loop to take care of these runs.
988  while (offset < limit) {
989  byte b = bytes[offset];
990  if (!DecodeUtil.isOneByte(b)) {
991  break;
992  }
993  offset++;
994  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
995  }
996  } else if (DecodeUtil.isTwoBytes(byte1)) {
997  if (offset >= limit) {
998  throw InvalidProtocolBufferException.invalidUtf8();
999  }
1000  DecodeUtil.handleTwoBytes(byte1, /* byte2 */ bytes[offset++], resultArr, resultPos++);
1001  } else if (DecodeUtil.isThreeBytes(byte1)) {
1002  if (offset >= limit - 1) {
1003  throw InvalidProtocolBufferException.invalidUtf8();
1004  }
1005  DecodeUtil.handleThreeBytes(
1006  byte1,
1007  /* byte2 */ bytes[offset++],
1008  /* byte3 */ bytes[offset++],
1009  resultArr,
1010  resultPos++);
1011  } else {
1012  if (offset >= limit - 2) {
1013  throw InvalidProtocolBufferException.invalidUtf8();
1014  }
1015  DecodeUtil.handleFourBytes(
1016  byte1,
1017  /* byte2 */ bytes[offset++],
1018  /* byte3 */ bytes[offset++],
1019  /* byte4 */ bytes[offset++],
1020  resultArr,
1021  resultPos++);
1022  // 4-byte case requires two chars.
1023  resultPos++;
1024  }
1025  }
1026 
1027  return new String(resultArr, 0, resultPos);
1028  }
1029 
1030  @Override
1031  String decodeUtf8Direct(ByteBuffer buffer, int index, int size)
1032  throws InvalidProtocolBufferException {
1033  // For safe processing, we have to use the ByteBufferAPI.
1034  return decodeUtf8Default(buffer, index, size);
1035  }
1036 
1037  @Override
1038  int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
1039  int utf16Length = in.length();
1040  int j = offset;
1041  int i = 0;
1042  int limit = offset + length;
1043  // Designed to take advantage of
1044  // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination
1045  for (char c; i < utf16Length && i + j < limit && (c = in.charAt(i)) < 0x80; i++) {
1046  out[j + i] = (byte) c;
1047  }
1048  if (i == utf16Length) {
1049  return j + utf16Length;
1050  }
1051  j += i;
1052  for (char c; i < utf16Length; i++) {
1053  c = in.charAt(i);
1054  if (c < 0x80 && j < limit) {
1055  out[j++] = (byte) c;
1056  } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes
1057  out[j++] = (byte) ((0xF << 6) | (c >>> 6));
1058  out[j++] = (byte) (0x80 | (0x3F & c));
1059  } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) {
1060  // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
1061  out[j++] = (byte) ((0xF << 5) | (c >>> 12));
1062  out[j++] = (byte) (0x80 | (0x3F & (c >>> 6)));
1063  out[j++] = (byte) (0x80 | (0x3F & c));
1064  } else if (j <= limit - 4) {
1065  // Minimum code point represented by a surrogate pair is 0x10000, 17 bits,
1066  // four UTF-8 bytes
1067  final char low;
1068  if (i + 1 == in.length() || !Character.isSurrogatePair(c, (low = in.charAt(++i)))) {
1069  throw new UnpairedSurrogateException((i - 1), utf16Length);
1070  }
1071  int codePoint = Character.toCodePoint(c, low);
1072  out[j++] = (byte) ((0xF << 4) | (codePoint >>> 18));
1073  out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12)));
1074  out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6)));
1075  out[j++] = (byte) (0x80 | (0x3F & codePoint));
1076  } else {
1077  // If we are surrogates and we're not a surrogate pair, always throw an
1078  // UnpairedSurrogateException instead of an ArrayOutOfBoundsException.
1079  if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE)
1080  && (i + 1 == in.length() || !Character.isSurrogatePair(c, in.charAt(i + 1)))) {
1081  throw new UnpairedSurrogateException(i, utf16Length);
1082  }
1083  throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j);
1084  }
1085  }
1086  return j;
1087  }
1088 
1089  @Override
1090  void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
1091  // For safe processing, we have to use the ByteBuffer API.
1092  encodeUtf8Default(in, out);
1093  }
1094 
1095  private static int partialIsValidUtf8(byte[] bytes, int index, int limit) {
1096  // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this).
1097  // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
1098  while (index < limit && bytes[index] >= 0) {
1099  index++;
1100  }
1101 
1102  return (index >= limit) ? COMPLETE : partialIsValidUtf8NonAscii(bytes, index, limit);
1103  }
1104 
1105  private static int partialIsValidUtf8NonAscii(byte[] bytes, int index, int limit) {
1106  for (; ; ) {
1107  int byte1;
1108  int byte2;
1109 
1110  // Optimize for interior runs of ASCII bytes.
1111  do {
1112  if (index >= limit) {
1113  return COMPLETE;
1114  }
1115  } while ((byte1 = bytes[index++]) >= 0);
1116 
1117  if (byte1 < (byte) 0xE0) {
1118  // two-byte form
1119 
1120  if (index >= limit) {
1121  // Incomplete sequence
1122  return byte1;
1123  }
1124 
1125  // Simultaneously checks for illegal trailing-byte in
1126  // leading position and overlong 2-byte form.
1127  if (byte1 < (byte) 0xC2 || bytes[index++] > (byte) 0xBF) {
1128  return MALFORMED;
1129  }
1130  } else if (byte1 < (byte) 0xF0) {
1131  // three-byte form
1132 
1133  if (index >= limit - 1) { // incomplete sequence
1134  return incompleteStateFor(bytes, index, limit);
1135  }
1136  if ((byte2 = bytes[index++]) > (byte) 0xBF
1137  // overlong? 5 most significant bits must not all be zero
1138  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
1139  // check for illegal surrogate codepoints
1140  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
1141  // byte3 trailing-byte test
1142  || bytes[index++] > (byte) 0xBF) {
1143  return MALFORMED;
1144  }
1145  } else {
1146  // four-byte form
1147 
1148  if (index >= limit - 2) { // incomplete sequence
1149  return incompleteStateFor(bytes, index, limit);
1150  }
1151  if ((byte2 = bytes[index++]) > (byte) 0xBF
1152  // Check that 1 <= plane <= 16. Tricky optimized form of:
1153  // if (byte1 > (byte) 0xF4 ||
1154  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
1155  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
1156  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
1157  // byte3 trailing-byte test
1158  || bytes[index++] > (byte) 0xBF
1159  // byte4 trailing-byte test
1160  || bytes[index++] > (byte) 0xBF) {
1161  return MALFORMED;
1162  }
1163  }
1164  }
1165  }
1166  }
1167 
1169  static final class UnsafeProcessor extends Processor {
1171  static boolean isAvailable() {
1172  return hasUnsafeArrayOperations() && hasUnsafeByteBufferOperations();
1173  }
1174 
1175  @Override
1176  int partialIsValidUtf8(int state, byte[] bytes, final int index, final int limit) {
1177  // Bitwise OR combines the sign bits so any negative value fails the check.
1178  if ((index | limit | bytes.length - limit) < 0) {
1179  throw new ArrayIndexOutOfBoundsException(
1180  String.format("Array length=%d, index=%d, limit=%d", bytes.length, index, limit));
1181  }
1182  long offset = index;
1183  final long offsetLimit = limit;
1184  if (state != COMPLETE) {
1185  // The previous decoding operation was incomplete (or malformed).
1186  // We look for a well-formed sequence consisting of bytes from
1187  // the previous decoding operation (stored in state) together
1188  // with bytes from the array slice.
1189  //
1190  // We expect such "straddler characters" to be rare.
1191 
1192  if (offset >= offsetLimit) { // No bytes? No progress.
1193  return state;
1194  }
1195  int byte1 = (byte) state;
1196  // byte1 is never ASCII.
1197  if (byte1 < (byte) 0xE0) {
1198  // two-byte form
1199 
1200  // Simultaneously checks for illegal trailing-byte in
1201  // leading position and overlong 2-byte form.
1202  if (byte1 < (byte) 0xC2
1203  // byte2 trailing-byte test
1204  || UnsafeUtil.getByte(bytes, offset++) > (byte) 0xBF) {
1205  return MALFORMED;
1206  }
1207  } else if (byte1 < (byte) 0xF0) {
1208  // three-byte form
1209 
1210  // Get byte2 from saved state or array
1211  int byte2 = (byte) ~(state >> 8);
1212  if (byte2 == 0) {
1213  byte2 = UnsafeUtil.getByte(bytes, offset++);
1214  if (offset >= offsetLimit) {
1215  return incompleteStateFor(byte1, byte2);
1216  }
1217  }
1218  if (byte2 > (byte) 0xBF
1219  // overlong? 5 most significant bits must not all be zero
1220  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
1221  // illegal surrogate codepoint?
1222  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
1223  // byte3 trailing-byte test
1224  || UnsafeUtil.getByte(bytes, offset++) > (byte) 0xBF) {
1225  return MALFORMED;
1226  }
1227  } else {
1228  // four-byte form
1229 
1230  // Get byte2 and byte3 from saved state or array
1231  int byte2 = (byte) ~(state >> 8);
1232  int byte3 = 0;
1233  if (byte2 == 0) {
1234  byte2 = UnsafeUtil.getByte(bytes, offset++);
1235  if (offset >= offsetLimit) {
1236  return incompleteStateFor(byte1, byte2);
1237  }
1238  } else {
1239  byte3 = (byte) (state >> 16);
1240  }
1241  if (byte3 == 0) {
1242  byte3 = UnsafeUtil.getByte(bytes, offset++);
1243  if (offset >= offsetLimit) {
1244  return incompleteStateFor(byte1, byte2, byte3);
1245  }
1246  }
1247 
1248  // If we were called with state == MALFORMED, then byte1 is 0xFF,
1249  // which never occurs in well-formed UTF-8, and so we will return
1250  // MALFORMED again below.
1251 
1252  if (byte2 > (byte) 0xBF
1253  // Check that 1 <= plane <= 16. Tricky optimized form of:
1254  // if (byte1 > (byte) 0xF4 ||
1255  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
1256  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
1257  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
1258  // byte3 trailing-byte test
1259  || byte3 > (byte) 0xBF
1260  // byte4 trailing-byte test
1261  || UnsafeUtil.getByte(bytes, offset++) > (byte) 0xBF) {
1262  return MALFORMED;
1263  }
1264  }
1265  }
1266 
1267  return partialIsValidUtf8(bytes, offset, (int) (offsetLimit - offset));
1268  }
1269 
1270  @Override
1271  int partialIsValidUtf8Direct(
1272  final int state, ByteBuffer buffer, final int index, final int limit) {
1273  // Bitwise OR combines the sign bits so any negative value fails the check.
1274  if ((index | limit | buffer.limit() - limit) < 0) {
1275  throw new ArrayIndexOutOfBoundsException(
1276  String.format("buffer limit=%d, index=%d, limit=%d", buffer.limit(), index, limit));
1277  }
1278  long address = addressOffset(buffer) + index;
1279  final long addressLimit = address + (limit - index);
1280  if (state != COMPLETE) {
1281  // The previous decoding operation was incomplete (or malformed).
1282  // We look for a well-formed sequence consisting of bytes from
1283  // the previous decoding operation (stored in state) together
1284  // with bytes from the array slice.
1285  //
1286  // We expect such "straddler characters" to be rare.
1287 
1288  if (address >= addressLimit) { // No bytes? No progress.
1289  return state;
1290  }
1291 
1292  final int byte1 = (byte) state;
1293  // byte1 is never ASCII.
1294  if (byte1 < (byte) 0xE0) {
1295  // two-byte form
1296 
1297  // Simultaneously checks for illegal trailing-byte in
1298  // leading position and overlong 2-byte form.
1299  if (byte1 < (byte) 0xC2
1300  // byte2 trailing-byte test
1301  || UnsafeUtil.getByte(address++) > (byte) 0xBF) {
1302  return MALFORMED;
1303  }
1304  } else if (byte1 < (byte) 0xF0) {
1305  // three-byte form
1306 
1307  // Get byte2 from saved state or array
1308  int byte2 = (byte) ~(state >> 8);
1309  if (byte2 == 0) {
1310  byte2 = UnsafeUtil.getByte(address++);
1311  if (address >= addressLimit) {
1312  return incompleteStateFor(byte1, byte2);
1313  }
1314  }
1315  if (byte2 > (byte) 0xBF
1316  // overlong? 5 most significant bits must not all be zero
1317  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
1318  // illegal surrogate codepoint?
1319  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
1320  // byte3 trailing-byte test
1321  || UnsafeUtil.getByte(address++) > (byte) 0xBF) {
1322  return MALFORMED;
1323  }
1324  } else {
1325  // four-byte form
1326 
1327  // Get byte2 and byte3 from saved state or array
1328  int byte2 = (byte) ~(state >> 8);
1329  int byte3 = 0;
1330  if (byte2 == 0) {
1331  byte2 = UnsafeUtil.getByte(address++);
1332  if (address >= addressLimit) {
1333  return incompleteStateFor(byte1, byte2);
1334  }
1335  } else {
1336  byte3 = (byte) (state >> 16);
1337  }
1338  if (byte3 == 0) {
1339  byte3 = UnsafeUtil.getByte(address++);
1340  if (address >= addressLimit) {
1341  return incompleteStateFor(byte1, byte2, byte3);
1342  }
1343  }
1344 
1345  // If we were called with state == MALFORMED, then byte1 is 0xFF,
1346  // which never occurs in well-formed UTF-8, and so we will return
1347  // MALFORMED again below.
1348 
1349  if (byte2 > (byte) 0xBF
1350  // Check that 1 <= plane <= 16. Tricky optimized form of:
1351  // if (byte1 > (byte) 0xF4 ||
1352  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
1353  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
1354  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
1355  // byte3 trailing-byte test
1356  || byte3 > (byte) 0xBF
1357  // byte4 trailing-byte test
1358  || UnsafeUtil.getByte(address++) > (byte) 0xBF) {
1359  return MALFORMED;
1360  }
1361  }
1362  }
1363 
1364  return partialIsValidUtf8(address, (int) (addressLimit - address));
1365  }
1366 
1367  @Override
1368  String decodeUtf8(byte[] bytes, int index, int size) throws InvalidProtocolBufferException {
1369  if ((index | size | bytes.length - index - size) < 0) {
1370  throw new ArrayIndexOutOfBoundsException(
1371  String.format("buffer length=%d, index=%d, size=%d", bytes.length, index, size));
1372  }
1373 
1374  int offset = index;
1375  final int limit = offset + size;
1376 
1377  // The longest possible resulting String is the same as the number of input bytes, when it is
1378  // all ASCII. For other cases, this over-allocates and we will truncate in the end.
1379  char[] resultArr = new char[size];
1380  int resultPos = 0;
1381 
1382  // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this).
1383  // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
1384  while (offset < limit) {
1385  byte b = UnsafeUtil.getByte(bytes, offset);
1386  if (!DecodeUtil.isOneByte(b)) {
1387  break;
1388  }
1389  offset++;
1390  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
1391  }
1392 
1393  while (offset < limit) {
1394  byte byte1 = UnsafeUtil.getByte(bytes, offset++);
1395  if (DecodeUtil.isOneByte(byte1)) {
1396  DecodeUtil.handleOneByte(byte1, resultArr, resultPos++);
1397  // It's common for there to be multiple ASCII characters in a run mixed in, so add an
1398  // extra optimized loop to take care of these runs.
1399  while (offset < limit) {
1400  byte b = UnsafeUtil.getByte(bytes, offset);
1401  if (!DecodeUtil.isOneByte(b)) {
1402  break;
1403  }
1404  offset++;
1405  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
1406  }
1407  } else if (DecodeUtil.isTwoBytes(byte1)) {
1408  if (offset >= limit) {
1409  throw InvalidProtocolBufferException.invalidUtf8();
1410  }
1411  DecodeUtil.handleTwoBytes(
1412  byte1, /* byte2 */ UnsafeUtil.getByte(bytes, offset++), resultArr, resultPos++);
1413  } else if (DecodeUtil.isThreeBytes(byte1)) {
1414  if (offset >= limit - 1) {
1415  throw InvalidProtocolBufferException.invalidUtf8();
1416  }
1417  DecodeUtil.handleThreeBytes(
1418  byte1,
1419  /* byte2 */ UnsafeUtil.getByte(bytes, offset++),
1420  /* byte3 */ UnsafeUtil.getByte(bytes, offset++),
1421  resultArr,
1422  resultPos++);
1423  } else {
1424  if (offset >= limit - 2) {
1425  throw InvalidProtocolBufferException.invalidUtf8();
1426  }
1427  DecodeUtil.handleFourBytes(
1428  byte1,
1429  /* byte2 */ UnsafeUtil.getByte(bytes, offset++),
1430  /* byte3 */ UnsafeUtil.getByte(bytes, offset++),
1431  /* byte4 */ UnsafeUtil.getByte(bytes, offset++),
1432  resultArr,
1433  resultPos++);
1434  // 4-byte case requires two chars.
1435  resultPos++;
1436  }
1437  }
1438 
1439  return new String(resultArr, 0, resultPos);
1440  }
1441 
1442  @Override
1443  String decodeUtf8Direct(ByteBuffer buffer, int index, int size)
1444  throws InvalidProtocolBufferException {
1445  // Bitwise OR combines the sign bits so any negative value fails the check.
1446  if ((index | size | buffer.limit() - index - size) < 0) {
1447  throw new ArrayIndexOutOfBoundsException(
1448  String.format("buffer limit=%d, index=%d, limit=%d", buffer.limit(), index, size));
1449  }
1450  long address = UnsafeUtil.addressOffset(buffer) + index;
1451  final long addressLimit = address + size;
1452 
1453  // The longest possible resulting String is the same as the number of input bytes, when it is
1454  // all ASCII. For other cases, this over-allocates and we will truncate in the end.
1455  char[] resultArr = new char[size];
1456  int resultPos = 0;
1457 
1458  // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this).
1459  // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
1460  while (address < addressLimit) {
1461  byte b = UnsafeUtil.getByte(address);
1462  if (!DecodeUtil.isOneByte(b)) {
1463  break;
1464  }
1465  address++;
1466  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
1467  }
1468 
1469  while (address < addressLimit) {
1470  byte byte1 = UnsafeUtil.getByte(address++);
1471  if (DecodeUtil.isOneByte(byte1)) {
1472  DecodeUtil.handleOneByte(byte1, resultArr, resultPos++);
1473  // It's common for there to be multiple ASCII characters in a run mixed in, so add an
1474  // extra optimized loop to take care of these runs.
1475  while (address < addressLimit) {
1476  byte b = UnsafeUtil.getByte(address);
1477  if (!DecodeUtil.isOneByte(b)) {
1478  break;
1479  }
1480  address++;
1481  DecodeUtil.handleOneByte(b, resultArr, resultPos++);
1482  }
1483  } else if (DecodeUtil.isTwoBytes(byte1)) {
1484  if (address >= addressLimit) {
1485  throw InvalidProtocolBufferException.invalidUtf8();
1486  }
1487  DecodeUtil.handleTwoBytes(
1488  byte1, /* byte2 */ UnsafeUtil.getByte(address++), resultArr, resultPos++);
1489  } else if (DecodeUtil.isThreeBytes(byte1)) {
1490  if (address >= addressLimit - 1) {
1491  throw InvalidProtocolBufferException.invalidUtf8();
1492  }
1493  DecodeUtil.handleThreeBytes(
1494  byte1,
1495  /* byte2 */ UnsafeUtil.getByte(address++),
1496  /* byte3 */ UnsafeUtil.getByte(address++),
1497  resultArr,
1498  resultPos++);
1499  } else {
1500  if (address >= addressLimit - 2) {
1501  throw InvalidProtocolBufferException.invalidUtf8();
1502  }
1503  DecodeUtil.handleFourBytes(
1504  byte1,
1505  /* byte2 */ UnsafeUtil.getByte(address++),
1506  /* byte3 */ UnsafeUtil.getByte(address++),
1507  /* byte4 */ UnsafeUtil.getByte(address++),
1508  resultArr,
1509  resultPos++);
1510  // 4-byte case requires two chars.
1511  resultPos++;
1512  }
1513  }
1514 
1515  return new String(resultArr, 0, resultPos);
1516  }
1517 
1518  @Override
1519  int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
1520  long outIx = offset;
1521  final long outLimit = outIx + length;
1522  final int inLimit = in.length();
1523  if (inLimit > length || out.length - length < offset) {
1524  // Not even enough room for an ASCII-encoded string.
1525  throw new ArrayIndexOutOfBoundsException(
1526  "Failed writing " + in.charAt(inLimit - 1) + " at index " + (offset + length));
1527  }
1528 
1529  // Designed to take advantage of
1530  // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination
1531  int inIx = 0;
1532  for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) {
1533  UnsafeUtil.putByte(out, outIx++, (byte) c);
1534  }
1535  if (inIx == inLimit) {
1536  // We're done, it was ASCII encoded.
1537  return (int) outIx;
1538  }
1539 
1540  for (char c; inIx < inLimit; ++inIx) {
1541  c = in.charAt(inIx);
1542  if (c < 0x80 && outIx < outLimit) {
1543  UnsafeUtil.putByte(out, outIx++, (byte) c);
1544  } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes
1545  UnsafeUtil.putByte(out, outIx++, (byte) ((0xF << 6) | (c >>> 6)));
1546  UnsafeUtil.putByte(out, outIx++, (byte) (0x80 | (0x3F & c)));
1547  } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) {
1548  // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
1549  UnsafeUtil.putByte(out, outIx++, (byte) ((0xF << 5) | (c >>> 12)));
1550  UnsafeUtil.putByte(out, outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
1551  UnsafeUtil.putByte(out, outIx++, (byte) (0x80 | (0x3F & c)));
1552  } else if (outIx <= outLimit - 4L) {
1553  // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
1554  // bytes
1555  final char low;
1556  if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(++inIx)))) {
1557  throw new UnpairedSurrogateException((inIx - 1), inLimit);
1558  }
1559  int codePoint = toCodePoint(c, low);
1560  UnsafeUtil.putByte(out, outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
1561  UnsafeUtil.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
1562  UnsafeUtil.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
1563  UnsafeUtil.putByte(out, outIx++, (byte) (0x80 | (0x3F & codePoint)));
1564  } else {
1565  if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE)
1566  && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) {
1567  // We are surrogates and we're not a surrogate pair.
1568  throw new UnpairedSurrogateException(inIx, inLimit);
1569  }
1570  // Not enough space in the output buffer.
1571  throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + outIx);
1572  }
1573  }
1574 
1575  // All bytes have been encoded.
1576  return (int) outIx;
1577  }
1578 
1579  @Override
1580  void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
1581  final long address = addressOffset(out);
1582  long outIx = address + out.position();
1583  final long outLimit = address + out.limit();
1584  final int inLimit = in.length();
1585  if (inLimit > outLimit - outIx) {
1586  // Not even enough room for an ASCII-encoded string.
1587  throw new ArrayIndexOutOfBoundsException(
1588  "Failed writing " + in.charAt(inLimit - 1) + " at index " + out.limit());
1589  }
1590 
1591  // Designed to take advantage of
1592  // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination
1593  int inIx = 0;
1594  for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) {
1595  UnsafeUtil.putByte(outIx++, (byte) c);
1596  }
1597  if (inIx == inLimit) {
1598  // We're done, it was ASCII encoded.
1599  out.position((int) (outIx - address));
1600  return;
1601  }
1602 
1603  for (char c; inIx < inLimit; ++inIx) {
1604  c = in.charAt(inIx);
1605  if (c < 0x80 && outIx < outLimit) {
1606  UnsafeUtil.putByte(outIx++, (byte) c);
1607  } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes
1608  UnsafeUtil.putByte(outIx++, (byte) ((0xF << 6) | (c >>> 6)));
1609  UnsafeUtil.putByte(outIx++, (byte) (0x80 | (0x3F & c)));
1610  } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) {
1611  // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
1612  UnsafeUtil.putByte(outIx++, (byte) ((0xF << 5) | (c >>> 12)));
1613  UnsafeUtil.putByte(outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
1614  UnsafeUtil.putByte(outIx++, (byte) (0x80 | (0x3F & c)));
1615  } else if (outIx <= outLimit - 4L) {
1616  // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
1617  // bytes
1618  final char low;
1619  if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(++inIx)))) {
1620  throw new UnpairedSurrogateException((inIx - 1), inLimit);
1621  }
1622  int codePoint = toCodePoint(c, low);
1623  UnsafeUtil.putByte(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
1624  UnsafeUtil.putByte(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
1625  UnsafeUtil.putByte(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
1626  UnsafeUtil.putByte(outIx++, (byte) (0x80 | (0x3F & codePoint)));
1627  } else {
1628  if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE)
1629  && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) {
1630  // We are surrogates and we're not a surrogate pair.
1631  throw new UnpairedSurrogateException(inIx, inLimit);
1632  }
1633  // Not enough space in the output buffer.
1634  throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + outIx);
1635  }
1636  }
1637 
1638  // All bytes have been encoded.
1639  out.position((int) (outIx - address));
1640  }
1641 
1652  private static int unsafeEstimateConsecutiveAscii(
1653  byte[] bytes, long offset, final int maxChars) {
1654  if (maxChars < UNSAFE_COUNT_ASCII_THRESHOLD) {
1655  // Don't bother with small strings.
1656  return 0;
1657  }
1658 
1659  for (int i = 0; i < maxChars; i++) {
1660  if (UnsafeUtil.getByte(bytes, offset++) < 0) {
1661  return i;
1662  }
1663  }
1664  return maxChars;
1665  }
1666 
1671  private static int unsafeEstimateConsecutiveAscii(long address, final int maxChars) {
1672  int remaining = maxChars;
1673  if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) {
1674  // Don't bother with small strings.
1675  return 0;
1676  }
1677 
1678  // Read bytes until 8-byte aligned so that we can read longs in the loop below.
1679  // We do this by ANDing the address with 7 to determine the number of bytes that need to
1680  // be read before we're 8-byte aligned.
1681  final int unaligned = 8 - ((int) address & 7);
1682  for (int j = unaligned; j > 0; j--) {
1683  if (UnsafeUtil.getByte(address++) < 0) {
1684  return unaligned - j;
1685  }
1686  }
1687 
1688  // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
1689  // To speed things up further, we're reading longs instead of bytes so we use a mask to
1690  // determine if any byte in the current long is non-ASCII.
1691  remaining -= unaligned;
1692  for (;
1693  remaining >= 8 && (UnsafeUtil.getLong(address) & ASCII_MASK_LONG) == 0;
1694  address += 8, remaining -= 8) {}
1695  return maxChars - remaining;
1696  }
1697 
1698  private static int partialIsValidUtf8(final byte[] bytes, long offset, int remaining) {
1699  // Skip past ASCII characters as quickly as possible.
1700  final int skipped = unsafeEstimateConsecutiveAscii(bytes, offset, remaining);
1701  remaining -= skipped;
1702  offset += skipped;
1703 
1704  for (; ; ) {
1705  // Optimize for interior runs of ASCII bytes.
1706  // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
1707  // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
1708  int byte1 = 0;
1709  for (; remaining > 0 && (byte1 = UnsafeUtil.getByte(bytes, offset++)) >= 0; --remaining) {}
1710  if (remaining == 0) {
1711  return COMPLETE;
1712  }
1713  remaining--;
1714 
1715  // If we're here byte1 is not ASCII. Only need to handle 2-4 byte forms.
1716  if (byte1 < (byte) 0xE0) {
1717  // Two-byte form (110xxxxx 10xxxxxx)
1718  if (remaining == 0) {
1719  // Incomplete sequence
1720  return byte1;
1721  }
1722  remaining--;
1723 
1724  // Simultaneously checks for illegal trailing-byte in
1725  // leading position and overlong 2-byte form.
1726  if (byte1 < (byte) 0xC2 || UnsafeUtil.getByte(bytes, offset++) > (byte) 0xBF) {
1727  return MALFORMED;
1728  }
1729  } else if (byte1 < (byte) 0xF0) {
1730  // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx)
1731  if (remaining < 2) {
1732  // Incomplete sequence
1733  return unsafeIncompleteStateFor(bytes, byte1, offset, remaining);
1734  }
1735  remaining -= 2;
1736 
1737  final int byte2;
1738  if ((byte2 = UnsafeUtil.getByte(bytes, offset++)) > (byte) 0xBF
1739  // overlong? 5 most significant bits must not all be zero
1740  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
1741  // check for illegal surrogate codepoints
1742  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
1743  // byte3 trailing-byte test
1744  || UnsafeUtil.getByte(bytes, offset++) > (byte) 0xBF) {
1745  return MALFORMED;
1746  }
1747  } else {
1748  // Four-byte form (1110xxxx 10xxxxxx 10xxxxxx 10xxxxxx)
1749  if (remaining < 3) {
1750  // Incomplete sequence
1751  return unsafeIncompleteStateFor(bytes, byte1, offset, remaining);
1752  }
1753  remaining -= 3;
1754 
1755  final int byte2;
1756  if ((byte2 = UnsafeUtil.getByte(bytes, offset++)) > (byte) 0xBF
1757  // Check that 1 <= plane <= 16. Tricky optimized form of:
1758  // if (byte1 > (byte) 0xF4 ||
1759  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
1760  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
1761  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
1762  // byte3 trailing-byte test
1763  || UnsafeUtil.getByte(bytes, offset++) > (byte) 0xBF
1764  // byte4 trailing-byte test
1765  || UnsafeUtil.getByte(bytes, offset++) > (byte) 0xBF) {
1766  return MALFORMED;
1767  }
1768  }
1769  }
1770  }
1771 
1772  private static int partialIsValidUtf8(long address, int remaining) {
1773  // Skip past ASCII characters as quickly as possible.
1774  final int skipped = unsafeEstimateConsecutiveAscii(address, remaining);
1775  address += skipped;
1776  remaining -= skipped;
1777 
1778  for (; ; ) {
1779  // Optimize for interior runs of ASCII bytes.
1780  // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
1781  // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
1782  int byte1 = 0;
1783  for (; remaining > 0 && (byte1 = UnsafeUtil.getByte(address++)) >= 0; --remaining) {}
1784  if (remaining == 0) {
1785  return COMPLETE;
1786  }
1787  remaining--;
1788 
1789  if (byte1 < (byte) 0xE0) {
1790  // Two-byte form
1791 
1792  if (remaining == 0) {
1793  // Incomplete sequence
1794  return byte1;
1795  }
1796  remaining--;
1797 
1798  // Simultaneously checks for illegal trailing-byte in
1799  // leading position and overlong 2-byte form.
1800  if (byte1 < (byte) 0xC2 || UnsafeUtil.getByte(address++) > (byte) 0xBF) {
1801  return MALFORMED;
1802  }
1803  } else if (byte1 < (byte) 0xF0) {
1804  // Three-byte form
1805 
1806  if (remaining < 2) {
1807  // Incomplete sequence
1808  return unsafeIncompleteStateFor(address, byte1, remaining);
1809  }
1810  remaining -= 2;
1811 
1812  final byte byte2 = UnsafeUtil.getByte(address++);
1813  if (byte2 > (byte) 0xBF
1814  // overlong? 5 most significant bits must not all be zero
1815  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
1816  // check for illegal surrogate codepoints
1817  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
1818  // byte3 trailing-byte test
1819  || UnsafeUtil.getByte(address++) > (byte) 0xBF) {
1820  return MALFORMED;
1821  }
1822  } else {
1823  // Four-byte form
1824 
1825  if (remaining < 3) {
1826  // Incomplete sequence
1827  return unsafeIncompleteStateFor(address, byte1, remaining);
1828  }
1829  remaining -= 3;
1830 
1831  final byte byte2 = UnsafeUtil.getByte(address++);
1832  if (byte2 > (byte) 0xBF
1833  // Check that 1 <= plane <= 16. Tricky optimized form of:
1834  // if (byte1 > (byte) 0xF4 ||
1835  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
1836  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
1837  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
1838  // byte3 trailing-byte test
1839  || UnsafeUtil.getByte(address++) > (byte) 0xBF
1840  // byte4 trailing-byte test
1841  || UnsafeUtil.getByte(address++) > (byte) 0xBF) {
1842  return MALFORMED;
1843  }
1844  }
1845  }
1846  }
1847 
1848  private static int unsafeIncompleteStateFor(
1849  byte[] bytes, int byte1, long offset, int remaining) {
1850  switch (remaining) {
1851  case 0:
1852  return incompleteStateFor(byte1);
1853  case 1:
1854  return incompleteStateFor(byte1, UnsafeUtil.getByte(bytes, offset));
1855  case 2:
1856  return incompleteStateFor(
1857  byte1, UnsafeUtil.getByte(bytes, offset), UnsafeUtil.getByte(bytes, offset + 1));
1858  default:
1859  throw new AssertionError();
1860  }
1861  }
1862 
1863  private static int unsafeIncompleteStateFor(long address, final int byte1, int remaining) {
1864  switch (remaining) {
1865  case 0:
1866  return incompleteStateFor(byte1);
1867  case 1:
1868  return incompleteStateFor(byte1, UnsafeUtil.getByte(address));
1869  case 2:
1870  return incompleteStateFor(
1871  byte1, UnsafeUtil.getByte(address), UnsafeUtil.getByte(address + 1));
1872  default:
1873  throw new AssertionError();
1874  }
1875  }
1876  }
1877 
1883  private static class DecodeUtil {
1884 
1886  private static boolean isOneByte(byte b) {
1887  return b >= 0;
1888  }
1889 
1891  private static boolean isTwoBytes(byte b) {
1892  return b < (byte) 0xE0;
1893  }
1894 
1896  private static boolean isThreeBytes(byte b) {
1897  return b < (byte) 0xF0;
1898  }
1899 
1900  private static void handleOneByte(byte byte1, char[] resultArr, int resultPos) {
1901  resultArr[resultPos] = (char) byte1;
1902  }
1903 
1904  private static void handleTwoBytes(byte byte1, byte byte2, char[] resultArr, int resultPos)
1906  // Simultaneously checks for illegal trailing-byte in leading position (<= '11000000') and
1907  // overlong 2-byte, '11000001'.
1908  if (byte1 < (byte) 0xC2 || isNotTrailingByte(byte2)) {
1909  throw InvalidProtocolBufferException.invalidUtf8();
1910  }
1911  resultArr[resultPos] = (char) (((byte1 & 0x1F) << 6) | trailingByteValue(byte2));
1912  }
1913 
1914  private static void handleThreeBytes(
1915  byte byte1, byte byte2, byte byte3, char[] resultArr, int resultPos)
1917  if (isNotTrailingByte(byte2)
1918  // overlong? 5 most significant bits must not all be zero
1919  || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
1920  // check for illegal surrogate codepoints
1921  || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
1922  || isNotTrailingByte(byte3)) {
1923  throw InvalidProtocolBufferException.invalidUtf8();
1924  }
1925  resultArr[resultPos] =
1926  (char)
1927  (((byte1 & 0x0F) << 12) | (trailingByteValue(byte2) << 6) | trailingByteValue(byte3));
1928  }
1929 
1930  private static void handleFourBytes(
1931  byte byte1, byte byte2, byte byte3, byte byte4, char[] resultArr, int resultPos)
1933  if (isNotTrailingByte(byte2)
1934  // Check that 1 <= plane <= 16. Tricky optimized form of:
1935  // valid 4-byte leading byte?
1936  // if (byte1 > (byte) 0xF4 ||
1937  // overlong? 4 most significant bits must not all be zero
1938  // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
1939  // codepoint larger than the highest code point (U+10FFFF)?
1940  // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
1941  || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
1942  || isNotTrailingByte(byte3)
1943  || isNotTrailingByte(byte4)) {
1944  throw InvalidProtocolBufferException.invalidUtf8();
1945  }
1946  int codepoint =
1947  ((byte1 & 0x07) << 18)
1948  | (trailingByteValue(byte2) << 12)
1949  | (trailingByteValue(byte3) << 6)
1950  | trailingByteValue(byte4);
1951  resultArr[resultPos] = DecodeUtil.highSurrogate(codepoint);
1952  resultArr[resultPos + 1] = DecodeUtil.lowSurrogate(codepoint);
1953  }
1954 
1956  private static boolean isNotTrailingByte(byte b) {
1957  return b > (byte) 0xBF;
1958  }
1959 
1961  private static int trailingByteValue(byte b) {
1962  return b & 0x3F;
1963  }
1964 
1965  private static char highSurrogate(int codePoint) {
1966  return (char)
1967  ((MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)) + (codePoint >>> 10));
1968  }
1969 
1970  private static char lowSurrogate(int codePoint) {
1971  return (char) (MIN_LOW_SURROGATE + (codePoint & 0x3ff));
1972  }
1973  }
1974 
1975  private Utf8() {}
1976 }
java::lang
com.google.protobuf.Utf8.DecodeUtil.lowSurrogate
static char lowSurrogate(int codePoint)
Definition: Utf8.java:1970
com.google.protobuf.Utf8.DecodeUtil.trailingByteValue
static int trailingByteValue(byte b)
Definition: Utf8.java:1961
length
GLenum GLuint GLenum GLsizei length
Definition: glcorearb.h:2695
com.google.protobuf.Utf8.DecodeUtil.highSurrogate
static char highSurrogate(int codePoint)
Definition: Utf8.java:1965
com.google.protobuf.Utf8.DecodeUtil.isNotTrailingByte
static boolean isNotTrailingByte(byte b)
Definition: Utf8.java:1956
com.google.protobuf
Definition: ProtoCaliperBenchmark.java:2
address
const char * address
Definition: builds/zos/test_fork.cpp:6
bytes
uint8 bytes[10]
Definition: coded_stream_unittest.cc:153
b
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:3228
com.google.protobuf.Utf8.DecodeUtil
Definition: Utf8.java:1883
offset
GLintptr offset
Definition: glcorearb.h:2944
com.google.protobuf.Utf8.DecodeUtil.handleTwoBytes
static void handleTwoBytes(byte byte1, byte byte2, char[] resultArr, int resultPos)
Definition: Utf8.java:1904
start
GLuint start
Definition: glcorearb.h:2858
size
#define size
Definition: glcorearb.h:2944
buffer
Definition: buffer_processor.h:43
byte
SETUP_TEARDOWN_TESTCONTEXT typedef uint8_t byte
Definition: test_stream.cpp:12
com.google.protobuf.Utf8.DecodeUtil.handleFourBytes
static void handleFourBytes(byte byte1, byte byte2, byte byte3, byte byte4, char[] resultArr, int resultPos)
Definition: Utf8.java:1930
i
int i
Definition: gmock-matchers_test.cc:764
java
com.google.protobuf.Utf8.DecodeUtil.isOneByte
static boolean isOneByte(byte b)
Definition: Utf8.java:1886
size
GLsizeiptr size
Definition: glcorearb.h:2943
com.google
com
com.google.protobuf.InvalidProtocolBufferException
Definition: InvalidProtocolBufferException.java:41
com.google.protobuf.Utf8.DecodeUtil.handleOneByte
static void handleOneByte(byte byte1, char[] resultArr, int resultPos)
Definition: Utf8.java:1900
com.google.protobuf.Utf8.DecodeUtil.isTwoBytes
static boolean isTwoBytes(byte b)
Definition: Utf8.java:1891
com.google.protobuf.UnsafeUtil
Definition: UnsafeUtil.java:43
com.google.protobuf.Utf8.DecodeUtil.isThreeBytes
static boolean isThreeBytes(byte b)
Definition: Utf8.java:1896
index
GLuint index
Definition: glcorearb.h:3055
com.google.protobuf.Utf8.DecodeUtil.handleThreeBytes
static void handleThreeBytes(byte byte1, byte byte2, byte byte3, char[] resultArr, int resultPos)
Definition: Utf8.java:1914


libaditof
Author(s):
autogenerated on Wed May 21 2025 02:07:01