unicode.c
Go to the documentation of this file.
1 /* Copyright (c) 2018, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 #include <openssl/bytestring.h>
16 
17 #include "internal.h"
18 
19 
// is_valid_code_point returns one if |v| is a code point this file accepts
// and zero otherwise. A value is rejected when it lies above 0x10ffff, is one
// of the permanently reserved values, or is a surrogate.
static int is_valid_code_point(uint32_t v) {
  // References in the following are to Unicode 9.0.0.
  if (// The Unicode space runs from zero to 0x10ffff (3.4 D9).
      v > 0x10ffff ||
      // Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved
      // (3.4 D14). Masking off bit zero folds 0x...ffff onto 0x...fffe so one
      // comparison covers both.
      (v & 0xfffe) == 0xfffe ||
      (v >= 0xfdd0 && v <= 0xfdef) ||
      // Surrogate code points are invalid (3.2 C1).
      (v >= 0xd800 && v <= 0xdfff)) {
    return 0;
  }
  return 1;
}
34 
// BOTTOM_BITS returns a byte with the bottom |n| bits set. The whole expansion
// is parenthesized so it behaves as a single operand wherever it is used.
#define BOTTOM_BITS(n) ((uint8_t)((1u << (n)) - 1))

// TOP_BITS returns a byte with the top |n| bits set. It is the complement of
// the bottom |8 - n| bits, truncated back to a byte.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
40 
42  uint8_t c;
43  if (!CBS_get_u8(cbs, &c)) {
44  return 0;
45  }
46  if (c <= 0x7f) {
47  *out = c;
48  return 1;
49  }
50  uint32_t v, lower_bound;
51  size_t len;
52  if ((c & TOP_BITS(3)) == TOP_BITS(2)) {
53  v = c & BOTTOM_BITS(5);
54  len = 1;
55  lower_bound = 0x80;
56  } else if ((c & TOP_BITS(4)) == TOP_BITS(3)) {
57  v = c & BOTTOM_BITS(4);
58  len = 2;
59  lower_bound = 0x800;
60  } else if ((c & TOP_BITS(5)) == TOP_BITS(4)) {
61  v = c & BOTTOM_BITS(3);
62  len = 3;
63  lower_bound = 0x10000;
64  } else {
65  return 0;
66  }
67  for (size_t i = 0; i < len; i++) {
68  if (!CBS_get_u8(cbs, &c) ||
69  (c & TOP_BITS(2)) != TOP_BITS(1)) {
70  return 0;
71  }
72  v <<= 6;
73  v |= c & BOTTOM_BITS(6);
74  }
75  if (!is_valid_code_point(v) ||
76  v < lower_bound) {
77  return 0;
78  }
79  *out = v;
80  return 1;
81 }
82 
84  uint8_t c;
85  if (!CBS_get_u8(cbs, &c)) {
86  return 0;
87  }
88  *out = c;
89  return 1;
90 }
91 
93  // Note UCS-2 (used by BMPString) does not support surrogates.
94  uint16_t c;
95  if (!CBS_get_u16(cbs, &c) ||
97  return 0;
98  }
99  *out = c;
100  return 1;
101 }
102 
104  return CBS_get_u32(cbs, out) && is_valid_code_point(*out);
105 }
106 
// cbb_get_utf8_len returns the number of bytes in the UTF-8 encoding of |u|:
// one for values up to 0x7f, two up to 0x7ff, three up to 0xffff, and four
// for anything larger. It does not check that |u| is a valid code point.
size_t cbb_get_utf8_len(uint32_t u) {
  if (u <= 0x7f) {
    return 1;
  }
  if (u <= 0x7ff) {
    return 2;
  }
  if (u <= 0xffff) {
    return 3;
  }
  return 4;
}
119 
121  if (!is_valid_code_point(u)) {
122  return 0;
123  }
124  if (u <= 0x7f) {
125  return CBB_add_u8(cbb, (uint8_t)u);
126  }
127  if (u <= 0x7ff) {
128  return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) &&
129  CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
130  }
131  if (u <= 0xffff) {
132  return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) &&
133  CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
134  CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
135  }
136  if (u <= 0x10ffff) {
137  return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) &&
138  CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) &&
139  CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
140  CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
141  }
142  return 0;
143 }
144 
146  return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u);
147 }
148 
150  return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u);
151 }
152 
154  return is_valid_code_point(u) && CBB_add_u32(cbb, u);
155 }
gen_build_yaml.out
dictionary out
Definition: src/benchmark/gen_build_yaml.py:24
CBS_get_u16
#define CBS_get_u16
Definition: boringssl_prefix_symbols.h:1073
cbs_st
Definition: bytestring.h:39
cbb_add_utf8
int cbb_add_utf8(CBB *cbb, uint32_t u)
Definition: unicode.c:120
uint16_t
unsigned short uint16_t
Definition: stdint-msvc2008.h:79
CBS_get_u32
#define CBS_get_u32
Definition: boringssl_prefix_symbols.h:1078
CBB_add_u8
#define CBB_add_u8
Definition: boringssl_prefix_symbols.h:1036
u
OPENSSL_EXPORT pem_password_cb void * u
Definition: pem.h:351
cbs
const CBS * cbs
Definition: third_party/boringssl-with-bazel/src/crypto/trust_token/internal.h:107
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
cbb_add_utf32_be
int cbb_add_utf32_be(CBB *cbb, uint32_t u)
Definition: unicode.c:153
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
cbb_add_ucs2_be
int cbb_add_ucs2_be(CBB *cbb, uint32_t u)
Definition: unicode.c:149
bytestring.h
cbb_get_utf8_len
size_t cbb_get_utf8_len(uint32_t u)
Definition: unicode.c:107
internal.h
CBB_add_u16
#define CBB_add_u16
Definition: boringssl_prefix_symbols.h:1027
c
void c(T a)
Definition: miscompile_with_no_unique_address_test.cc:40
BOTTOM_BITS
#define BOTTOM_BITS(n)
Definition: unicode.c:36
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
CBS_get_u8
#define CBS_get_u8
Definition: boringssl_prefix_symbols.h:1082
CBB_add_u32
#define CBB_add_u32
Definition: boringssl_prefix_symbols.h:1032
cbb_add_latin1
int cbb_add_latin1(CBB *cbb, uint32_t u)
Definition: unicode.c:145
cbs_get_utf8
int cbs_get_utf8(CBS *cbs, uint32_t *out)
Definition: unicode.c:41
TOP_BITS
#define TOP_BITS(n)
Definition: unicode.c:39
is_valid_code_point
static int is_valid_code_point(uint32_t v)
Definition: unicode.c:20
cbs_get_utf32_be
int cbs_get_utf32_be(CBS *cbs, uint32_t *out)
Definition: unicode.c:103
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46
cbs_get_latin1
int cbs_get_latin1(CBS *cbs, uint32_t *out)
Definition: unicode.c:83
cbs_get_ucs2_be
int cbs_get_ucs2_be(CBS *cbs, uint32_t *out)
Definition: unicode.c:92
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
cbb_st
Definition: bytestring.h:375


grpc
Author(s):
autogenerated on Thu Mar 13 2025 03:01:44