abseil-cpp/absl/strings/ascii.cc
Go to the documentation of this file.
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/ascii.h"
16 
17 namespace absl {
19 namespace ascii_internal {
20 
21 // # Table generated by this Python code (bit 0x02 is currently unused):
22 // TODO(mbar) Move Python code for generation of table to BUILD and link here.
23 
24 // NOTE: The kPropertyBits table defined below was generated by
25 // Python code of the following form. (Bit 0x02 is currently unused and
26 // available.)
27 //
28 // def Hex2(n):
29 //   return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
30 // def IsPunct(ch):
31 //   return (ord(ch) >= 32 and ord(ch) < 127 and
32 //           not ch.isspace() and not ch.isalnum())
33 // def IsBlank(ch):
34 //   return ch in ' \t'
35 // def IsCntrl(ch):
36 //   return ord(ch) < 32 or ord(ch) == 127
37 // def IsXDigit(ch):
38 //   return ch.isdigit() or ch.lower() in 'abcdef'
39 // for i in range(128):
40 //   ch = chr(i)
41 //   mask = ((ch.isalpha() and 0x01 or 0) |
42 //           (ch.isalnum() and 0x04 or 0) |
43 //           (ch.isspace() and 0x08 or 0) |
44 //           (IsPunct(ch) and 0x10 or 0) |
45 //           (IsBlank(ch) and 0x20 or 0) |
46 //           (IsCntrl(ch) and 0x40 or 0) |
47 //           (IsXDigit(ch) and 0x80 or 0))
48 //   print Hex2(mask) + ',',
49 //   if i % 16 == 7:
50 //     print ' //', Hex2(i & 0x78)
51 //   elif i % 16 == 15:
52 //     print
53 
54 // clang-format off
55 // Array of bitfields holding character information, indexed by character
56 // code. Each bit marks one character class; per the generator script above:
57 // 0x01 alpha, 0x04 alnum, 0x08 space, 0x10 punct, 0x20 blank, 0x40 cntrl,
58 // 0x80 xdigit (0x02 unused). The bits are intentionally left unnamed. Only
59 // codes 0-127 are listed; entries above ASCII 127 are zero-initialized.
60 ABSL_DLL const unsigned char kPropertyBits[256] = {
61  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00
62  0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, // 0x08
63  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10
64  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x18
65  0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20
66  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x28
67  0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30
68  0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x38
69  0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40
70  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x48
71  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50
72  0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x58
73  0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60
74  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x68
75  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70
76  0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40, // 0x78
77 };
78 
79 // Lookup table for the ascii_tolower() function, indexed by character code.
80 // Entries for 'A' through 'Z' hold the matching lower-case letter; every
81 // other entry (0x00-0xff) maps the character to itself.
82 ABSL_DLL const char kToLower[256] = {
83  '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
84  '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
85  '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
86  '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
87  '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
88  '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
89  '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
90  '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
91  '\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  // 0x40: '@', then 'A'-'G' lower-cased
92  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
93  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
94  'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',  // 0x58: 'X'-'Z' lower-cased, then identity again
95  '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
96  '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
97  '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
98  '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
99  '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
100  '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
101  '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
102  '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
103  '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
104  '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
105  '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
106  '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
107  '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
108  '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
109  '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
110  '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
111  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
112  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
113  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
114  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
115 };
116 
117 // Lookup table for the ascii_toupper() function, indexed by character code.
118 // Entries for 'a' through 'z' hold the matching upper-case letter; every
119 // other entry (0x00-0xff) maps the character to itself.
120 ABSL_DLL const char kToUpper[256] = {
121  '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
122  '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
123  '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
124  '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
125  '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
126  '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
127  '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
128  '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
129  '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
130  '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
131  '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
132  '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
133  '\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G',  // 0x60: '`', then 'a'-'g' upper-cased
134  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
135  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
136  'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',  // 0x78: 'x'-'z' upper-cased, then identity again
137  '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
138  '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
139  '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
140  '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
141  '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
142  '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
143  '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
144  '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
145  '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
146  '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
147  '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
148  '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
149  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
150  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
151  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
152  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
153 };
154 // clang-format on
155 
156 } // namespace ascii_internal
157 
// Converts each character of `*s` to lower case in place via
// absl::ascii_tolower().
// NOTE(review): listing lines 158 (signature) and 160 (loop body) were dropped
// by the page extraction. The signature is restored from this page's symbol
// index; the body is reconstructed from the index's absl::ascii_tolower
// reference — confirm against the upstream file.
158 void AsciiStrToLower(std::string* s) {
159   for (auto& ch : *s) {
160     ch = absl::ascii_tolower(static_cast<unsigned char>(ch));
161   }
162 }
163 
// Converts each character of `*s` to upper case in place via
// absl::ascii_toupper().
// NOTE(review): listing lines 164 (signature) and 166 (loop body) were dropped
// by the page extraction. The signature is restored from this page's symbol
// index; the body is reconstructed from the index's absl::ascii_toupper
// reference — confirm against the upstream file.
164 void AsciiStrToUpper(std::string* s) {
165   for (auto& ch : *s) {
166     ch = absl::ascii_toupper(static_cast<unsigned char>(ch));
167   }
168 }
169 
// Rewrites `*str` in place: starts from the view returned by
// StripAsciiWhitespace(*str) and collapses every run of consecutive ASCII
// whitespace characters inside it down to the last character of the run.
// If the stripped view is empty, `*str` is cleared.
// NOTE(review): listing line 170 (the signature) was dropped by the page
// extraction and is restored from this page's symbol index.
170 void RemoveExtraAsciiWhitespace(std::string* str) {
171   auto stripped = StripAsciiWhitespace(*str);
172 
173   if (stripped.empty()) {
174     str->clear();
175     return;
176   }
177 
      // `stripped` is a view into `*str`'s own buffer, so the loop compacts the
      // string in place: the write cursor starts at the front of the buffer and
      // never advances past the read cursor.
178   auto input_it = stripped.begin();
179   auto input_end = stripped.end();
180   auto output_it = &(*str)[0];
181   bool is_ws = false;
182 
183   for (; input_it < input_end; ++input_it) {
184     if (is_ws) {
185       // Consecutive whitespace? Keep only the last.
186       is_ws = absl::ascii_isspace(*input_it);
187       if (is_ws) --output_it;  // Step back so this one overwrites the previous.
188     } else {
189       is_ws = absl::ascii_isspace(*input_it);
190     }
191 
192     *output_it = *input_it;
193     ++output_it;
194   }
195 
      // Drop everything after the compacted prefix. The pointer difference is
      // non-negative, so the conversion to size_type is made explicit.
196   str->erase(static_cast<std::string::size_type>(output_it - &(*str)[0]));
197 }
198 
200 } // namespace absl
absl::StripAsciiWhitespace
ABSL_MUST_USE_RESULT absl::string_view StripAsciiWhitespace(absl::string_view str)
Definition: abseil-cpp/absl/strings/ascii.h:225
xds_interop_client.str
str
Definition: xds_interop_client.py:487
absl::ascii_tolower
char ascii_tolower(unsigned char c)
Definition: abseil-cpp/absl/strings/ascii.h:163
absl::RemoveExtraAsciiWhitespace
void RemoveExtraAsciiWhitespace(std::string *str)
Definition: abseil-cpp/absl/strings/ascii.cc:170
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
absl::FormatConversionChar::s
@ s
ABSL_NAMESPACE_END
#define ABSL_NAMESPACE_END
Definition: third_party/abseil-cpp/absl/base/config.h:171
absl::AsciiStrToLower
void AsciiStrToLower(std::string *s)
Definition: abseil-cpp/absl/strings/ascii.cc:158
absl::ascii_isspace
bool ascii_isspace(unsigned char c)
Definition: abseil-cpp/absl/strings/ascii.h:95
ABSL_NAMESPACE_BEGIN
#define ABSL_NAMESPACE_BEGIN
Definition: third_party/abseil-cpp/absl/base/config.h:170
absl::AsciiStrToUpper
void AsciiStrToUpper(std::string *s)
Definition: abseil-cpp/absl/strings/ascii.cc:164
absl::ascii_internal::kPropertyBits
const ABSL_DLL unsigned char kPropertyBits[256]
Definition: abseil-cpp/absl/strings/ascii.cc:60
absl::ascii_internal::kToUpper
const ABSL_DLL char kToUpper[256]
Definition: abseil-cpp/absl/strings/ascii.cc:120
ABSL_DLL
#define ABSL_DLL
Definition: third_party/abseil-cpp/absl/base/config.h:746
absl::ascii_toupper
char ascii_toupper(unsigned char c)
Definition: abseil-cpp/absl/strings/ascii.h:181
absl::ascii_internal::kToLower
const ABSL_DLL char kToLower[256]
Definition: abseil-cpp/absl/strings/ascii.cc:82
absl
Definition: abseil-cpp/absl/algorithm/algorithm.h:31
ch
char ch
Definition: bloaty/third_party/googletest/googlemock/test/gmock-matchers_test.cc:3621


grpc
Author(s):
autogenerated on Thu Mar 13 2025 02:58:34