ascii.h
Go to the documentation of this file.
1 //
2 // Copyright 2017 The Abseil Authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // https://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // -----------------------------------------------------------------------------
17 // File: ascii.h
18 // -----------------------------------------------------------------------------
19 //
20 // This package contains functions operating on characters and strings
21 // restricted to standard ASCII. These include character classification
22 // functions analogous to those found in the ANSI C Standard Library <ctype.h>
23 // header file.
24 //
25 // C++ implementations provide <ctype.h> functionality based on their
26 // C environment locale. In general, reliance on such a locale is not ideal, as
27 // the locale standard is problematic (and may not return invariant information
28 // for the same character set, for example). These `ascii_*()` functions are
29 // hard-wired for standard ASCII, much faster, and guaranteed to behave
30 // consistently. They will never be overloaded, nor will their function
31 // signature change.
32 //
33 // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
34 // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
35 // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
36 // `ascii_isxdigit()`
37 // Analogous to the <ctype.h> functions with similar names, these
38 // functions take an unsigned char and return a bool, based on whether the
39 // character matches the condition specified.
40 //
41 // If the input character has a numerical value greater than 127, these
42 // functions return `false`.
43 //
44 // `ascii_tolower()`, `ascii_toupper()`
45 // Analogous to the <ctype.h> functions with similar names, these functions
46 // take an unsigned char and return a char.
47 //
48 // If the input character is not an ASCII {lower,upper}-case letter (including
49 // numerical values greater than 127) then the functions return the same value
50 // as the input character.
51 
52 #ifndef ABSL_STRINGS_ASCII_H_
53 #define ABSL_STRINGS_ASCII_H_
54 
55 #include <algorithm>
56 #include <string>
57 
58 #include "absl/base/attributes.h"
60 
61 namespace absl {
62 namespace ascii_internal {
63 
64 // Declaration for the 256-entry table of per-character property bit masks.
// The table is indexed by the unsigned value of the character. The predicates
// in this header test the following bits of an entry:
//   0x01 alphabetic    0x04 alphanumeric   0x08 whitespace   0x10 punctuation
//   0x20 blank         0x40 control        0x80 hexadecimal digit
// Only the declaration lives here; the table itself is defined out of line.
65 extern const unsigned char kPropertyBits[256];
66 
67 // Declaration for the 256-entry table mapping each character value to its
// upper-case counterpart; ascii_toupper() returns the entry for its argument.
68 extern const char kToUpper[256];
69 
70 // Declaration for the 256-entry table mapping each character value to its
// lower-case counterpart; ascii_tolower() returns the entry for its argument.
71 extern const char kToLower[256];
72 
73 } // namespace ascii_internal
74 
75 // ascii_isalpha()
76 //
77 // Determines whether the given character is an alphabetic character.
78 inline bool ascii_isalpha(unsigned char c) {
79  return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
80 }
81 
82 // ascii_isalnum()
83 //
84 // Determines whether the given character is an alphanumeric character.
85 inline bool ascii_isalnum(unsigned char c) {
86  return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
87 }
88 
89 // ascii_isspace()
90 //
91 // Determines whether the given character is a whitespace character (space,
92 // tab, vertical tab, formfeed, linefeed, or carriage return).
93 inline bool ascii_isspace(unsigned char c) {
94  return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
95 }
96 
97 // ascii_ispunct()
98 //
99 // Determines whether the given character is a punctuation character.
100 inline bool ascii_ispunct(unsigned char c) {
101  return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
102 }
103 
104 // ascii_isblank()
105 //
106 // Determines whether the given character is a blank character (tab or space).
107 inline bool ascii_isblank(unsigned char c) {
108  return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
109 }
110 
111 // ascii_iscntrl()
112 //
113 // Determines whether the given character is a control character.
114 inline bool ascii_iscntrl(unsigned char c) {
115  return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
116 }
117 
118 // ascii_isxdigit()
119 //
120 // Determines whether the given character can be represented as a hexadecimal
121 // digit character (i.e. {0-9} or {A-F}).
122 inline bool ascii_isxdigit(unsigned char c) {
123  return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
124 }
125 
// ascii_isdigit()
//
// Reports whether `c` is a decimal digit character, i.e. lies in the closed
// range {0-9}. Any value outside that range (including values > 127) yields
// `false`.
inline bool ascii_isdigit(unsigned char c) {
  const bool below_zero = c < '0';
  const bool above_nine = c > '9';
  return !(below_zero || above_nine);
}
131 
// ascii_isprint()
//
// Reports whether `c` is printable, counting the space character as
// printable: true exactly for the values 32 (space) through 126 ('~').
inline bool ascii_isprint(unsigned char c) {
  if (c < 32) return false;  // Below the space character.
  return c < 127;            // 127 (DEL) and everything above is rejected.
}
136 
// ascii_isgraph()
//
// Reports whether `c` has a graphical representation: true exactly for the
// values 33 ('!') through 126 ('~'). Unlike ascii_isprint(), the space
// character (32) is excluded.
inline bool ascii_isgraph(unsigned char c) {
  if (c <= 32) return false;  // Space and everything below it.
  return c < 127;             // 127 (DEL) and everything above is rejected.
}
141 
// ascii_isupper()
//
// Reports whether `c` is an uppercase ASCII letter, i.e. lies in the closed
// range {A-Z}.
inline bool ascii_isupper(unsigned char c) {
  return !(c < 'A' || c > 'Z');
}
146 
// ascii_islower()
//
// Reports whether `c` is a lowercase ASCII letter, i.e. lies in the closed
// range {a-z}.
inline bool ascii_islower(unsigned char c) {
  return !(c < 'a' || c > 'z');
}
151 
// ascii_isascii()
//
// Reports whether `c` is an ASCII character, i.e. its value is in the range
// 0 through 127.
inline bool ascii_isascii(unsigned char c) {
  const unsigned char first_non_ascii = 128;
  return c < first_non_ascii;
}
156 
157 // ascii_tolower()
158 //
159 // Returns an ASCII character, converting to lowercase if uppercase is
160 // passed. Note that character values > 127 are simply returned.
161 inline char ascii_tolower(unsigned char c) {
162  return ascii_internal::kToLower[c];
163 }
164 
165 // Converts the characters in `s` to lowercase, changing the contents of `s`.
// Declaration only: the definition is out of line, so the exact per-character
// rule is not visible here (presumably the same mapping as ascii_tolower() --
// confirm in the implementation file).
166 void AsciiStrToLower(std::string* s);
167 
168 // Creates a lowercase string from a given absl::string_view.
170  std::string result(s);
171  absl::AsciiStrToLower(&result);
172  return result;
173 }
174 
175 // ascii_toupper()
176 //
177 // Returns the ASCII character, converting to upper-case if lower-case is
178 // passed. Note that characters values > 127 are simply returned.
179 inline char ascii_toupper(unsigned char c) {
180  return ascii_internal::kToUpper[c];
181 }
182 
182 // Converts the characters in `s` to uppercase, changing the contents of `s`.
// Declaration only: the definition is out of line, so the exact per-character
// rule is not visible here (presumably the same mapping as ascii_toupper() --
// confirm in the implementation file).
183 void AsciiStrToUpper(std::string* s);
185 
186 // Creates an uppercase string from a given absl::string_view.
188  std::string result(s);
189  absl::AsciiStrToUpper(&result);
190  return result;
191 }
192 
193 // Returns absl::string_view with whitespace stripped from the beginning of the
194 // given string_view.
196  absl::string_view str) {
197  auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
198  return str.substr(it - str.begin());
199 }
200 
201 // Strips in place whitespace from the beginning of the given string.
202 inline void StripLeadingAsciiWhitespace(std::string* str) {
203  auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
204  str->erase(str->begin(), it);
205 }
206 
207 // Returns absl::string_view with whitespace stripped from the end of the given
208 // string_view.
210  absl::string_view str) {
211  auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
212  return str.substr(0, str.rend() - it);
213 }
214 
215 // Strips in place whitespace from the end of the given string
216 inline void StripTrailingAsciiWhitespace(std::string* str) {
217  auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
218  str->erase(str->rend() - it);
219 }
220 
221 // Returns absl::string_view with whitespace stripped from both ends of the
222 // given string_view.
224  absl::string_view str) {
226 }
227 
228 // Strips in place whitespace from both ends of the given string
229 inline void StripAsciiWhitespace(std::string* str) {
232 }
233 
234 // Removes leading, trailing, and consecutive internal whitespace.
// The string is modified through the pointer argument. Declaration only: the
// definition is out of line, so how a run of internal whitespace is reduced
// (presumably collapsed to a single character -- confirm in the implementation
// file) is not visible here.
235 void RemoveExtraAsciiWhitespace(std::string*);
236 
237 } // namespace absl
238 
239 #endif // ABSL_STRINGS_ASCII_H_
bool ascii_isalnum(unsigned char c)
Definition: ascii.h:85
const_reverse_iterator rend() const noexcept
Definition: string_view.h:238
const_reverse_iterator rbegin() const noexcept
Definition: string_view.h:229
void AsciiStrToLower(std::string *s)
Definition: ascii.cc:157
bool ascii_isblank(unsigned char c)
Definition: ascii.h:107
bool ascii_isgraph(unsigned char c)
Definition: ascii.h:140
string_view substr(size_type pos, size_type n=npos) const
Definition: string_view.h:354
const char kToUpper[256]
Definition: ascii.cc:119
bool ascii_isprint(unsigned char c)
Definition: ascii.h:135
bool ascii_isspace(unsigned char c)
Definition: ascii.h:93
bool ascii_isalpha(unsigned char c)
Definition: ascii.h:78
void AsciiStrToUpper(std::string *s)
Definition: ascii.cc:163
void RemoveExtraAsciiWhitespace(std::string *str)
Definition: ascii.cc:169
Definition: algorithm.h:29
const char kToLower[256]
Definition: ascii.cc:81
char ascii_toupper(unsigned char c)
Definition: ascii.h:179
bool ascii_isupper(unsigned char c)
Definition: ascii.h:145
#define ABSL_MUST_USE_RESULT
Definition: attributes.h:449
constexpr const_iterator begin() const noexcept
Definition: string_view.h:203
ABSL_MUST_USE_RESULT absl::string_view StripAsciiWhitespace(absl::string_view str)
Definition: ascii.h:223
ABSL_MUST_USE_RESULT absl::string_view StripLeadingAsciiWhitespace(absl::string_view str)
Definition: ascii.h:195
bool ascii_islower(unsigned char c)
Definition: ascii.h:150
bool ascii_ispunct(unsigned char c)
Definition: ascii.h:100
bool ascii_isascii(unsigned char c)
Definition: ascii.h:155
bool ascii_isxdigit(unsigned char c)
Definition: ascii.h:122
bool ascii_isdigit(unsigned char c)
Definition: ascii.h:130
char ascii_tolower(unsigned char c)
Definition: ascii.h:161
bool ascii_iscntrl(unsigned char c)
Definition: ascii.h:114
const unsigned char kPropertyBits[256]
Definition: ascii.cc:59
constexpr const_iterator end() const noexcept
Definition: string_view.h:210
ABSL_MUST_USE_RESULT absl::string_view StripTrailingAsciiWhitespace(absl::string_view str)
Definition: ascii.h:209


abseil_cpp
Author(s):
autogenerated on Mon Feb 28 2022 21:31:17