//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: ascii.h
// -----------------------------------------------------------------------------
//
// This package contains functions operating on characters and strings
// restricted to standard ASCII. These include character classification
// functions analogous to those found in the ANSI C Standard Library <ctype.h>
// header file.
//
// C++ implementations provide <ctype.h> functionality based on their
// C environment locale. In general, reliance on such a locale is not ideal, as
// the locale standard is problematic (and may not return invariant information
// for the same character set, for example). These `ascii_*()` functions are
// hard-wired for standard ASCII, much faster, and guaranteed to behave
// consistently. They will never be overloaded, nor will their function
// signature change.
//
// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
// `ascii_isxdigit()`
//   Analogous to the <ctype.h> functions with similar names, these
//   functions take an unsigned char and return a bool, based on whether the
//   character matches the condition specified.
//
//   If the input character has a numerical value greater than 127, these
//   functions return `false`.
//
// `ascii_tolower()`, `ascii_toupper()`
//   Analogous to the <ctype.h> functions with similar names, these functions
//   take an unsigned char and return a char.
//
//   If the input character is not an ASCII {lower,upper}-case letter (including
//   numerical values greater than 127) then the functions return the same value
//   as the input character.

#ifndef ABSL_STRINGS_ASCII_H_
#define ABSL_STRINGS_ASCII_H_

#include <algorithm>
#include <string>

#include "absl/base/attributes.h"
#include "absl/strings/string_view.h"

namespace absl {
namespace ascii_internal {

// Declaration for an array of bitfields holding character information.
//
// The array is indexed by the `unsigned char` value of a character. It is only
// declared here (`extern`); its definition is not part of this header. The
// classification functions below test these bits of an entry:
//   0x01  ascii_isalpha()
//   0x04  ascii_isalnum()
//   0x08  ascii_isspace()
//   0x10  ascii_ispunct()
//   0x20  ascii_isblank()
//   0x40  ascii_iscntrl()
//   0x80  ascii_isxdigit()
extern const unsigned char kPropertyBits[256];

// Declaration for the array of characters to upper-case characters.
//
// Indexed by character value; `ascii_toupper(c)` returns `kToUpper[c]`.
extern const char kToUpper[256];

// Declaration for the array of characters to lower-case characters.
//
// Indexed by character value; `ascii_tolower(c)` returns `kToLower[c]`.
extern const char kToLower[256];

}  // namespace ascii_internal

// ascii_isalpha()
//
// Determines whether the given character is an alphabetic character.
00078 inline bool ascii_isalpha(unsigned char c) { 00079 return (ascii_internal::kPropertyBits[c] & 0x01) != 0; 00080 } 00081 00082 // ascii_isalnum() 00083 // 00084 // Determines whether the given character is an alphanumeric character. 00085 inline bool ascii_isalnum(unsigned char c) { 00086 return (ascii_internal::kPropertyBits[c] & 0x04) != 0; 00087 } 00088 00089 // ascii_isspace() 00090 // 00091 // Determines whether the given character is a whitespace character (space, 00092 // tab, vertical tab, formfeed, linefeed, or carriage return). 00093 inline bool ascii_isspace(unsigned char c) { 00094 return (ascii_internal::kPropertyBits[c] & 0x08) != 0; 00095 } 00096 00097 // ascii_ispunct() 00098 // 00099 // Determines whether the given character is a punctuation character. 00100 inline bool ascii_ispunct(unsigned char c) { 00101 return (ascii_internal::kPropertyBits[c] & 0x10) != 0; 00102 } 00103 00104 // ascii_isblank() 00105 // 00106 // Determines whether the given character is a blank character (tab or space). 00107 inline bool ascii_isblank(unsigned char c) { 00108 return (ascii_internal::kPropertyBits[c] & 0x20) != 0; 00109 } 00110 00111 // ascii_iscntrl() 00112 // 00113 // Determines whether the given character is a control character. 00114 inline bool ascii_iscntrl(unsigned char c) { 00115 return (ascii_internal::kPropertyBits[c] & 0x40) != 0; 00116 } 00117 00118 // ascii_isxdigit() 00119 // 00120 // Determines whether the given character can be represented as a hexadecimal 00121 // digit character (i.e. {0-9} or {A-F}). 00122 inline bool ascii_isxdigit(unsigned char c) { 00123 return (ascii_internal::kPropertyBits[c] & 0x80) != 0; 00124 } 00125 00126 // ascii_isdigit() 00127 // 00128 // Determines whether the given character can be represented as a decimal 00129 // digit character (i.e. {0-9}). 
00130 inline bool ascii_isdigit(unsigned char c) { return c >= '0' && c <= '9'; } 00131 00132 // ascii_isprint() 00133 // 00134 // Determines whether the given character is printable, including whitespace. 00135 inline bool ascii_isprint(unsigned char c) { return c >= 32 && c < 127; } 00136 00137 // ascii_isgraph() 00138 // 00139 // Determines whether the given character has a graphical representation. 00140 inline bool ascii_isgraph(unsigned char c) { return c > 32 && c < 127; } 00141 00142 // ascii_isupper() 00143 // 00144 // Determines whether the given character is uppercase. 00145 inline bool ascii_isupper(unsigned char c) { return c >= 'A' && c <= 'Z'; } 00146 00147 // ascii_islower() 00148 // 00149 // Determines whether the given character is lowercase. 00150 inline bool ascii_islower(unsigned char c) { return c >= 'a' && c <= 'z'; } 00151 00152 // ascii_isascii() 00153 // 00154 // Determines whether the given character is ASCII. 00155 inline bool ascii_isascii(unsigned char c) { return c < 128; } 00156 00157 // ascii_tolower() 00158 // 00159 // Returns an ASCII character, converting to lowercase if uppercase is 00160 // passed. Note that character values > 127 are simply returned. 00161 inline char ascii_tolower(unsigned char c) { 00162 return ascii_internal::kToLower[c]; 00163 } 00164 00165 // Converts the characters in `s` to lowercase, changing the contents of `s`. 00166 void AsciiStrToLower(std::string* s); 00167 00168 // Creates a lowercase string from a given absl::string_view. 00169 ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) { 00170 std::string result(s); 00171 absl::AsciiStrToLower(&result); 00172 return result; 00173 } 00174 00175 // ascii_toupper() 00176 // 00177 // Returns the ASCII character, converting to upper-case if lower-case is 00178 // passed. Note that characters values > 127 are simply returned. 
00179 inline char ascii_toupper(unsigned char c) { 00180 return ascii_internal::kToUpper[c]; 00181 } 00182 00183 // Converts the characters in `s` to uppercase, changing the contents of `s`. 00184 void AsciiStrToUpper(std::string* s); 00185 00186 // Creates an uppercase string from a given absl::string_view. 00187 ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) { 00188 std::string result(s); 00189 absl::AsciiStrToUpper(&result); 00190 return result; 00191 } 00192 00193 // Returns absl::string_view with whitespace stripped from the beginning of the 00194 // given string_view. 00195 ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace( 00196 absl::string_view str) { 00197 auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); 00198 return str.substr(it - str.begin()); 00199 } 00200 00201 // Strips in place whitespace from the beginning of the given string. 00202 inline void StripLeadingAsciiWhitespace(std::string* str) { 00203 auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); 00204 str->erase(str->begin(), it); 00205 } 00206 00207 // Returns absl::string_view with whitespace stripped from the end of the given 00208 // string_view. 00209 ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace( 00210 absl::string_view str) { 00211 auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); 00212 return str.substr(0, str.rend() - it); 00213 } 00214 00215 // Strips in place whitespace from the end of the given string 00216 inline void StripTrailingAsciiWhitespace(std::string* str) { 00217 auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); 00218 str->erase(str->rend() - it); 00219 } 00220 00221 // Returns absl::string_view with whitespace stripped from both ends of the 00222 // given string_view. 
00223 ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace( 00224 absl::string_view str) { 00225 return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); 00226 } 00227 00228 // Strips in place whitespace from both ends of the given string 00229 inline void StripAsciiWhitespace(std::string* str) { 00230 StripTrailingAsciiWhitespace(str); 00231 StripLeadingAsciiWhitespace(str); 00232 } 00233 00234 // Removes leading, trailing, and consecutive internal whitespace. 00235 void RemoveExtraAsciiWhitespace(std::string*); 00236 00237 } // namespace absl 00238 00239 #endif // ABSL_STRINGS_ASCII_H_