ascii.cc
Go to the documentation of this file.
00001 // Copyright 2017 The Abseil Authors.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //      https://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 
00015 #include "absl/strings/ascii.h"
00016 
00017 namespace absl {
00018 namespace ascii_internal {
00019 
00020 // # Table generated by this Python code (bit 0x02 is currently unused):
00021 // TODO(mbar) Move Python code for generation of table to BUILD and link here.
00022 
00023 // NOTE: The kPropertyBits table defined below was generated by Python 2
00024 // code of the following form. (Bit 0x02 is currently unused and
00025 // available.)
00026 //
00027 // def Hex2(n):
00028 //   return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
00029 // def IsPunct(ch):
00030 //   return (ord(ch) >= 32 and ord(ch) < 127 and
00031 //           not ch.isspace() and not ch.isalnum())
00032 // def IsBlank(ch):
00033 //   return ch in ' \t'
00034 // def IsCntrl(ch):
00035 //   return ord(ch) < 32 or ord(ch) == 127
00036 // def IsXDigit(ch):
00037 //   return ch.isdigit() or ch.lower() in 'abcdef'
00038 // for i in range(128):
00039 //   ch = chr(i)
00040 //   mask = ((ch.isalpha() and 0x01 or 0) |
00041 //           (ch.isalnum() and 0x04 or 0) |
00042 //           (ch.isspace() and 0x08 or 0) |
00043 //           (IsPunct(ch) and 0x10 or 0) |
00044 //           (IsBlank(ch) and 0x20 or 0) |
00045 //           (IsCntrl(ch) and 0x40 or 0) |
00046 //           (IsXDigit(ch) and 0x80 or 0))
00047 //   print Hex2(mask) + ',',
00048 //   if i % 16 == 7:
00049 //     print ' //', Hex2(i & 0x78)
00050 //   elif i % 16 == 15:
00051 //     print
00052 
00053 // clang-format off
00054 // Array of bitfields holding character information; entry i describes the
00055 // character with code i. Per the generator script above, the bits mean:
00056 // 0x01 alpha, 0x04 alnum, 0x08 space, 0x10 punct, 0x20 blank, 0x40 cntrl,
00057 // 0x80 xdigit (0x02 is unused). Only codes 0-127 are listed; the bitfields for
00058 // all characters above ASCII 127 are zero-initialized.
00059 const unsigned char kPropertyBits[256] = {
00060     0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x00
00061     0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,  // 0x08
00062     0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x10
00063     0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x18
00064     0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x20
00065     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x28
00066     0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84,  // 0x30
00067     0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x38
00068     0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x40
00069     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x48
00070     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x50
00071     0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x58
00072     0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x60
00073     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x68
00074     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x70
00075     0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,  // 0x78
00076 };
00077 
00078 // Lookup table for the ascii_tolower() function: entry i holds the result
00079 // for the character with code i. Only 'A' through 'Z' (0x41-0x5a) map to
00080 // their lower-case letters; every other entry (0x80-0xff included) is i itself.
00081 const char kToLower[256] = {
00082   '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
00083   '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
00084   '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
00085   '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
00086   '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
00087   '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
00088   '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
00089   '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
00090   '\x40',    'a',    'b',    'c',    'd',    'e',    'f',    'g',  // 0x40: 'A' starts at 0x41
00091      'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',
00092      'p',    'q',    'r',    's',    't',    'u',    'v',    'w',
00093      'x',    'y',    'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',  // 0x58: 'Z' ends at 0x5a
00094   '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
00095   '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
00096   '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
00097   '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
00098   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
00099   '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
00100   '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
00101   '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
00102   '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
00103   '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
00104   '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
00105   '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
00106   '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
00107   '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
00108   '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
00109   '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
00110   '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
00111   '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
00112   '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
00113   '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
00114 };
00115 
00116 // Lookup table for the ascii_toupper() function: entry i holds the result
00117 // for the character with code i. Only 'a' through 'z' (0x61-0x7a) map to
00118 // their upper-case letters; every other entry (0x80-0xff included) is i itself.
00119 const char kToUpper[256] = {
00120   '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
00121   '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
00122   '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
00123   '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
00124   '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
00125   '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
00126   '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
00127   '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
00128   '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
00129   '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
00130   '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
00131   '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
00132   '\x60',    'A',    'B',    'C',    'D',    'E',    'F',    'G',  // 0x60: 'a' starts at 0x61
00133      'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',
00134      'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',
00135      'X',    'Y',    'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',  // 0x78: 'z' ends at 0x7a
00136   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
00137   '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
00138   '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
00139   '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
00140   '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
00141   '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
00142   '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
00143   '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
00144   '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
00145   '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
00146   '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
00147   '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
00148   '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
00149   '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
00150   '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
00151   '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
00152 };
00153 // clang-format on
00154 
00155 }  // namespace ascii_internal
00156 
00157 void AsciiStrToLower(std::string* s) {
00158   for (auto& ch : *s) {
00159     ch = absl::ascii_tolower(ch);
00160   }
00161 }
00162 
00163 void AsciiStrToUpper(std::string* s) {
00164   for (auto& ch : *s) {
00165     ch = absl::ascii_toupper(ch);
00166   }
00167 }
00168 
00169 void RemoveExtraAsciiWhitespace(std::string* str) {
00170   auto stripped = StripAsciiWhitespace(*str);
00171 
00172   if (stripped.empty()) {
00173     str->clear();
00174     return;
00175   }
00176 
00177   auto input_it = stripped.begin();
00178   auto input_end = stripped.end();
00179   auto output_it = &(*str)[0];
00180   bool is_ws = false;
00181 
00182   for (; input_it < input_end; ++input_it) {
00183     if (is_ws) {
00184       // Consecutive whitespace?  Keep only the last.
00185       is_ws = absl::ascii_isspace(*input_it);
00186       if (is_ws) --output_it;
00187     } else {
00188       is_ws = absl::ascii_isspace(*input_it);
00189     }
00190 
00191     *output_it = *input_it;
00192     ++output_it;
00193   }
00194 
00195   str->erase(output_it - &(*str)[0]);
00196 }
00197 
00198 }  // namespace absl


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:42:14