00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "absl/strings/ascii.h"
00016
00017 namespace absl {
00018 namespace ascii_internal {
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
// Per-byte character-class bitmask table, indexed by character code.
//
// Only the first 128 entries (the ASCII range) are listed explicitly; the
// remaining 128 elements of the 256-entry array are implicitly
// zero-initialized, so bytes 0x80-0xff carry no property bits.
//
// NOTE(review): the bit meanings below are inferred from the values stored in
// this table; confirm them against the accessors that read it (presumably the
// ascii_is*() helpers in absl/strings/ascii.h):
//   0x01  letter                  0x04  letter or digit
//   0x08  whitespace              0x10  punctuation
//   0x20  blank (tab or space)    0x40  control character
//   0x80  hexadecimal digit
// Bit 0x02 is not set by any entry.
00059 const unsigned char kPropertyBits[256] = {
00060 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x00-0x07: control
00061 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,  // 0x08-0x0f: tab (0x09), LF/VT/FF/CR (0x0a-0x0d)
00062 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x10-0x17: control
00063 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x18-0x1f: control
00064 0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x20-0x27: space, then punctuation
00065 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x28-0x2f: punctuation
00066 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84,  // 0x30-0x37: '0'-'7'
00067 0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x38-0x3f: '8', '9', then punctuation
00068 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x40-0x47: '@', 'A'-'F' (hex letters), 'G'
00069 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x48-0x4f: 'H'-'O'
00070 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x50-0x57: 'P'-'W'
00071 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x58-0x5f: 'X'-'Z', then punctuation
00072 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x60-0x67: '`', 'a'-'f' (hex letters), 'g'
00073 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x68-0x6f: 'h'-'o'
00074 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x70-0x77: 'p'-'w'
00075 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,  // 0x78-0x7f: 'x'-'z', punctuation, DEL (control)
00076 };
00077
00078
00079
00080
// Byte-to-byte lower-casing table, indexed by character code (0-255).
//
// Every entry maps a byte to itself except indices 0x41-0x5a ('A'-'Z'), which
// map to the corresponding lowercase letters 'a'-'z'. Bytes 0x80-0xff are
// passed through unchanged.
//
// NOTE(review): presumably consumed by ascii_tolower() in
// absl/strings/ascii.h, indexed by the character converted to unsigned char;
// confirm against that header.
00081 const char kToLower[256] = {
00082 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
00083 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
00084 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
00085 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
00086 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
00087 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
00088 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
00089 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
00090 '\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  // 0x40-0x47: '@' unchanged, 'A'-'G' folded
00091 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',  // 0x48-0x4f: 'H'-'O' folded
00092 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  // 0x50-0x57: 'P'-'W' folded
00093 'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',  // 0x58-0x5f: 'X'-'Z' folded, rest unchanged
00094 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
00095 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
00096 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
00097 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
00098 '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
00099 '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
00100 '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
00101 '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
00102 '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
00103 '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
00104 '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
00105 '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
00106 '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
00107 '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
00108 '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
00109 '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
00110 '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
00111 '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
00112 '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
00113 '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
00114 };
00115
00116
00117
00118
// Byte-to-byte upper-casing table, indexed by character code (0-255).
//
// Every entry maps a byte to itself except indices 0x61-0x7a ('a'-'z'), which
// map to the corresponding uppercase letters 'A'-'Z'. Bytes 0x80-0xff are
// passed through unchanged.
//
// NOTE(review): presumably consumed by ascii_toupper() in
// absl/strings/ascii.h, indexed by the character converted to unsigned char;
// confirm against that header.
00119 const char kToUpper[256] = {
00120 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
00121 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
00122 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
00123 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
00124 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
00125 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
00126 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
00127 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
00128 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
00129 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
00130 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
00131 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
00132 '\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G',  // 0x60-0x67: '`' unchanged, 'a'-'g' folded
00133 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',  // 0x68-0x6f: 'h'-'o' folded
00134 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',  // 0x70-0x77: 'p'-'w' folded
00135 'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',  // 0x78-0x7f: 'x'-'z' folded, rest unchanged
00136 '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
00137 '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
00138 '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
00139 '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
00140 '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
00141 '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
00142 '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
00143 '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
00144 '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
00145 '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
00146 '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
00147 '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
00148 '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
00149 '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
00150 '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
00151 '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
00152 };
00153
00154
00155 }
00156
00157 void AsciiStrToLower(std::string* s) {
00158 for (auto& ch : *s) {
00159 ch = absl::ascii_tolower(ch);
00160 }
00161 }
00162
00163 void AsciiStrToUpper(std::string* s) {
00164 for (auto& ch : *s) {
00165 ch = absl::ascii_toupper(ch);
00166 }
00167 }
00168
00169 void RemoveExtraAsciiWhitespace(std::string* str) {
00170 auto stripped = StripAsciiWhitespace(*str);
00171
00172 if (stripped.empty()) {
00173 str->clear();
00174 return;
00175 }
00176
00177 auto input_it = stripped.begin();
00178 auto input_end = stripped.end();
00179 auto output_it = &(*str)[0];
00180 bool is_ws = false;
00181
00182 for (; input_it < input_end; ++input_it) {
00183 if (is_ws) {
00184
00185 is_ws = absl::ascii_isspace(*input_it);
00186 if (is_ws) --output_it;
00187 } else {
00188 is_ws = absl::ascii_isspace(*input_it);
00189 }
00190
00191 *output_it = *input_it;
00192 ++output_it;
00193 }
00194
00195 str->erase(output_it - &(*str)[0]);
00196 }
00197
00198 }