00001 // 00002 // Copyright 2017 The Abseil Authors. 00003 // 00004 // Licensed under the Apache License, Version 2.0 (the "License"); 00005 // you may not use this file except in compliance with the License. 00006 // You may obtain a copy of the License at 00007 // 00008 // https://www.apache.org/licenses/LICENSE-2.0 00009 // 00010 // Unless required by applicable law or agreed to in writing, software 00011 // distributed under the License is distributed on an "AS IS" BASIS, 00012 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 // See the License for the specific language governing permissions and 00014 // limitations under the License. 00015 // 00016 // ----------------------------------------------------------------------------- 00017 // File: escaping.h 00018 // ----------------------------------------------------------------------------- 00019 // 00020 // This header file contains string utilities involved in escaping and 00021 // unescaping strings in various ways. 00022 00023 #ifndef ABSL_STRINGS_ESCAPING_H_ 00024 #define ABSL_STRINGS_ESCAPING_H_ 00025 00026 #include <cstddef> 00027 #include <string> 00028 #include <vector> 00029 00030 #include "absl/base/macros.h" 00031 #include "absl/strings/ascii.h" 00032 #include "absl/strings/str_join.h" 00033 #include "absl/strings/string_view.h" 00034 00035 namespace absl { 00036 00037 // CUnescape() 00038 // 00039 // Unescapes a `source` string and copies it into `dest`, rewriting C-style 00040 // escape sequences (https://en.cppreference.com/w/cpp/language/escape) into 00041 // their proper code point equivalents, returning `true` if successful. 00042 // 00043 // The following unescape sequences can be handled: 00044 // 00045 // * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents 00046 // * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must 00047 // resolve to a single byte or an error will occur. E.g. values greater than 00048 // 0xff will produce an error. 00049 // * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary 00050 // number of following digits are allowed, the unescaped value must resolve 00051 // to a single byte or an error will occur. E.g. '\x0045' is equivalent to 00052 // '\x45', but '\x1234' will produce an error. 00053 // * Unicode escape sequences ('\unnnn' for exactly four hex digits or 00054 // '\Unnnnnnnn' for exactly eight hex digits, which will be encoded in 00055 // UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and 00056 // 0x99). 00057 // 00058 // If any errors are encountered, this function returns `false`, leaving the 00059 // `dest` output parameter in an unspecified state, and stores the first 00060 // encountered error in `error`. To disable error reporting, set `error` to 00061 // `nullptr` or use the overload with no error reporting below. 00062 // 00063 // Example: 00064 // 00065 // std::string s = "foo\\rbar\\nbaz\\t"; 00066 // std::string unescaped_s; 00067 // if (!absl::CUnescape(s, &unescaped_s) { 00068 // ... 00069 // } 00070 // EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t"); 00071 bool CUnescape(absl::string_view source, std::string* dest, std::string* error); 00072 00073 // Overload of `CUnescape()` with no error reporting. 00074 inline bool CUnescape(absl::string_view source, std::string* dest) { 00075 return CUnescape(source, dest, nullptr); 00076 } 00077 00078 // CEscape() 00079 // 00080 // Escapes a 'src' string using C-style escapes sequences 00081 // (https://en.cppreference.com/w/cpp/language/escape), escaping other 00082 // non-printable/non-whitespace bytes as octal sequences (e.g. "\377"). 00083 // 00084 // Example: 00085 // 00086 // std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; 00087 // std::string escaped_s = absl::CEscape(s); 00088 // EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n"); 00089 std::string CEscape(absl::string_view src); 00090 00091 // CHexEscape() 00092 // 00093 // Escapes a 'src' string using C-style escape sequences, escaping 00094 // other non-printable/non-whitespace bytes as hexadecimal sequences (e.g. 00095 // "\xFF"). 00096 // 00097 // Example: 00098 // 00099 // std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; 00100 // std::string escaped_s = absl::CHexEscape(s); 00101 // EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n"); 00102 std::string CHexEscape(absl::string_view src); 00103 00104 // Utf8SafeCEscape() 00105 // 00106 // Escapes a 'src' string using C-style escape sequences, escaping bytes as 00107 // octal sequences, and passing through UTF-8 characters without conversion. 00108 // I.e., when encountering any bytes with their high bit set, this function 00109 // will not escape those values, whether or not they are valid UTF-8. 00110 std::string Utf8SafeCEscape(absl::string_view src); 00111 00112 // Utf8SafeCHexEscape() 00113 // 00114 // Escapes a 'src' string using C-style escape sequences, escaping bytes as 00115 // hexadecimal sequences, and passing through UTF-8 characters without 00116 // conversion. 00117 std::string Utf8SafeCHexEscape(absl::string_view src); 00118 00119 // Base64Unescape() 00120 // 00121 // Converts a `src` string encoded in Base64 to its binary equivalent, writing 00122 // it to a `dest` buffer, returning `true` on success. If `src` contains invalid 00123 // characters, `dest` is cleared and returns `false`. 00124 bool Base64Unescape(absl::string_view src, std::string* dest); 00125 00126 // WebSafeBase64Unescape() 00127 // 00128 // Converts a `src` string encoded in Base64 to its binary equivalent, writing 00129 // it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'. 00130 // If `src` contains invalid characters, `dest` is cleared and returns `false`. 00131 bool WebSafeBase64Unescape(absl::string_view src, std::string* dest); 00132 00133 // Base64Escape() 00134 // 00135 // Encodes a `src` string into a base64-encoded string, with padding characters. 00136 // This function conforms with RFC 4648 section 4 (base64). 00137 void Base64Escape(absl::string_view src, std::string* dest); 00138 std::string Base64Escape(absl::string_view src); 00139 00140 // WebSafeBase64Escape() 00141 // 00142 // Encodes a `src` string into a base64-like string, using '-' instead of '+' 00143 // and '_' instead of '/', and without padding. This function conforms with RFC 00144 // 4648 section 5 (base64url). 00145 void WebSafeBase64Escape(absl::string_view src, std::string* dest); 00146 std::string WebSafeBase64Escape(absl::string_view src); 00147 00148 // HexStringToBytes() 00149 // 00150 // Converts an ASCII hex string into bytes, returning binary data of length 00151 // `from.size()/2`. 00152 std::string HexStringToBytes(absl::string_view from); 00153 00154 // BytesToHexString() 00155 // 00156 // Converts binary data into an ASCII text string, returning a string of size 00157 // `2*from.size()`. 00158 std::string BytesToHexString(absl::string_view from); 00159 00160 } // namespace absl 00161 00162 #endif // ABSL_STRINGS_ESCAPING_H_