str_split.h
Go to the documentation of this file.
00001 //
00002 // Copyright 2017 The Abseil Authors.
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 //
00008 //      https://www.apache.org/licenses/LICENSE-2.0
00009 //
00010 // Unless required by applicable law or agreed to in writing, software
00011 // distributed under the License is distributed on an "AS IS" BASIS,
00012 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013 // See the License for the specific language governing permissions and
00014 // limitations under the License.
00015 //
00016 // -----------------------------------------------------------------------------
00017 // File: str_split.h
00018 // -----------------------------------------------------------------------------
00019 //
00020 // This file contains functions for splitting strings. It defines the main
00021 // `StrSplit()` function, several delimiters for determining the boundaries on
00022 // which to split the string, and predicates for filtering delimited results.
00023 // `StrSplit()` adapts the returned collection to the type specified by the
00024 // caller.
00025 //
00026 // Example:
00027 //
00028 //   // Splits the given string on commas. Returns the results in a
00029 //   // vector of strings.
00030 //   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
00031 //   // Can also use ","
00032 //   // v[0] == "a", v[1] == "b", v[2] == "c"
00033 //
00034 // See StrSplit() below for more information.
00035 #ifndef ABSL_STRINGS_STR_SPLIT_H_
00036 #define ABSL_STRINGS_STR_SPLIT_H_
00037 
00038 #include <algorithm>
00039 #include <cstddef>
00040 #include <map>
00041 #include <set>
00042 #include <string>
00043 #include <utility>
00044 #include <vector>
00045 
00046 #include "absl/base/internal/raw_logging.h"
00047 #include "absl/strings/internal/str_split_internal.h"
00048 #include "absl/strings/string_view.h"
00049 #include "absl/strings/strip.h"
00050 
00051 namespace absl {
00052 
00053 //------------------------------------------------------------------------------
00054 // Delimiters
00055 //------------------------------------------------------------------------------
00056 //
00057 // `StrSplit()` uses delimiters to define the boundaries between elements in the
00058 // provided input. Several `Delimiter` types are defined below. If a string
00059 // (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
00060 // an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
00061 // were passed a `ByString` delimiter.
00062 //
00063 // A `Delimiter` is an object with a `Find()` function that knows how to find
00064 // the first occurrence of itself in a given `absl::string_view`.
00065 //
00066 // The following `Delimiter` types are available for use within `StrSplit()`:
00067 //
00068 //   - `ByString` (default for string arguments)
00069 //   - `ByChar` (default for a char argument)
00070 //   - `ByAnyChar`
00071 //   - `ByLength`
00072 //   - `MaxSplits`
00073 //
00074 // A Delimiter's `Find()` member function will be passed an input `text` that is
00075 // to be split and a position (`pos`) to begin searching for the next delimiter
00076 // in `text`. The returned absl::string_view should refer to the next occurrence
00077 // (after `pos`) of the represented delimiter; this returned absl::string_view
00078 // represents the next location where the input `text` should be broken.
00079 //
00080 // The returned absl::string_view may be zero-length if the Delimiter does not
00081 // represent a part of the string (e.g., a fixed-length delimiter). If no
00082 // delimiter is found in the input `text`, a zero-length absl::string_view
00083 // referring to `text.end()` should be returned (e.g.,
00084 // `text.substr(text.size())`). It is important that the returned
00085 // absl::string_view always be within the bounds of the input `text` given as an
00086 // argument--it must not refer to a string that is physically located outside of
00087 // the given string.
00088 //
00089 // The following example is a simple Delimiter object that is created with a
00090 // single char and will look for that char in the text passed to the `Find()`
00091 // function:
00092 //
00093 //   struct SimpleDelimiter {
00094 //     const char c_;
00095 //     explicit SimpleDelimiter(char c) : c_(c) {}
00096 //     absl::string_view Find(absl::string_view text, size_t pos) {
00097 //       auto found = text.find(c_, pos);
00098 //       if (found == absl::string_view::npos)
00099 //         return text.substr(text.size());
00100 //
00101 //       return text.substr(found, 1);
00102 //     }
00103 //   };
00104 
00105 // ByString
00106 //
00107 // A sub-string delimiter. If `StrSplit()` is passed a string in place of a
00108 // `Delimiter` object, the string will be implicitly converted into a
00109 // `ByString` delimiter.
00110 //
00111 // Example:
00112 //
00113 //   // Because a string literal is converted to an `absl::ByString`,
00114 //   // the following two splits are equivalent.
00115 //
00116 //   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
00117 //
00118 //   using absl::ByString;
00119 //   std::vector<std::string> v2 = absl::StrSplit("a, b, c",
00120 //                                                ByString(", "));
00121 //   // v[0] == "a", v[1] == "b", v[2] == "c"
00122 class ByString {
00123  public:
00124   explicit ByString(absl::string_view sp);
00125   absl::string_view Find(absl::string_view text, size_t pos) const;
00126 
00127  private:
00128   const std::string delimiter_;
00129 };
00130 
00131 // ByChar
00132 //
00133 // A single character delimiter. `ByChar` is functionally equivalent to a
00134 // 1-char string within a `ByString` delimiter, but slightly more efficient.
00135 //
00136 // Example:
00137 //
00138 //   // Because a char literal is converted to an absl::ByChar,
00139 //   // the following two splits are equivalent.
00140 //   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
00141 //   using absl::ByChar;
00142 //   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
00143 //   // v[0] == "a", v[1] == "b", v[2] == "c"
00144 //
00145 // `ByChar` is also the default delimiter if a single character is given
00146 // as the delimiter to `StrSplit()`. For example, the following calls are
00147 // equivalent:
00148 //
00149 //   std::vector<std::string> v = absl::StrSplit("a-b", '-');
00150 //
00151 //   using absl::ByChar;
00152 //   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
00153 //
00154 class ByChar {
00155  public:
00156   explicit ByChar(char c) : c_(c) {}
00157   absl::string_view Find(absl::string_view text, size_t pos) const;
00158 
00159  private:
00160   char c_;
00161 };
00162 
00163 // ByAnyChar
00164 //
00165 // A delimiter that will match any of the given byte-sized characters within
00166 // its provided string.
00167 //
00168 // Note: this delimiter works with single-byte string data, but does not work
00169 // with variable-width encodings, such as UTF-8.
00170 //
00171 // Example:
00172 //
00173 //   using absl::ByAnyChar;
00174 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
00175 //   // v[0] == "a", v[1] == "b", v[2] == "c"
00176 //
00177 // If `ByAnyChar` is given the empty string, it behaves exactly like
00178 // `ByString` and matches each individual character in the input string.
00179 //
00180 class ByAnyChar {
00181  public:
00182   explicit ByAnyChar(absl::string_view sp);
00183   absl::string_view Find(absl::string_view text, size_t pos) const;
00184 
00185  private:
00186   const std::string delimiters_;
00187 };
00188 
00189 // ByLength
00190 //
00191 // A delimiter for splitting into equal-length strings. The length argument to
00192 // the constructor must be greater than 0.
00193 //
00194 // Note: this delimiter works with single-byte string data, but does not work
00195 // with variable-width encodings, such as UTF-8.
00196 //
00197 // Example:
00198 //
00199 //   using absl::ByLength;
00200 //   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
00201 //
00202 //   // v[0] == "123", v[1] == "456", v[2] == "789"
00203 //
00204 // Note that the string does not have to be a multiple of the fixed split
00205 // length. In such a case, the last substring will be shorter.
00206 //
00207 //   using absl::ByLength;
00208 //   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
00209 //
00210 //   // v[0] == "12", v[1] == "34", v[2] == "5"
00211 class ByLength {
00212  public:
00213   explicit ByLength(ptrdiff_t length);
00214   absl::string_view Find(absl::string_view text, size_t pos) const;
00215 
00216  private:
00217   const ptrdiff_t length_;
00218 };
00219 
00220 namespace strings_internal {
00221 
00222 // A traits-like metafunction for selecting the default Delimiter object type
00223 // for a particular Delimiter type. The base case simply exposes type Delimiter
00224 // itself as the delimiter's type. However, there are specializations that map
00225 // `char` to ByChar and string-like objects to ByString. This allows functions
00226 // like absl::StrSplit() and absl::MaxSplits() to accept a plain character
00227 // (e.g., ',') or a string-like object (e.g., ",") as the delimiter argument
00228 // and treat it as if a ByChar or ByString delimiter had been given.
00229 template <typename Delimiter>
00230 struct SelectDelimiter {
00231   using type = Delimiter;
00232 };
00233 
00234 template <>
00235 struct SelectDelimiter<char> {
00236   using type = ByChar;
00237 };
00238 template <>
00239 struct SelectDelimiter<char*> {
00240   using type = ByString;
00241 };
00242 template <>
00243 struct SelectDelimiter<const char*> {
00244   using type = ByString;
00245 };
00246 template <>
00247 struct SelectDelimiter<absl::string_view> {
00248   using type = ByString;
00249 };
00250 template <>
00251 struct SelectDelimiter<std::string> {
00252   using type = ByString;
00253 };
00254 
00255 // Wraps another delimiter and sets a max number of matches for that delimiter.
00256 template <typename Delimiter>
00257 class MaxSplitsImpl {
00258  public:
00259   MaxSplitsImpl(Delimiter delimiter, int limit)
00260       : delimiter_(delimiter), limit_(limit), count_(0) {}
00261   absl::string_view Find(absl::string_view text, size_t pos) {
00262     if (count_++ == limit_) {
00263       return absl::string_view(text.data() + text.size(),
00264                                0);  // No more matches.
00265     }
00266     return delimiter_.Find(text, pos);
00267   }
00268 
00269  private:
00270   Delimiter delimiter_;
00271   const int limit_;
00272   int count_;
00273 };
00274 
00275 }  // namespace strings_internal
00276 
00277 // MaxSplits()
00278 //
00279 // A delimiter that limits the number of matches which can occur to the passed
00280 // `limit`. The last element in the returned collection will contain all
00281 // remaining unsplit pieces, which may contain instances of the delimiter.
00282 // The collection will contain at most `limit` + 1 elements.
00283 // Example:
00284 //
00285 //   using absl::MaxSplits;
00286 //   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
00287 //
00288 //   // v[0] == "a", v[1] == "b,c"
00289 template <typename Delimiter>
00290 inline strings_internal::MaxSplitsImpl<
00291     typename strings_internal::SelectDelimiter<Delimiter>::type>
00292 MaxSplits(Delimiter delimiter, int limit) {
00293   typedef
00294       typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
00295   return strings_internal::MaxSplitsImpl<DelimiterType>(
00296       DelimiterType(delimiter), limit);
00297 }
00298 
00299 //------------------------------------------------------------------------------
00300 // Predicates
00301 //------------------------------------------------------------------------------
00302 //
00303 // Predicates filter the results of a `StrSplit()` by determining whether or not
00304 // a resultant element is included in the result set. A predicate may be passed
00305 // as an optional third argument to the `StrSplit()` function.
00306 //
00307 // Predicates are unary functions (or functors) that take a single
00308 // `absl::string_view` argument and return a bool indicating whether the
00309 // argument should be included (`true`) or excluded (`false`).
00310 //
00311 // Predicates are useful when filtering out empty substrings. By default, empty
00312 // substrings may be returned by `StrSplit()`, which is similar to the way split
00313 // functions work in other programming languages.
00314 
00315 // AllowEmpty()
00316 //
00317 // Always returns `true`, indicating that all strings--including empty
00318 // strings--should be included in the split output. This predicate is not
00319 // strictly needed because this is the default behavior of `StrSplit()`;
00320 // however, it might be useful at some call sites to make the intent explicit.
00321 //
00322 // Example:
00323 //
00324 //  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
00325 //
00326 //  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
00327 struct AllowEmpty {
00328   bool operator()(absl::string_view) const { return true; }
00329 };
00330 
00331 // SkipEmpty()
00332 //
00333 // Returns `false` if the given `absl::string_view` is empty, indicating that
00334 // `StrSplit()` should omit the empty string.
00335 //
00336 // Example:
00337 //
00338 //   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
00339 //
00340 //   // v[0] == "a", v[1] == "b"
00341 //
00342 // Note: `SkipEmpty()` does not consider a string containing only whitespace
00343 // to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
00344 // predicate.
00345 struct SkipEmpty {
00346   bool operator()(absl::string_view sp) const { return !sp.empty(); }
00347 };
00348 
00349 // SkipWhitespace()
00350 //
00351 // Returns `false` if the given `absl::string_view` is empty *or* contains only
00352 // whitespace, indicating that `StrSplit()` should omit the string.
00353 //
00354 // Example:
00355 //
00356 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
00357 //                                               ',', SkipWhitespace());
00358 //   // v[0] == " a ", v[1] == "b"
00359 //
00360 //   // SkipEmpty() would return whitespace elements
00361 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
00362 //   // v[0] == " a ", v[1] == " ", v[2] == "b"
00363 struct SkipWhitespace {
00364   bool operator()(absl::string_view sp) const {
00365     sp = absl::StripAsciiWhitespace(sp);
00366     return !sp.empty();
00367   }
00368 };
00369 
00370 //------------------------------------------------------------------------------
00371 //                                  StrSplit()
00372 //------------------------------------------------------------------------------
00373 
00374 // StrSplit()
00375 //
00376 // Splits a given string based on the provided `Delimiter` object, returning the
00377 // elements within the type specified by the caller. Optionally, you may pass a
00378 // `Predicate` to `StrSplit()` indicating whether to include or exclude the
00379 // resulting element within the final result set. (See the overviews for
00380 // Delimiters and Predicates above.)
00381 //
00382 // Example:
00383 //
00384 //   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
00385 //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
00386 //
00387 // You can also provide an explicit `Delimiter` object:
00388 //
00389 // Example:
00390 //
00391 //   using absl::ByAnyChar;
00392 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
00393 //   // v[0] == "a", v[1] == "b", v[2] == "c"
00394 //
00395 // See above for more information on delimiters.
00396 //
00397 // By default, empty strings are included in the result set. You can optionally
00398 // include a third `Predicate` argument to apply a test for whether the
00399 // resultant element should be included in the result set:
00400 //
00401 // Example:
00402 //
00403 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
00404 //                                               ',', SkipWhitespace());
00405 //   // v[0] == " a ", v[1] == "b"
00406 //
00407 // See above for more information on predicates.
00408 //
00409 //------------------------------------------------------------------------------
00410 // StrSplit() Return Types
00411 //------------------------------------------------------------------------------
00412 //
00413 // The `StrSplit()` function adapts the returned collection to the collection
00414 // specified by the caller (e.g. `std::vector` above). The returned collections
00415 // may contain `std::string`, `absl::string_view` (in which case the original
00416 // string being split must ensure that it outlives the collection), or any
00417 // object that can be explicitly created from an `absl::string_view`. This
00418 // behavior works for:
00419 //
00420 // 1) All standard STL containers including `std::vector`, `std::list`,
00421 //    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
00422 // 2) `std::pair` (which is not actually a container). See below.
00423 //
00424 // Example:
00425 //
00426 //   // The results are returned as `absl::string_view` objects. Note that we
00427 //   // have to ensure that the input string outlives any results.
00428 //   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
00429 //
00430 //   // Stores results in a std::set<std::string>, which also performs
00431 //   // de-duplication and orders the elements in ascending order.
00432 //   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
00433 //   // `a` holds "a", "b", "c" (in that order)
00434 //
00435 //   // `StrSplit()` can be used within a range-based for loop, in which case
00436 //   // each element will be of type `absl::string_view`.
00437 //   std::vector<std::string> v;
00438 //   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
00439 //     if (sv != "b") v.emplace_back(sv);
00440 //   }
00441 //   // v[0] == "a", v[1] == "c"
00442 //
00443 //   // Stores results in a map. The map implementation assumes that the input
00444 //   // is provided as a series of key/value pairs. For example, the 0th element
00445 //   // resulting from the split will be stored as a key to the 1st element. If
00446 //   // an odd number of elements are resolved, the last element is paired with
00447 //   // a default-constructed value (e.g., empty string).
00448 //   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
00449 //   // m["a"] == "b", m["c"] == ""     // last component value equals ""
00450 //
00451 // Splitting to `std::pair` is an interesting case because it can hold only two
00452 // elements and is not a collection type. When splitting to a `std::pair` the
00453 // first two split strings become the `std::pair` `.first` and `.second`
00454 // members, respectively. The remaining split substrings are discarded. If there
00455 // are fewer than two split substrings, the empty string is used for the
00456 // corresponding `std::pair` member.
00458 //
00459 // Example:
00460 //
00461 //   // Stores first two split strings as the members in a std::pair.
00462 //   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
00463 //   // p.first == "a", p.second == "b"       // "c" is omitted.
00464 //
00465 // The `StrSplit()` function can be used multiple times to perform more
00466 // complicated splitting logic, such as intelligently parsing key-value pairs.
00467 //
00468 // Example:
00469 //
00470 //   // The input string "a=b=c,d=e,f=,g" becomes
00471 //   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
00472 //   std::map<std::string, std::string> m;
00473 //   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
00474 //     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
00475 //   }
00476 //   EXPECT_EQ("b=c", m.find("a")->second);
00477 //   EXPECT_EQ("e", m.find("d")->second);
00478 //   EXPECT_EQ("", m.find("f")->second);
00479 //   EXPECT_EQ("", m.find("g")->second);
00480 //
00481 // WARNING: Due to a legacy bug that is maintained for backward compatibility,
00482 // splitting the following empty string_views produces different results:
00483 //
00484 //   absl::StrSplit(absl::string_view(""), '-');  // {""}
00485 //   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}
00486 //
00487 // Try not to depend on this distinction because the bug may one day be fixed.
00488 template <typename Delimiter>
00489 strings_internal::Splitter<
00490     typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>
00491 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
00492   using DelimiterType =
00493       typename strings_internal::SelectDelimiter<Delimiter>::type;
00494   return strings_internal::Splitter<DelimiterType, AllowEmpty>(
00495       std::move(text), DelimiterType(d), AllowEmpty());
00496 }
00497 
00498 template <typename Delimiter, typename Predicate>
00499 strings_internal::Splitter<
00500     typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>
00501 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
00502          Predicate p) {
00503   using DelimiterType =
00504       typename strings_internal::SelectDelimiter<Delimiter>::type;
00505   return strings_internal::Splitter<DelimiterType, Predicate>(
00506       std::move(text), DelimiterType(d), std::move(p));
00507 }
00508 
00509 }  // namespace absl
00510 
00511 #endif  // ABSL_STRINGS_STR_SPLIT_H_


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:42:15