str_split.h
Go to the documentation of this file.
1 //
2 // Copyright 2017 The Abseil Authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // https://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // -----------------------------------------------------------------------------
17 // File: str_split.h
18 // -----------------------------------------------------------------------------
19 //
20 // This file contains functions for splitting strings. It defines the main
21 // `StrSplit()` function, several delimiters for determining the boundaries on
22 // which to split the string, and predicates for filtering delimited results.
23 // `StrSplit()` adapts the returned collection to the type specified by the
24 // caller.
25 //
26 // Example:
27 //
28 // // Splits the given string on commas. Returns the results in a
29 // // vector of strings.
30 // std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
31 // // Can also use ","
32 // // v[0] == "a", v[1] == "b", v[2] == "c"
33 //
34 // See StrSplit() below for more information.
35 #ifndef ABSL_STRINGS_STR_SPLIT_H_
36 #define ABSL_STRINGS_STR_SPLIT_H_
37 
38 #include <algorithm>
39 #include <cstddef>
40 #include <map>
41 #include <set>
42 #include <string>
43 #include <utility>
44 #include <vector>
45 
49 #include "absl/strings/strip.h"
50 
51 namespace absl {
52 
53 //------------------------------------------------------------------------------
54 // Delimiters
55 //------------------------------------------------------------------------------
56 //
57 // `StrSplit()` uses delimiters to define the boundaries between elements in the
58 // provided input. Several `Delimiter` types are defined below. If a string
59 // (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
60 // an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
61 // were passed a `ByString` delimiter.
62 //
63 // A `Delimiter` is an object with a `Find()` function that knows how to find
64 // the first occurrence of itself in a given `absl::string_view`.
65 //
66 // The following `Delimiter` types are available for use within `StrSplit()`:
67 //
68 // - `ByString` (default for string arguments)
69 // - `ByChar` (default for a char argument)
70 // - `ByAnyChar`
71 // - `ByLength`
72 // - `MaxSplits`
73 //
74 // A Delimiter's `Find()` member function will be passed an input `text` that is
75 // to be split and a position (`pos`) to begin searching for the next delimiter
76 // in `text`. The returned absl::string_view should refer to the next occurrence
77 // (after `pos`) of the represented delimiter; this returned absl::string_view
78 // represents the next location where the input `text` should be broken.
79 //
80 // The returned absl::string_view may be zero-length if the Delimiter does not
81 // represent a part of the string (e.g., a fixed-length delimiter). If no
82 // delimiter is found in the input `text`, a zero-length absl::string_view
83 // referring to `text.end()` should be returned (e.g.,
84 // `text.substr(text.size())`). It is important that the returned
85 // absl::string_view always be within the bounds of the input `text` given as an
86 // argument--it must not refer to a string that is physically located outside of
87 // the given string.
88 //
89 // The following example is a simple Delimiter object that is created with a
90 // single char and will look for that char in the text passed to the `Find()`
91 // function:
92 //
93 // struct SimpleDelimiter {
94 // const char c_;
95 // explicit SimpleDelimiter(char c) : c_(c) {}
96 // absl::string_view Find(absl::string_view text, size_t pos) {
97 // auto found = text.find(c_, pos);
98 // if (found == absl::string_view::npos)
99 // return text.substr(text.size());
100 //
101 // return text.substr(found, 1);
102 // }
103 // };
104 
105 // ByString
106 //
107 // A sub-string delimiter. If `StrSplit()` is passed a string in place of a
108 // `Delimiter` object, the string will be implicitly converted into a
109 // `ByString` delimiter.
110 //
111 // Example:
112 //
113 // // Because a string literal is converted to an `absl::ByString`,
114 // // the following two splits are equivalent.
115 //
116 // std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
117 //
118 // using absl::ByString;
119 // std::vector<std::string> v2 = absl::StrSplit("a, b, c",
120 // ByString(", "));
121 // // Both v1 and v2 contain: "a", "b", "c"
122 class ByString {
123  public:
// Builds a delimiter that matches the substring `sp`. Only declared here; the
// cross-reference index of this listing places the definition in str_split.cc.
124  explicit ByString(absl::string_view sp);
// Delimiter hook used by `StrSplit()`: `text` is the input being split and
// `pos` the offset at which to start searching. Per the Delimiter contract
// documented earlier in this file, the result must be a view inside `text`.
// Declared const, so a search does not modify the delimiter object.
125  absl::string_view Find(absl::string_view text, size_t pos) const;
126 
127  private:
// Stored as a std::string (not a view); presumably a copy of `sp` -- the
// constructor body is not visible here, confirm in str_split.cc.
128  const std::string delimiter_;
129 };
130 
131 // ByChar
132 //
133 // A single character delimiter. `ByChar` is functionally equivalent to a
134 // 1-char string within a `ByString` delimiter, but slightly more efficient.
135 //
136 // Example:
137 //
138 // // Because a char literal is converted to an absl::ByChar,
139 // // the following two splits are equivalent.
140 // std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
141 // using absl::ByChar;
142 // std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
143 // // Both v1 and v2 contain: "a", "b", "c"
144 //
145 // `ByChar` is also the default delimiter if a single character is given
146 // as the delimiter to `StrSplit()`. For example, the following calls are
147 // equivalent:
148 //
149 // std::vector<std::string> v = absl::StrSplit("a-b", '-');
150 //
151 // using absl::ByChar;
152 // std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
153 //
154 class ByChar {
155  public:
// Stores `c` as the single character to split on.
156  explicit ByChar(char c) : c_(c) {}
// Delimiter hook used by `StrSplit()`: `text` is the input being split and
// `pos` the offset at which to start searching. Per the Delimiter contract
// documented earlier in this file, the result must be a view inside `text`.
// Only declared here; the definition is not part of this header.
157  absl::string_view Find(absl::string_view text, size_t pos) const;
158 
159  private:
// The delimiter character captured by the constructor.
160  char c_;
161 };
162 
163 // ByAnyChar
164 //
165 // A delimiter that will match any of the given byte-sized characters within
166 // its provided string.
167 //
168 // Note: this delimiter works with single-byte string data, but does not work
169 // with variable-width encodings, such as UTF-8.
170 //
171 // Example:
172 //
173 // using absl::ByAnyChar;
174 // std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
175 // // v[0] == "a", v[1] == "b", v[2] == "c"
176 //
177 // If `ByAnyChar` is given the empty string, it behaves exactly like
178 // `ByString` and matches each individual character in the input string.
179 //
180 class ByAnyChar {
181  public:
// `sp` lists the characters that each count as a delimiter (see the class
// comment). Only declared here; the definition is not part of this header.
182  explicit ByAnyChar(absl::string_view sp);
// Delimiter hook used by `StrSplit()`: `text` is the input being split and
// `pos` the offset at which to start searching. Per the Delimiter contract
// documented earlier in this file, the result must be a view inside `text`.
183  absl::string_view Find(absl::string_view text, size_t pos) const;
184 
185  private:
// Stored as a std::string (not a view); presumably a copy of `sp` -- the
// constructor body is not visible here, confirm in str_split.cc.
186  const std::string delimiters_;
187 };
188 
189 // ByLength
190 //
191 // A delimiter for splitting into equal-length strings. The length argument to
192 // the constructor must be greater than 0.
193 //
194 // Note: this delimiter works with single-byte string data, but does not work
195 // with variable-width encodings, such as UTF-8.
196 //
197 // Example:
198 //
199 // using absl::ByLength;
200 // std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
201 
202 // // v[0] == "123", v[1] == "456", v[2] == "789"
203 //
204 // Note that the string does not have to be a multiple of the fixed split
205 // length. In such a case, the last substring will be shorter.
206 //
207 // using absl::ByLength;
208 // std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
209 //
210 // // v[0] == "12", v[1] == "34", v[2] == "5"
211 class ByLength {
212  public:
// `length` is the size of each piece. The class comment requires it to be
// greater than 0; whether that is enforced is decided by the out-of-line
// definition, which is not visible in this header.
213  explicit ByLength(ptrdiff_t length);
// Delimiter hook used by `StrSplit()`: `text` is the input being split and
// `pos` the offset at which to start searching. Per the Delimiter contract
// documented earlier in this file, a fixed-length delimiter may return a
// zero-length view, and the result must lie inside `text`.
214  absl::string_view Find(absl::string_view text, size_t pos) const;
215 
216  private:
// Piece length; signed (ptrdiff_t) and immutable after construction.
217  const ptrdiff_t length_;
218 };
219 
220 namespace strings_internal {
221 
222 // A traits-like metafunction for selecting the default Delimiter object type
223 // for a particular Delimiter type. The base case simply exposes type Delimiter
224 // itself as the delimiter's `type`. However, there are specializations that
225 // map `char` to the ByChar delimiter and string-like objects to ByString.
226 // This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
227 // a char or string-like object (e.g., ',' or ",") as the delimiter argument;
228 // it is treated as if the matching ByChar or ByString delimiter was given.
229 template <typename Delimiter>
230 struct SelectDelimiter {
231  using type = Delimiter;  // Base case: an explicit Delimiter object is used as-is.
232 };
233 
// Full specializations: each maps a convenience argument type to the
// Delimiter class that implements it.
234 template <>
235 struct SelectDelimiter<char> {
236  using type = ByChar;  // A single char splits with ByChar.
237 };
238 template <>
239 struct SelectDelimiter<char*> {
240  using type = ByString;  // Mutable C string.
241 };
242 template <>
243 struct SelectDelimiter<const char*> {
244  using type = ByString;  // C string / decayed string literal.
245 };
246 template <>
247 struct SelectDelimiter<absl::string_view> {
248  using type = ByString;  // String view.
249 };
250 template <>
251 struct SelectDelimiter<std::string> {
252  using type = ByString;  // Owning string.
253 };
254 
255 // Wraps another delimiter and sets a max number of matches for that delimiter.
256 template <typename Delimiter>
257 class MaxSplitsImpl {
258  public:
// Wraps `delimiter` and allows at most `limit` matches. The match counter
// starts at zero.
259  MaxSplitsImpl(Delimiter delimiter, int limit)
260  : delimiter_(delimiter), limit_(limit), count_(0) {}
// Delimiter hook. Non-const because every call advances the match counter.
// The first `limit_` calls are forwarded to the wrapped delimiter. The call
// on which the counter equals `limit_` instead reports "no more matches" in
// the form the Delimiter contract asks for: a zero-length view positioned at
// the end of `text`. With `limit_ == 0` that happens on the very first call.
// NOTE(review): the counter keeps increasing afterwards, so a later call would
// be forwarded again; presumably callers stop after the first "no match".
261  absl::string_view Find(absl::string_view text, size_t pos) {
262  if (count_++ == limit_) {
263  return absl::string_view(text.data() + text.size(),
264  0); // No more matches.
265  }
266  return delimiter_.Find(text, pos);
267  }
268 
269  private:
270  Delimiter delimiter_;  // The wrapped delimiter that performs the real search.
271  const int limit_;  // Maximum number of matches to report.
272  int count_;  // Number of Find() calls made so far.
273 };
274 
275 } // namespace strings_internal
276 
277 // MaxSplits()
278 //
279 // A delimiter that limits the number of matches which can occur to the passed
280 // `limit`. The last element in the returned collection will contain all
281 // remaining unsplit pieces, which may contain instances of the delimiter.
282 // The collection will contain at most `limit` + 1 elements.
283 // Example:
284 //
285 // using absl::MaxSplits;
286 // std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
287 //
288 // // v[0] == "a", v[1] == "b,c"
289 template <typename Delimiter>
// Returns a MaxSplitsImpl wrapping the delimiter class that SelectDelimiter
// picks for `Delimiter`: a char is wrapped as ByChar, a string-like argument
// as ByString, and an explicit Delimiter object as itself.
//
// `delimiter`: the delimiter (or char/string shorthand) to wrap.
// `limit`: maximum number of matches the returned delimiter will report.
290 strings_internal::MaxSplitsImpl<
291  typename strings_internal::SelectDelimiter<Delimiter>::type>
292 MaxSplits(Delimiter delimiter, int limit) {
293  typedef
294  typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
// Convert the argument to the selected delimiter class before wrapping it.
295  return strings_internal::MaxSplitsImpl<DelimiterType>(
296  DelimiterType(delimiter), limit);
297 }
298 
299 //------------------------------------------------------------------------------
300 // Predicates
301 //------------------------------------------------------------------------------
302 //
303 // Predicates filter the results of a `StrSplit()` by determining whether or not
304 // a resultant element is included in the result set. A predicate may be passed
305 // as an optional third argument to the `StrSplit()` function.
306 //
307 // Predicates are unary functions (or functors) that take a single
308 // `absl::string_view` argument and return a bool indicating whether the
309 // argument should be included (`true`) or excluded (`false`).
310 //
311 // Predicates are useful when filtering out empty substrings. By default, empty
312 // substrings may be returned by `StrSplit()`, which is similar to the way split
313 // functions work in other programming languages.
314 
315 // AllowEmpty()
316 //
317 // Always returns `true`, indicating that all strings--including empty
318 // strings--should be included in the split output. This predicate is not
319 // strictly needed because this is the default behavior of `StrSplit()`;
320 // however, it might be useful at some call sites to make the intent explicit.
321 //
322 // Example:
323 //
324 // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
325 //
326 // // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
327 struct AllowEmpty {
// Predicate call operator: ignores its (unnamed) argument and accepts every
// piece, so nothing is filtered out.
328  bool operator()(absl::string_view) const { return true; }
329 };
330 
331 // SkipEmpty()
332 //
333 // Returns `false` if the given `absl::string_view` is empty, indicating that
334 // `StrSplit()` should omit the empty string.
335 //
336 // Example:
337 //
338 // std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
339 //
340 // // v[0] == "a", v[1] == "b"
341 //
342 // Note: `SkipEmpty()` does not consider a string containing only whitespace
343 // to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
344 // predicate.
345 struct SkipEmpty {
// Predicate call operator: keeps `sp` (returns true) only when it has at least
// one character. Whitespace counts as content here.
346  bool operator()(absl::string_view sp) const { return !sp.empty(); }
347 };
348 
349 // SkipWhitespace()
350 //
351 // Returns `false` if the given `absl::string_view` is empty *or* contains only
352 // whitespace, indicating that `StrSplit()` should omit the string.
353 //
354 // Example:
355 //
356 // std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
357 // ',', SkipWhitespace());
358 // // v[0] == " a ", v[1] == "b"
359 //
360 // // SkipEmpty() would return whitespace elements
361 // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
362 // // v[0] == " a ", v[1] == " ", v[2] == "b"
363 struct SkipWhitespace {
// Predicate call operator: keeps `sp` (returns true) only if something remains
// after ASCII whitespace is stripped from both ends. `sp` is taken by value, so
// the stripping affects only this local view, never the piece that is stored.
364  bool operator()(absl::string_view sp) const {
365  sp = absl::StripAsciiWhitespace(sp);
366  return !sp.empty();
367  }
368 };
369 
370 //------------------------------------------------------------------------------
371 // StrSplit()
372 //------------------------------------------------------------------------------
373 
374 // StrSplit()
375 //
376 // Splits a given string based on the provided `Delimiter` object, returning the
377 // elements within the type specified by the caller. Optionally, you may pass a
378 // `Predicate` to `StrSplit()` indicating whether to include or exclude the
379 // resulting element within the final result set. (See the overviews for
380 // Delimiters and Predicates above.)
381 //
382 // Example:
383 //
384 // std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
385 // // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
386 //
387 // You can also provide an explicit `Delimiter` object:
388 //
389 // Example:
390 //
391 // using absl::ByAnyChar;
392 // std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
393 // // v[0] == "a", v[1] == "b", v[2] == "c"
394 //
395 // See above for more information on delimiters.
396 //
397 // By default, empty strings are included in the result set. You can optionally
398 // include a third `Predicate` argument to apply a test for whether the
399 // resultant element should be included in the result set:
400 //
401 // Example:
402 //
403 // std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
404 // ',', SkipWhitespace());
405 // // v[0] == " a ", v[1] == "b"
406 //
407 // See above for more information on predicates.
408 //
409 //------------------------------------------------------------------------------
410 // StrSplit() Return Types
411 //------------------------------------------------------------------------------
412 //
413 // The `StrSplit()` function adapts the returned collection to the collection
414 // specified by the caller (e.g. `std::vector` above). The returned collections
415 // may contain `std::string`, `absl::string_view` (in which case the original
416 // string being split must ensure that it outlives the collection), or any
417 // object that can be explicitly created from an `absl::string_view`. This
418 // behavior works for:
419 //
420 // 1) All standard STL containers including `std::vector`, `std::list`,
421 // `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
422 // 2) `std::pair` (which is not actually a container). See below.
423 //
424 // Example:
425 //
426 // // The results are returned as `absl::string_view` objects. Note that we
427 // // have to ensure that the input string outlives any results.
428 // std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
429 //
430 // // Stores results in a std::set<std::string>, which also performs
431 // // de-duplication and orders the elements in ascending order.
432 // std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
433 // // a contains exactly "a", "b", "c" (iterated in that order)
434 //
435 // // `StrSplit()` can be used within a range-based for loop, in which case
436 // // each element will be of type `absl::string_view`.
437 // std::vector<std::string> v;
438 // for (const auto sv : absl::StrSplit("a,b,c", ',')) {
439 // if (sv != "b") v.emplace_back(sv);
440 // }
441 // // v[0] == "a", v[1] == "c"
442 //
443 // // Stores results in a map. The map implementation assumes that the input
444 // // is provided as a series of key/value pairs. For example, the 0th element
445 // // resulting from the split will be stored as a key to the 1st element. If
446 // // an odd number of elements are resolved, the last element is paired with
447 // // a default-constructed value (e.g., empty string).
448 // std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
449 // // m["a"] == "b", m["c"] == "" // last component value equals ""
450 //
451 // Splitting to `std::pair` is an interesting case because it can hold only two
452 // elements and is not a collection type. When splitting to a `std::pair` the
453 // first two split strings become the `std::pair` `.first` and `.second`
454 // members, respectively. The remaining split substrings are discarded. If there
455 // are fewer than two split substrings, the empty string is used for the
456 // corresponding
457 // `std::pair` member.
458 //
459 // Example:
460 //
461 // // Stores first two split strings as the members in a std::pair.
462 // std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
463 // // p.first == "a", p.second == "b" // "c" is omitted.
464 //
465 // The `StrSplit()` function can be used multiple times to perform more
466 // complicated splitting logic, such as intelligently parsing key-value pairs.
467 //
468 // Example:
469 //
470 // // The input string "a=b=c,d=e,f=,g" becomes
471 // // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
472 // std::map<std::string, std::string> m;
473 // for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
474 // m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
475 // }
476 // EXPECT_EQ("b=c", m.find("a")->second);
477 // EXPECT_EQ("e", m.find("d")->second);
478 // EXPECT_EQ("", m.find("f")->second);
479 // EXPECT_EQ("", m.find("g")->second);
480 //
481 // WARNING: Due to a legacy bug that is maintained for backward compatibility,
482 // splitting the following empty string_views produces different results:
483 //
484 // absl::StrSplit(absl::string_view(""), '-'); // {""}
485 // absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""}
486 //
487 // Try not to depend on this distinction because the bug may one day be fixed.
488 template <typename Delimiter>
// Two-argument form: splits `text` on `d` and keeps every piece, including
// empty ones (the predicate is fixed to AllowEmpty).
//
// `text`: the input, accepted through strings_internal::ConvertibleToStringView
//         (declared outside this header).
// `d`: a Delimiter object, or a char/string shorthand that SelectDelimiter maps
//      to ByChar/ByString.
// Returns a strings_internal::Splitter; per the documentation above, that
// object adapts to the container type the caller assigns it to.
489 strings_internal::Splitter<
490  typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>
491 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
492  using DelimiterType =
493  typename strings_internal::SelectDelimiter<Delimiter>::type;
// `text` is a by-value parameter, so it is moved (not copied) into the Splitter.
494  return strings_internal::Splitter<DelimiterType, AllowEmpty>(
495  std::move(text), DelimiterType(d), AllowEmpty());
496 }
497 
498 template <typename Delimiter, typename Predicate>
// Three-argument form: splits `text` on `d` and keeps only the pieces for which
// the predicate `p` returns true.
//
// `text`: the input, accepted through strings_internal::ConvertibleToStringView
//         (declared outside this header).
// `d`: a Delimiter object, or a char/string shorthand that SelectDelimiter maps
//      to ByChar/ByString.
// `p`: unary predicate over absl::string_view (e.g. SkipEmpty, SkipWhitespace).
// Returns a strings_internal::Splitter; per the documentation above, that
// object adapts to the container type the caller assigns it to.
499 strings_internal::Splitter<
500  typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>
501 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
502  Predicate p) {
503  using DelimiterType =
504  typename strings_internal::SelectDelimiter<Delimiter>::type;
// Both by-value parameters are moved into the Splitter rather than copied.
505  return strings_internal::Splitter<DelimiterType, Predicate>(
506  std::move(text), DelimiterType(d), std::move(p));
507 }
508 
509 } // namespace absl
510 
511 #endif // ABSL_STRINGS_STR_SPLIT_H_
const ptrdiff_t length_
Definition: str_split.h:217
const std::string delimiter_
Definition: str_split.h:128
bool operator()(absl::string_view) const
Definition: str_split.h:328
strings_internal::MaxSplitsImpl< typename strings_internal::SelectDelimiter< Delimiter >::type > MaxSplits(Delimiter delimiter, int limit)
Definition: str_split.h:292
MaxSplitsImpl(Delimiter delimiter, int limit)
Definition: str_split.h:259
Definition: algorithm.h:29
constexpr size_type size() const noexcept
Definition: string_view.h:260
strings_internal::Splitter< typename strings_internal::SelectDelimiter< Delimiter >::type, AllowEmpty > StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d)
Definition: str_split.h:491
absl::string_view Find(absl::string_view text, size_t pos) const
Definition: str_split.cc:85
absl::string_view Find(absl::string_view text, size_t pos)
Definition: str_split.h:261
ABSL_MUST_USE_RESULT absl::string_view StripAsciiWhitespace(absl::string_view str)
Definition: ascii.h:223
ByChar(char c)
Definition: str_split.h:156
constexpr bool empty() const noexcept
Definition: string_view.h:277
const std::string delimiters_
Definition: str_split.h:186
constexpr const_pointer data() const noexcept
Definition: string_view.h:302
bool operator()(absl::string_view sp) const
Definition: str_split.h:364
bool operator()(absl::string_view sp) const
Definition: str_split.h:346
constexpr absl::remove_reference_t< T > && move(T &&t) noexcept
Definition: utility.h:219
std::size_t length
Definition: test_util.cc:52
ByString(absl::string_view sp)
Definition: str_split.cc:83


abseil_cpp
Author(s):
autogenerated on Mon Feb 28 2022 21:31:20