str_split_internal.h
Go to the documentation of this file.
00001 // Copyright 2017 The Abseil Authors.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //      https://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 //
00015 
00016 // This file declares INTERNAL parts of the Split API that are inline/templated
00017 // or otherwise need to be available at compile time. The main abstractions
00018 // defined in here are
00019 //
00020 //   - ConvertibleToStringView
00021 //   - SplitIterator<>
00022 //   - Splitter<>
00023 //
00024 // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
00025 // absl/strings/str_split.h.
00026 //
00027 // IWYU pragma: private, include "absl/strings/str_split.h"
00028 
00029 #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
00030 #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
00031 
00032 #include <array>
00033 #include <initializer_list>
00034 #include <iterator>
00035 #include <map>
00036 #include <type_traits>
00037 #include <utility>
00038 #include <vector>
00039 
00040 #include "absl/base/macros.h"
00041 #include "absl/base/port.h"
00042 #include "absl/meta/type_traits.h"
00043 #include "absl/strings/string_view.h"
00044 
00045 #ifdef _GLIBCXX_DEBUG
00046 #include "absl/strings/internal/stl_type_traits.h"
00047 #endif  // _GLIBCXX_DEBUG
00048 
00049 namespace absl {
00050 namespace strings_internal {
00051 
00052 // This class is implicitly constructible from everything that absl::string_view
00053 // is implicitly constructible from. If it's constructed from a temporary
00054 // string, the data is moved into a data member so its lifetime matches that of
00055 // the ConvertibleToStringView instance.
00056 class ConvertibleToStringView {
00057  public:
00058   ConvertibleToStringView(const char* s)  // NOLINT(runtime/explicit)
00059       : value_(s) {}
00060   ConvertibleToStringView(char* s) : value_(s) {}  // NOLINT(runtime/explicit)
00061   ConvertibleToStringView(absl::string_view s)     // NOLINT(runtime/explicit)
00062       : value_(s) {}
00063   ConvertibleToStringView(const std::string& s)  // NOLINT(runtime/explicit)
00064       : value_(s) {}
00065 
00066   // Matches rvalue strings and moves their data to a member.
00067 ConvertibleToStringView(std::string&& s)  // NOLINT(runtime/explicit)
00068     : copy_(std::move(s)), value_(copy_) {}
00069 
00070   ConvertibleToStringView(const ConvertibleToStringView& other)
00071       : copy_(other.copy_),
00072         value_(other.IsSelfReferential() ? copy_ : other.value_) {}
00073 
00074   ConvertibleToStringView(ConvertibleToStringView&& other) {
00075     StealMembers(std::move(other));
00076   }
00077 
00078   ConvertibleToStringView& operator=(ConvertibleToStringView other) {
00079     StealMembers(std::move(other));
00080     return *this;
00081   }
00082 
00083   absl::string_view value() const { return value_; }
00084 
00085  private:
00086   // Returns true if ctsp's value refers to its internal copy_ member.
00087   bool IsSelfReferential() const { return value_.data() == copy_.data(); }
00088 
00089   void StealMembers(ConvertibleToStringView&& other) {
00090     if (other.IsSelfReferential()) {
00091       copy_ = std::move(other.copy_);
00092       value_ = copy_;
00093       other.value_ = other.copy_;
00094     } else {
00095       value_ = other.value_;
00096     }
00097   }
00098 
00099   // Holds the data moved from temporary std::string arguments. Declared first
00100   // so that 'value' can refer to 'copy_'.
00101   std::string copy_;
00102   absl::string_view value_;
00103 };
00104 
00105 // An iterator that enumerates the parts of a string from a Splitter. The text
00106 // to be split, the Delimiter, and the Predicate are all taken from the given
00107 // Splitter object. Iterators may only be compared if they refer to the same
00108 // Splitter instance.
00109 //
00110 // This class is NOT part of the public splitting API.
00111 template <typename Splitter>
00112 class SplitIterator {
00113  public:
00114   using iterator_category = std::input_iterator_tag;
00115   using value_type = absl::string_view;
00116   using difference_type = ptrdiff_t;
00117   using pointer = const value_type*;
00118   using reference = const value_type&;
00119 
00120   enum State { kInitState, kLastState, kEndState };
00121   SplitIterator(State state, const Splitter* splitter)
00122       : pos_(0),
00123         state_(state),
00124         splitter_(splitter),
00125         delimiter_(splitter->delimiter()),
00126         predicate_(splitter->predicate()) {
00127     // Hack to maintain backward compatibility. This one block makes it so an
00128     // empty absl::string_view whose .data() happens to be nullptr behaves
00129     // *differently* from an otherwise empty absl::string_view whose .data() is
00130     // not nullptr. This is an undesirable difference in general, but this
00131     // behavior is maintained to avoid breaking existing code that happens to
00132     // depend on this old behavior/bug. Perhaps it will be fixed one day. The
00133     // difference in behavior is as follows:
00134     //   Split(absl::string_view(""), '-');  // {""}
00135     //   Split(absl::string_view(), '-');    // {}
00136     if (splitter_->text().data() == nullptr) {
00137       state_ = kEndState;
00138       pos_ = splitter_->text().size();
00139       return;
00140     }
00141 
00142     if (state_ == kEndState) {
00143       pos_ = splitter_->text().size();
00144     } else {
00145       ++(*this);
00146     }
00147   }
00148 
00149   bool at_end() const { return state_ == kEndState; }
00150 
00151   reference operator*() const { return curr_; }
00152   pointer operator->() const { return &curr_; }
00153 
00154   SplitIterator& operator++() {
00155     do {
00156       if (state_ == kLastState) {
00157         state_ = kEndState;
00158         return *this;
00159       }
00160       const absl::string_view text = splitter_->text();
00161       const absl::string_view d = delimiter_.Find(text, pos_);
00162       if (d.data() == text.data() + text.size()) state_ = kLastState;
00163       curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
00164       pos_ += curr_.size() + d.size();
00165     } while (!predicate_(curr_));
00166     return *this;
00167   }
00168 
00169   SplitIterator operator++(int) {
00170     SplitIterator old(*this);
00171     ++(*this);
00172     return old;
00173   }
00174 
00175   friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
00176     return a.state_ == b.state_ && a.pos_ == b.pos_;
00177   }
00178 
00179   friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
00180     return !(a == b);
00181   }
00182 
00183  private:
00184   size_t pos_;
00185   State state_;
00186   absl::string_view curr_;
00187   const Splitter* splitter_;
00188   typename Splitter::DelimiterType delimiter_;
00189   typename Splitter::PredicateType predicate_;
00190 };
00191 
00192 // HasMappedType<T>::value is true iff there exists a type T::mapped_type.
00193 template <typename T, typename = void>
00194 struct HasMappedType : std::false_type {};
00195 template <typename T>
00196 struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
00197     : std::true_type {};
00198 
00199 // HasValueType<T>::value is true iff there exists a type T::value_type.
00200 template <typename T, typename = void>
00201 struct HasValueType : std::false_type {};
00202 template <typename T>
00203 struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
00204 };
00205 
00206 // HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
00207 template <typename T, typename = void>
00208 struct HasConstIterator : std::false_type {};
00209 template <typename T>
00210 struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
00211     : std::true_type {};
00212 
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
// details below in Splitter<> where this is used.
//
// The answer is computed by overload resolution on a T*: the templated
// overload matches a pointer to any std::initializer_list<U>, and the variadic
// overload catches everything else. The functions are only ever named inside
// decltype, so they are declared but never defined.
template <typename U>
std::true_type IsInitializerListDispatch(std::initializer_list<U>*);
std::false_type IsInitializerListDispatch(...);  // default: No

template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(std::declval<T*>())) {};
00221 
00222 // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
00223 // is true for type 'C'.
00224 //
00225 // Restricts conversion to container-like types (by testing for the presence of
00226 // a const_iterator member type) and also to disable conversion to an
00227 // std::initializer_list (which also has a const_iterator). Otherwise, code
00228 // compiled in C++11 will get an error due to ambiguous conversion paths (in
00229 // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
00230 // or an std::initializer_list<T>).
00231 
00232 template <typename C, bool has_value_type, bool has_mapped_type>
00233 struct SplitterIsConvertibleToImpl : std::false_type {};
00234 
00235 template <typename C>
00236 struct SplitterIsConvertibleToImpl<C, true, false>
00237     : std::is_constructible<typename C::value_type, absl::string_view> {};
00238 
00239 template <typename C>
00240 struct SplitterIsConvertibleToImpl<C, true, true>
00241     : absl::conjunction<
00242           std::is_constructible<typename C::key_type, absl::string_view>,
00243           std::is_constructible<typename C::mapped_type, absl::string_view>> {};
00244 
00245 template <typename C>
00246 struct SplitterIsConvertibleTo
00247     : SplitterIsConvertibleToImpl<
00248           C,
00249 #ifdef _GLIBCXX_DEBUG
00250           !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
00251 #endif  // _GLIBCXX_DEBUG
00252               !IsInitializerList<
00253                   typename std::remove_reference<C>::type>::value &&
00254               HasValueType<C>::value && HasConstIterator<C>::value,
00255           HasMappedType<C>::value> {
00256 };
00257 
00258 // This class implements the range that is returned by absl::StrSplit(). This
00259 // class has templated conversion operators that allow it to be implicitly
00260 // converted to a variety of types that the caller may have specified on the
00261 // left-hand side of an assignment.
00262 //
00263 // The main interface for interacting with this class is through its implicit
00264 // conversion operators. However, this class may also be used like a container
00265 // in that it has .begin() and .end() member functions. It may also be used
00266 // within a range-for loop.
00267 //
00268 // Output containers can be collections of any type that is constructible from
00269 // an absl::string_view.
00270 //
00271 // An Predicate functor may be supplied. This predicate will be used to filter
00272 // the split strings: only strings for which the predicate returns true will be
00273 // kept. A Predicate object is any unary functor that takes an absl::string_view
00274 // and returns bool.
00275 template <typename Delimiter, typename Predicate>
00276 class Splitter {
00277  public:
00278   using DelimiterType = Delimiter;
00279   using PredicateType = Predicate;
00280   using const_iterator = strings_internal::SplitIterator<Splitter>;
00281   using value_type = typename std::iterator_traits<const_iterator>::value_type;
00282 
00283   Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
00284       : text_(std::move(input_text)),
00285         delimiter_(std::move(d)),
00286         predicate_(std::move(p)) {}
00287 
00288   absl::string_view text() const { return text_.value(); }
00289   const Delimiter& delimiter() const { return delimiter_; }
00290   const Predicate& predicate() const { return predicate_; }
00291 
00292   // Range functions that iterate the split substrings as absl::string_view
00293   // objects. These methods enable a Splitter to be used in a range-based for
00294   // loop.
00295   const_iterator begin() const { return {const_iterator::kInitState, this}; }
00296   const_iterator end() const { return {const_iterator::kEndState, this}; }
00297 
00298   // An implicit conversion operator that is restricted to only those containers
00299   // that the splitter is convertible to.
00300   template <typename Container,
00301             typename = typename std::enable_if<
00302                 SplitterIsConvertibleTo<Container>::value>::type>
00303   operator Container() const {  // NOLINT(runtime/explicit)
00304     return ConvertToContainer<Container, typename Container::value_type,
00305                               HasMappedType<Container>::value>()(*this);
00306   }
00307 
00308   // Returns a pair with its .first and .second members set to the first two
00309   // strings returned by the begin() iterator. Either/both of .first and .second
00310   // will be constructed with empty strings if the iterator doesn't have a
00311   // corresponding value.
00312   template <typename First, typename Second>
00313   operator std::pair<First, Second>() const {  // NOLINT(runtime/explicit)
00314     absl::string_view first, second;
00315     auto it = begin();
00316     if (it != end()) {
00317       first = *it;
00318       if (++it != end()) {
00319         second = *it;
00320       }
00321     }
00322     return {First(first), Second(second)};
00323   }
00324 
00325  private:
00326   // ConvertToContainer is a functor converting a Splitter to the requested
00327   // Container of ValueType. It is specialized below to optimize splitting to
00328   // certain combinations of Container and ValueType.
00329   //
00330   // This base template handles the generic case of storing the split results in
00331   // the requested non-map-like container and converting the split substrings to
00332   // the requested type.
00333   template <typename Container, typename ValueType, bool is_map = false>
00334   struct ConvertToContainer {
00335     Container operator()(const Splitter& splitter) const {
00336       Container c;
00337       auto it = std::inserter(c, c.end());
00338       for (const auto sp : splitter) {
00339         *it++ = ValueType(sp);
00340       }
00341       return c;
00342     }
00343   };
00344 
00345   // Partial specialization for a std::vector<absl::string_view>.
00346   //
00347   // Optimized for the common case of splitting to a
00348   // std::vector<absl::string_view>. In this case we first split the results to
00349   // a small array of absl::string_view on the stack, to reduce reallocations.
00350   template <typename A>
00351   struct ConvertToContainer<std::vector<absl::string_view, A>,
00352                             absl::string_view, false> {
00353     std::vector<absl::string_view, A> operator()(
00354         const Splitter& splitter) const {
00355       struct raw_view {
00356         const char* data;
00357         size_t size;
00358         operator absl::string_view() const {  // NOLINT(runtime/explicit)
00359           return {data, size};
00360         }
00361       };
00362       std::vector<absl::string_view, A> v;
00363       std::array<raw_view, 16> ar;
00364       for (auto it = splitter.begin(); !it.at_end();) {
00365         size_t index = 0;
00366         do {
00367           ar[index].data = it->data();
00368           ar[index].size = it->size();
00369           ++it;
00370         } while (++index != ar.size() && !it.at_end());
00371         v.insert(v.end(), ar.begin(), ar.begin() + index);
00372       }
00373       return v;
00374     }
00375   };
00376 
00377   // Partial specialization for a std::vector<std::string>.
00378   //
00379   // Optimized for the common case of splitting to a std::vector<std::string>.
00380   // In this case we first split the results to a std::vector<absl::string_view>
00381   // so the returned std::vector<std::string> can have space reserved to avoid
00382   // std::string moves.
00383   template <typename A>
00384   struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
00385     std::vector<std::string, A> operator()(const Splitter& splitter) const {
00386       const std::vector<absl::string_view> v = splitter;
00387       return std::vector<std::string, A>(v.begin(), v.end());
00388     }
00389   };
00390 
00391   // Partial specialization for containers of pairs (e.g., maps).
00392   //
00393   // The algorithm is to insert a new pair into the map for each even-numbered
00394   // item, with the even-numbered item as the key with a default-constructed
00395   // value. Each odd-numbered item will then be assigned to the last pair's
00396   // value.
00397   template <typename Container, typename First, typename Second>
00398   struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
00399     Container operator()(const Splitter& splitter) const {
00400       Container m;
00401       typename Container::iterator it;
00402       bool insert = true;
00403       for (const auto sp : splitter) {
00404         if (insert) {
00405           it = Inserter<Container>::Insert(&m, First(sp), Second());
00406         } else {
00407           it->second = Second(sp);
00408         }
00409         insert = !insert;
00410       }
00411       return m;
00412     }
00413 
00414     // Inserts the key and value into the given map, returning an iterator to
00415     // the inserted item. Specialized for std::map and std::multimap to use
00416     // emplace() and adapt emplace()'s return value.
00417     template <typename Map>
00418     struct Inserter {
00419       using M = Map;
00420       template <typename... Args>
00421       static typename M::iterator Insert(M* m, Args&&... args) {
00422         return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
00423       }
00424     };
00425 
00426     template <typename... Ts>
00427     struct Inserter<std::map<Ts...>> {
00428       using M = std::map<Ts...>;
00429       template <typename... Args>
00430       static typename M::iterator Insert(M* m, Args&&... args) {
00431         return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
00432       }
00433     };
00434 
00435     template <typename... Ts>
00436     struct Inserter<std::multimap<Ts...>> {
00437       using M = std::multimap<Ts...>;
00438       template <typename... Args>
00439       static typename M::iterator Insert(M* m, Args&&... args) {
00440         return m->emplace(std::make_pair(std::forward<Args>(args)...));
00441       }
00442     };
00443   };
00444 
00445   ConvertibleToStringView text_;
00446   Delimiter delimiter_;
00447   Predicate predicate_;
00448 };
00449 
00450 }  // namespace strings_internal
00451 }  // namespace absl
00452 
00453 #endif  // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_


abseil_cpp
Author(s):
autogenerated on Wed Jun 19 2019 19:42:15