LCOV - code coverage report
Current view: top level - llvm/ADT - StringRef.h (source / functions) Hit Total Coverage
Test: clang.info Lines: 1 1 100.0 %
Date: 2016-01-31 12:01:00 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #ifndef LLVM_ADT_STRINGREF_H
      11             : #define LLVM_ADT_STRINGREF_H
      12             : 
      13             : #include <algorithm>
      14             : #include <cassert>
      15             : #include <cstring>
      16             : #include <limits>
      17             : #include <string>
      18             : #include <utility>
      19             : 
      20             : namespace llvm {
      21             :   template <typename T>
      22             :   class SmallVectorImpl;
      23             :   class APInt;
      24             :   class hash_code;
      25             :   class StringRef;
      26             : 
      27             :   /// Helper functions for StringRef::getAsInteger.
      28             :   bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
      29             :                             unsigned long long &Result);
      30             : 
      31             :   bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
      32             : 
      33             :   /// StringRef - Represent a constant reference to a string, i.e. a character
      34             :   /// array and a length, which need not be null terminated.
      35             :   ///
      36             :   /// This class does not own the string data; it is expected to be used in
      37             :   /// situations where the character data resides in some other buffer, whose
      38             :   /// lifetime extends past that of the StringRef. For this reason, it is not in
      39             :   /// general safe to store a StringRef.
      40             :   class StringRef {
      41             :   public:
      42             :     typedef const char *iterator;
      43             :     typedef const char *const_iterator;
      44             :     static const size_t npos = ~size_t(0);
      45             :     typedef size_t size_type;
      46             : 
      47             :   private:
      48             :     /// The start of the string, in an external buffer.
      49             :     const char *Data;
      50             : 
      51             :     /// The length of the string.
      52             :     size_t Length;
      53             : 
      54             :     // Work around memcmp's undefined behavior with null pointers by
      55             :     // providing a wrapper that skips the call for zero-length comparisons.
      56             :     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
      57             :       if (Length == 0) { return 0; }
      58             :       return ::memcmp(Lhs,Rhs,Length);
      59             :     }
      60             : 
      61             :   public:
      62             :     /// @name Constructors
      63             :     /// @{
      64             : 
      65             :     /// Construct an empty string ref.
      66             :     /*implicit*/ StringRef() : Data(nullptr), Length(0) {}
      67             : 
      68             :     /// Construct a string ref from a cstring.
      69             :     /*implicit*/ StringRef(const char *Str)
      70             :       : Data(Str) {
      71             :         assert(Str && "StringRef cannot be built from a NULL argument");
      72             :         Length = ::strlen(Str); // invoking strlen(NULL) is undefined behavior
      73             :       }
      74             : 
      75             :     /// Construct a string ref from a pointer and length.
      76             :     /*implicit*/ StringRef(const char *data, size_t length)
      77             :       : Data(data), Length(length) {
      78             :         assert((data || length == 0) &&
      79             :         "StringRef cannot be built from a NULL argument with non-null length");
      80             :       }
      81             : 
      82             :     /// Construct a string ref from an std::string.
      83             :     /*implicit*/ StringRef(const std::string &Str)
      84          41 :       : Data(Str.data()), Length(Str.length()) {}
      85             : 
      86             :     /// @}
      87             :     /// @name Iterators
      88             :     /// @{
      89             : 
      90             :     iterator begin() const { return Data; }
      91             : 
      92             :     iterator end() const { return Data + Length; }
      93             : 
      94             :     const unsigned char *bytes_begin() const {
      95             :       return reinterpret_cast<const unsigned char *>(begin());
      96             :     }
      97             :     const unsigned char *bytes_end() const {
      98             :       return reinterpret_cast<const unsigned char *>(end());
      99             :     }
     100             : 
     101             :     /// @}
     102             :     /// @name String Operations
     103             :     /// @{
     104             : 
     105             :     /// data - Get a pointer to the start of the string (which may not be null
     106             :     /// terminated).
     107             :     const char *data() const { return Data; }
     108             : 
     109             :     /// empty - Check if the string is empty.
     110             :     bool empty() const { return Length == 0; }
     111             : 
     112             :     /// size - Get the string size.
     113             :     size_t size() const { return Length; }
     114             : 
     115             :     /// front - Get the first character in the string.
     116             :     char front() const {
     117             :       assert(!empty());
     118             :       return Data[0];
     119             :     }
     120             : 
     121             :     /// back - Get the last character in the string.
     122             :     char back() const {
     123             :       assert(!empty());
     124             :       return Data[Length-1];
     125             :     }
     126             : 
     127             :     /// copy - Allocate a copy in Allocator and return a StringRef to it.
     128             :     template <typename Allocator> StringRef copy(Allocator &A) const {
     129             :       char *S = A.template Allocate<char>(Length);
     130             :       std::copy(begin(), end(), S);
     131             :       return StringRef(S, Length);
     132             :     }
     133             : 
     134             :     /// equals - Check for string equality, this is more efficient than
     135             :     /// compare() when the relative ordering of unequal strings isn't needed.
     136             :     bool equals(StringRef RHS) const {
     137             :       return (Length == RHS.Length &&
     138             :               compareMemory(Data, RHS.Data, RHS.Length) == 0);
     139             :     }
     140             : 
     141             :     /// equals_lower - Check for string equality, ignoring case.
     142             :     bool equals_lower(StringRef RHS) const {
     143             :       return Length == RHS.Length && compare_lower(RHS) == 0;
     144             :     }
     145             : 
     146             :     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
     147             :     /// is lexicographically less than, equal to, or greater than the \p RHS.
     148             :     int compare(StringRef RHS) const {
     149             :       // Check the prefix for a mismatch.
     150             :       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
     151             :         return Res < 0 ? -1 : 1;
     152             : 
     153             :       // Otherwise the prefixes match, so we only need to check the lengths.
     154             :       if (Length == RHS.Length)
     155             :         return 0;
     156             :       return Length < RHS.Length ? -1 : 1;
     157             :     }
     158             : 
     159             :     /// compare_lower - Compare two strings, ignoring case.
     160             :     int compare_lower(StringRef RHS) const;
     161             : 
     162             :     /// compare_numeric - Compare two strings, treating sequences of digits as
     163             :     /// numbers.
     164             :     int compare_numeric(StringRef RHS) const;
     165             : 
     166             :     /// \brief Determine the edit distance between this string and another
     167             :     /// string.
     168             :     ///
     169             :     /// \param Other the string to compare this string against.
     170             :     ///
     171             :     /// \param AllowReplacements whether to allow character
     172             :     /// replacements (change one character into another) as a single
     173             :     /// operation, rather than as two operations (an insertion and a
     174             :     /// removal).
     175             :     ///
     176             :     /// \param MaxEditDistance If non-zero, the maximum edit distance that
     177             :     /// this routine is allowed to compute. If the edit distance will exceed
     178             :     /// that maximum, returns \c MaxEditDistance+1.
     179             :     ///
     180             :     /// \returns the minimum number of character insertions, removals,
     181             :     /// or (if \p AllowReplacements is \c true) replacements needed to
     182             :     /// transform one of the given strings into the other. If zero,
     183             :     /// the strings are identical.
     184             :     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
     185             :                            unsigned MaxEditDistance = 0) const;
     186             : 
     187             :     /// str - Get the contents as an std::string.
     188             :     std::string str() const {
     189             :       if (!Data) return std::string();
     190             :       return std::string(Data, Length);
     191             :     }
     192             : 
     193             :     /// @}
     194             :     /// @name Operator Overloads
     195             :     /// @{
     196             : 
     197             :     char operator[](size_t Index) const {
     198             :       assert(Index < Length && "Invalid index!");
     199             :       return Data[Index];
     200             :     }
     201             : 
     202             :     /// @}
     203             :     /// @name Type Conversions
     204             :     /// @{
     205             : 
     206             :     operator std::string() const {
     207             :       return str();
     208             :     }
     209             : 
     210             :     /// @}
     211             :     /// @name String Predicates
     212             :     /// @{
     213             : 
     214             :     /// Check if this string starts with the given \p Prefix.
     215             :     bool startswith(StringRef Prefix) const {
     216             :       return Length >= Prefix.Length &&
     217             :              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
     218             :     }
     219             : 
     220             :     /// Check if this string starts with the given \p Prefix, ignoring case.
     221             :     bool startswith_lower(StringRef Prefix) const;
     222             : 
     223             :     /// Check if this string ends with the given \p Suffix.
     224             :     bool endswith(StringRef Suffix) const {
     225             :       return Length >= Suffix.Length &&
     226             :         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
     227             :     }
     228             : 
     229             :     /// Check if this string ends with the given \p Suffix, ignoring case.
     230             :     bool endswith_lower(StringRef Suffix) const;
     231             : 
     232             :     /// @}
     233             :     /// @name String Searching
     234             :     /// @{
     235             : 
     236             :     /// Search for the first character \p C in the string.
     237             :     ///
     238             :     /// \returns The index of the first occurrence of \p C, or npos if not
     239             :     /// found.
     240             :     size_t find(char C, size_t From = 0) const {
     241             :       size_t FindBegin = std::min(From, Length);
     242             :       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
     243             :         // Just forward to memchr, which is faster than a hand-rolled loop.
     244             :         if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
     245             :           return static_cast<const char *>(P) - Data;
     246             :       }
     247             :       return npos;
     248             :     }
     249             : 
     250             :     /// Search for the first string \p Str in the string.
     251             :     ///
     252             :     /// \returns The index of the first occurrence of \p Str, or npos if not
     253             :     /// found.
     254             :     size_t find(StringRef Str, size_t From = 0) const;
     255             : 
     256             :     /// Search for the last character \p C in the string.
     257             :     ///
     258             :     /// \returns The index of the last occurrence of \p C, or npos if not
     259             :     /// found.
     260             :     size_t rfind(char C, size_t From = npos) const {
     261             :       From = std::min(From, Length);
     262             :       size_t i = From;
     263             :       while (i != 0) {
     264             :         --i;
     265             :         if (Data[i] == C)
     266             :           return i;
     267             :       }
     268             :       return npos;
     269             :     }
     270             : 
     271             :     /// Search for the last string \p Str in the string.
     272             :     ///
     273             :     /// \returns The index of the last occurrence of \p Str, or npos if not
     274             :     /// found.
     275             :     size_t rfind(StringRef Str) const;
     276             : 
     277             :     /// Find the first character in the string that is \p C, or npos if not
     278             :     /// found. Same as find.
     279             :     size_t find_first_of(char C, size_t From = 0) const {
     280             :       return find(C, From);
     281             :     }
     282             : 
     283             :     /// Find the first character in the string that is in \p Chars, or npos if
     284             :     /// not found.
     285             :     ///
     286             :     /// Complexity: O(size() + Chars.size())
     287             :     size_t find_first_of(StringRef Chars, size_t From = 0) const;
     288             : 
     289             :     /// Find the first character in the string that is not \p C or npos if not
     290             :     /// found.
     291             :     size_t find_first_not_of(char C, size_t From = 0) const;
     292             : 
     293             :     /// Find the first character in the string that is not in the string
     294             :     /// \p Chars, or npos if not found.
     295             :     ///
     296             :     /// Complexity: O(size() + Chars.size())
     297             :     size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
     298             : 
     299             :     /// Find the last character in the string that is \p C, or npos if not
     300             :     /// found.
     301             :     size_t find_last_of(char C, size_t From = npos) const {
     302             :       return rfind(C, From);
     303             :     }
     304             : 
     305             :     /// Find the last character in the string that is in \p Chars, or npos if
     306             :     /// not found.
     307             :     ///
     308             :     /// Complexity: O(size() + Chars.size())
     309             :     size_t find_last_of(StringRef Chars, size_t From = npos) const;
     310             : 
     311             :     /// Find the last character in the string that is not \p C, or npos if not
     312             :     /// found.
     313             :     size_t find_last_not_of(char C, size_t From = npos) const;
     314             : 
     315             :     /// Find the last character in the string that is not in \p Chars, or
     316             :     /// npos if not found.
     317             :     ///
     318             :     /// Complexity: O(size() + Chars.size())
     319             :     size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
     320             : 
     321             :     /// @}
     322             :     /// @name Helpful Algorithms
     323             :     /// @{
     324             : 
     325             :     /// Return the number of occurrences of \p C in the string.
     326             :     size_t count(char C) const {
     327             :       size_t Count = 0;
     328             :       for (size_t i = 0, e = Length; i != e; ++i)
     329             :         if (Data[i] == C)
     330             :           ++Count;
     331             :       return Count;
     332             :     }
     333             : 
     334             :     /// Return the number of non-overlapped occurrences of \p Str in
     335             :     /// the string.
     336             :     size_t count(StringRef Str) const;
     337             : 
     338             :     /// Parse the current string as an integer of the specified radix.  If
     339             :     /// \p Radix is specified as zero, this does radix autosensing using
     340             :     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
     341             :     ///
     342             :     /// If the string is invalid or if only a subset of the string is valid,
     343             :     /// this returns true to signify the error.  The string is considered
     344             :     /// erroneous if empty or if it overflows T.
     345             :     template <typename T>
     346             :     typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
     347             :     getAsInteger(unsigned Radix, T &Result) const {
     348             :       long long LLVal;
     349             :       if (getAsSignedInteger(*this, Radix, LLVal) ||
     350             :             static_cast<T>(LLVal) != LLVal)
     351             :         return true;
     352             :       Result = LLVal;
     353             :       return false;
     354             :     }
     355             : 
     356             :     template <typename T>
     357             :     typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
     358             :     getAsInteger(unsigned Radix, T &Result) const {
     359             :       unsigned long long ULLVal;
     360             :       // The additional cast to unsigned long long is required to avoid the
     361             :       // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
     362             :       // 'unsigned __int64' when instantiating getAsInteger with T = bool.
     363             :       if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
     364             :           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
     365             :         return true;
     366             :       Result = ULLVal;
     367             :       return false;
     368             :     }
     369             : 
     370             :     /// Parse the current string as an integer of the specified \p Radix, or of
     371             :     /// an autosensed radix if the \p Radix given is 0.  The current value in
     372             :     /// \p Result is discarded, and the storage is changed to be wide enough to
     373             :     /// store the parsed integer.
     374             :     ///
     375             :     /// \returns true if the string does not solely consist of a valid
     376             :     /// non-empty number in the appropriate base.
     377             :     ///
     378             :     /// APInt::fromString is superficially similar but assumes the
     379             :     /// string is well-formed in the given radix.
     380             :     bool getAsInteger(unsigned Radix, APInt &Result) const;
     381             : 
     382             :     /// @}
     383             :     /// @name String Operations
     384             :     /// @{
     385             : 
     386             :     /// Convert the given ASCII string to lowercase.
     387             :     std::string lower() const;
     388             : 
     389             :     /// Convert the given ASCII string to uppercase.
     390             :     std::string upper() const;
     391             : 
     392             :     /// @}
     393             :     /// @name Substring Operations
     394             :     /// @{
     395             : 
     396             :     /// Return a reference to the substring from [Start, Start + N).
     397             :     ///
     398             :     /// \param Start The index of the starting character in the substring; if
     399             :     /// the index is npos or greater than the length of the string then the
     400             :     /// empty substring will be returned.
     401             :     ///
     402             :     /// \param N The number of characters to include in the substring. If N
     403             :     /// exceeds the number of characters remaining in the string, the string
     404             :     /// suffix (starting with \p Start) will be returned.
     405             :     StringRef substr(size_t Start, size_t N = npos) const {
     406             :       Start = std::min(Start, Length);
     407             :       return StringRef(Data + Start, std::min(N, Length - Start));
     408             :     }
     409             : 
     410             :     /// Return a StringRef equal to 'this' but with the first \p N elements
     411             :     /// dropped.
     412             :     StringRef drop_front(size_t N = 1) const {
     413             :       assert(size() >= N && "Dropping more elements than exist");
     414             :       return substr(N);
     415             :     }
     416             : 
     417             :     /// Return a StringRef equal to 'this' but with the last \p N elements
     418             :     /// dropped.
     419             :     StringRef drop_back(size_t N = 1) const {
     420             :       assert(size() >= N && "Dropping more elements than exist");
     421             :       return substr(0, size()-N);
     422             :     }
     423             : 
     424             :     /// Return a reference to the substring from [Start, End).
     425             :     ///
     426             :     /// \param Start The index of the starting character in the substring; if
     427             :     /// the index is npos or greater than the length of the string then the
     428             :     /// empty substring will be returned.
     429             :     ///
     430             :     /// \param End The index following the last character to include in the
     431             :     /// substring. If this is npos or exceeds the length of the string, the
     432             :     /// string suffix (starting with \p Start) will be returned. If this is
     433             :     /// less than \p Start, an empty substring will be returned.
     434             :     StringRef slice(size_t Start, size_t End) const {
     435             :       Start = std::min(Start, Length);
     436             :       End = std::min(std::max(Start, End), Length);
     437             :       return StringRef(Data + Start, End - Start);
     438             :     }
     439             : 
     440             :     /// Split into two substrings around the first occurrence of a separator
     441             :     /// character.
     442             :     ///
     443             :     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
     444             :     /// such that (*this == LHS + Separator + RHS) is true and RHS is
     445             :     /// maximal. If \p Separator is not in the string, then the result is a
     446             :     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
     447             :     ///
     448             :     /// \param Separator The character to split on.
     449             :     /// \returns The split substrings.
     450             :     std::pair<StringRef, StringRef> split(char Separator) const {
     451             :       size_t Idx = find(Separator);
     452             :       if (Idx == npos)
     453             :         return std::make_pair(*this, StringRef());
     454             :       return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
     455             :     }
     456             : 
     457             :     /// Split into two substrings around the first occurrence of a separator
     458             :     /// string.
     459             :     ///
     460             :     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
     461             :     /// such that (*this == LHS + Separator + RHS) is true and RHS is
     462             :     /// maximal. If \p Separator is not in the string, then the result is a
     463             :     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
     464             :     ///
     465             :     /// \param Separator - The string to split on.
     466             :     /// \return - The split substrings.
     467             :     std::pair<StringRef, StringRef> split(StringRef Separator) const {
     468             :       size_t Idx = find(Separator);
     469             :       if (Idx == npos)
     470             :         return std::make_pair(*this, StringRef());
     471             :       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
     472             :     }
     473             : 
     474             :     /// Split into substrings around the occurrences of a separator string.
     475             :     ///
     476             :     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
     477             :     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
     478             :     /// elements are added to \p A.
     479             :     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
     480             :     /// still count when considering \p MaxSplit.
     481             :     /// A useful invariant is that
     482             :     /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
     483             :     ///
     484             :     /// \param A - Where to put the substrings.
     485             :     /// \param Separator - The string to split on.
     486             :     /// \param MaxSplit - The maximum number of times the string is split.
     487             :     /// \param KeepEmpty - True if empty substring should be added.
     488             :     void split(SmallVectorImpl<StringRef> &A,
     489             :                StringRef Separator, int MaxSplit = -1,
     490             :                bool KeepEmpty = true) const;
     491             : 
     492             :     /// Split into two substrings around the last occurrence of a separator
     493             :     /// character.
     494             :     ///
     495             :     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
     496             :     /// such that (*this == LHS + Separator + RHS) is true and RHS is
     497             :     /// minimal. If \p Separator is not in the string, then the result is a
     498             :     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
     499             :     ///
     500             :     /// \param Separator - The character to split on.
     501             :     /// \return - The split substrings.
     502             :     std::pair<StringRef, StringRef> rsplit(char Separator) const {
     503             :       size_t Idx = rfind(Separator);
     504             :       if (Idx == npos)
     505             :         return std::make_pair(*this, StringRef());
     506             :       return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
     507             :     }
     508             : 
     509             :     /// Return string with consecutive characters in \p Chars starting from
     510             :     /// the left removed.
     511             :     StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
     512             :       return drop_front(std::min(Length, find_first_not_of(Chars)));
     513             :     }
     514             : 
     515             :     /// Return string with consecutive characters in \p Chars starting from
     516             :     /// the right removed.
     517             :     StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
     518             :       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
     519             :     }
     520             : 
     521             :     /// Return string with consecutive characters in \p Chars starting from
     522             :     /// the left and right removed.
     523             :     StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
     524             :       return ltrim(Chars).rtrim(Chars);
     525             :     }
     526             : 
     527             :     /// @}
     528             :   };
     529             : 
     530             :   /// @name StringRef Comparison Operators
     531             :   /// @{
     532             : 
     533             :   inline bool operator==(StringRef LHS, StringRef RHS) {
     534             :     return LHS.equals(RHS);
     535             :   }
     536             : 
     537             :   inline bool operator!=(StringRef LHS, StringRef RHS) {
     538             :     return !(LHS == RHS);
     539             :   }
     540             : 
     541             :   inline bool operator<(StringRef LHS, StringRef RHS) {
     542             :     return LHS.compare(RHS) == -1;
     543             :   }
     544             : 
     545             :   inline bool operator<=(StringRef LHS, StringRef RHS) {
     546             :     return LHS.compare(RHS) != 1;
     547             :   }
     548             : 
     549             :   inline bool operator>(StringRef LHS, StringRef RHS) {
     550             :     return LHS.compare(RHS) == 1;
     551             :   }
     552             : 
     553             :   inline bool operator>=(StringRef LHS, StringRef RHS) {
     554             :     return LHS.compare(RHS) != -1;
     555             :   }
     556             : 
     557             :   inline std::string &operator+=(std::string &buffer, StringRef string) {
     558             :     return buffer.append(string.data(), string.size());
     559             :   }
     560             : 
     561             :   /// @}
     562             : 
     563             :   /// \brief Compute a hash_code for a StringRef.
     564             :   hash_code hash_value(StringRef S);
     565             : 
     566             :   // StringRefs can be treated like a POD type.
     567             :   template <typename T> struct isPodLike;
     568             :   template <> struct isPodLike<StringRef> { static const bool value = true; };
     569             : }
     570             : 
     571             : #endif

Generated by: LCOV version 1.11