1//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_ADT_STRINGREF_H
11#define LLVM_ADT_STRINGREF_H
12
13#include "llvm/ADT/STLExtras.h"
14#include "llvm/ADT/iterator_range.h"
15#include "llvm/Support/Compiler.h"
16#include <algorithm>
17#include <cassert>
18#include <cstddef>
19#include <cstring>
20#include <limits>
21#include <string>
22#include <type_traits>
23#include <utility>
24
25namespace llvm {
26
27  class APInt;
28  class hash_code;
29  template <typename T> class SmallVectorImpl;
30  class StringRef;
31
  /// Helper functions for StringRef::getAsInteger and
  /// StringRef::consumeInteger. They are only declared in this header.
  ///
  /// The StringRef member templates that call these helpers treat a true
  /// return value as a parse failure; on a false return they read the parsed
  /// value back out of \p Result.
  bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
                            unsigned long long &Result);

  bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);

  // The consume* variants receive \p Str by non-const reference.
  // NOTE(review): presumably they strip the parsed number from the front of
  // \p Str, as documented on StringRef::consumeInteger -- confirm against the
  // definitions.
  bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                              unsigned long long &Result);
  bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
41
42  /// StringRef - Represent a constant reference to a string, i.e. a character
43  /// array and a length, which need not be null terminated.
44  ///
45  /// This class does not own the string data, it is expected to be used in
46  /// situations where the character data resides in some other buffer, whose
47  /// lifetime extends past that of the StringRef. For this reason, it is not in
48  /// general safe to store a StringRef.
49  class StringRef {
50  public:
51    static const size_t npos = ~size_t(0);
52
53    using iterator = const char *;
54    using const_iterator = const char *;
55    using size_type = size_t;
56
57  private:
58    /// The start of the string, in an external buffer.
59    const char *Data = nullptr;
60
61    /// The length of the string.
62    size_t Length = 0;
63
64    // Workaround memcmp issue with null pointers (undefined behavior)
65    // by providing a specialized version
66    LLVM_ATTRIBUTE_ALWAYS_INLINE
67    static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
68      if (Length == 0) { return 0; }
69      return ::memcmp(Lhs,Rhs,Length);
70    }
71
72  public:
73    /// @name Constructors
74    /// @{
75
76    /// Construct an empty string ref.
77    /*implicit*/ StringRef() = default;
78
79    /// Disable conversion from nullptr.  This prevents things like
80    /// if (S == nullptr)
81    StringRef(std::nullptr_t) = delete;
82
83    /// Construct a string ref from a cstring.
84    LLVM_ATTRIBUTE_ALWAYS_INLINE
85    /*implicit*/ StringRef(const char *Str)
86        : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
87
88    /// Construct a string ref from a pointer and length.
89    LLVM_ATTRIBUTE_ALWAYS_INLINE
90    /*implicit*/ constexpr StringRef(const char *data, size_t length)
91        : Data(data), Length(length) {}
92
93    /// Construct a string ref from an std::string.
94    LLVM_ATTRIBUTE_ALWAYS_INLINE
95    /*implicit*/ StringRef(const std::string &Str)
96      : Data(Str.data()), Length(Str.length()) {}
97
98    static StringRef withNullAsEmpty(const char *data) {
99      return StringRef(data ? data : "");
100    }
101
102    /// @}
103    /// @name Iterators
104    /// @{
105
106    iterator begin() const { return Data; }
107
108    iterator end() const { return Data + Length; }
109
110    const unsigned char *bytes_begin() const {
111      return reinterpret_cast<const unsigned char *>(begin());
112    }
113    const unsigned char *bytes_end() const {
114      return reinterpret_cast<const unsigned char *>(end());
115    }
116    iterator_range<const unsigned char *> bytes() const {
117      return make_range(bytes_begin(), bytes_end());
118    }
119
120    /// @}
121    /// @name String Operations
122    /// @{
123
124    /// data - Get a pointer to the start of the string (which may not be null
125    /// terminated).
126    LLVM_NODISCARD
127    LLVM_ATTRIBUTE_ALWAYS_INLINE
128    const char *data() const { return Data; }
129
130    /// empty - Check if the string is empty.
131    LLVM_NODISCARD
132    LLVM_ATTRIBUTE_ALWAYS_INLINE
133    bool empty() const { return Length == 0; }
134
135    /// size - Get the string size.
136    LLVM_NODISCARD
137    LLVM_ATTRIBUTE_ALWAYS_INLINE
138    size_t size() const { return Length; }
139
140    /// front - Get the first character in the string.
141    LLVM_NODISCARD
142    char front() const {
143      assert(!empty());
144      return Data[0];
145    }
146
147    /// back - Get the last character in the string.
148    LLVM_NODISCARD
149    char back() const {
150      assert(!empty());
151      return Data[Length-1];
152    }
153
154    // copy - Allocate copy in Allocator and return StringRef to it.
155    template <typename Allocator>
156    LLVM_NODISCARD StringRef copy(Allocator &A) const {
157      // Don't request a length 0 copy from the allocator.
158      if (empty())
159        return StringRef();
160      char *S = A.template Allocate<char>(Length);
161      std::copy(begin(), end(), S);
162      return StringRef(S, Length);
163    }
164
165    /// equals - Check for string equality, this is more efficient than
166    /// compare() when the relative ordering of inequal strings isn't needed.
167    LLVM_NODISCARD
168    LLVM_ATTRIBUTE_ALWAYS_INLINE
169    bool equals(StringRef RHS) const {
170      return (Length == RHS.Length &&
171              compareMemory(Data, RHS.Data, RHS.Length) == 0);
172    }
173
174    /// equals_lower - Check for string equality, ignoring case.
175    LLVM_NODISCARD
176    bool equals_lower(StringRef RHS) const {
177      return Length == RHS.Length && compare_lower(RHS) == 0;
178    }
179
180    /// compare - Compare two strings; the result is -1, 0, or 1 if this string
181    /// is lexicographically less than, equal to, or greater than the \p RHS.
182    LLVM_NODISCARD
183    LLVM_ATTRIBUTE_ALWAYS_INLINE
184    int compare(StringRef RHS) const {
185      // Check the prefix for a mismatch.
186      if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
187        return Res < 0 ? -1 : 1;
188
189      // Otherwise the prefixes match, so we only need to check the lengths.
190      if (Length == RHS.Length)
191        return 0;
192      return Length < RHS.Length ? -1 : 1;
193    }
194
195    /// compare_lower - Compare two strings, ignoring case.
196    LLVM_NODISCARD
197    int compare_lower(StringRef RHS) const;
198
199    /// compare_numeric - Compare two strings, treating sequences of digits as
200    /// numbers.
201    LLVM_NODISCARD
202    int compare_numeric(StringRef RHS) const;
203
204    /// \brief Determine the edit distance between this string and another
205    /// string.
206    ///
207    /// \param Other the string to compare this string against.
208    ///
209    /// \param AllowReplacements whether to allow character
210    /// replacements (change one character into another) as a single
211    /// operation, rather than as two operations (an insertion and a
212    /// removal).
213    ///
214    /// \param MaxEditDistance If non-zero, the maximum edit distance that
215    /// this routine is allowed to compute. If the edit distance will exceed
216    /// that maximum, returns \c MaxEditDistance+1.
217    ///
218    /// \returns the minimum number of character insertions, removals,
219    /// or (if \p AllowReplacements is \c true) replacements needed to
220    /// transform one of the given strings into the other. If zero,
221    /// the strings are identical.
222    LLVM_NODISCARD
223    unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
224                           unsigned MaxEditDistance = 0) const;
225
226    /// str - Get the contents as an std::string.
227    LLVM_NODISCARD
228    std::string str() const {
229      if (!Data) return std::string();
230      return std::string(Data, Length);
231    }
232
233    /// @}
234    /// @name Operator Overloads
235    /// @{
236
237    LLVM_NODISCARD
238    char operator[](size_t Index) const {
239      assert(Index < Length && "Invalid index!");
240      return Data[Index];
241    }
242
243    /// Disallow accidental assignment from a temporary std::string.
244    ///
245    /// The declaration here is extra complicated so that `stringRef = {}`
246    /// and `stringRef = "abc"` continue to select the move assignment operator.
247    template <typename T>
248    typename std::enable_if<std::is_same<T, std::string>::value,
249                            StringRef>::type &
250    operator=(T &&Str) = delete;
251
252    /// @}
253    /// @name Type Conversions
254    /// @{
255
256    operator std::string() const {
257      return str();
258    }
259
260    /// @}
261    /// @name String Predicates
262    /// @{
263
264    /// Check if this string starts with the given \p Prefix.
265    LLVM_NODISCARD
266    LLVM_ATTRIBUTE_ALWAYS_INLINE
267    bool startswith(StringRef Prefix) const {
268      return Length >= Prefix.Length &&
269             compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
270    }
271
272    /// Check if this string starts with the given \p Prefix, ignoring case.
273    LLVM_NODISCARD
274    bool startswith_lower(StringRef Prefix) const;
275
276    /// Check if this string ends with the given \p Suffix.
277    LLVM_NODISCARD
278    LLVM_ATTRIBUTE_ALWAYS_INLINE
279    bool endswith(StringRef Suffix) const {
280      return Length >= Suffix.Length &&
281        compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
282    }
283
284    /// Check if this string ends with the given \p Suffix, ignoring case.
285    LLVM_NODISCARD
286    bool endswith_lower(StringRef Suffix) const;
287
288    /// @}
289    /// @name String Searching
290    /// @{
291
292    /// Search for the first character \p C in the string.
293    ///
294    /// \returns The index of the first occurrence of \p C, or npos if not
295    /// found.
296    LLVM_NODISCARD
297    LLVM_ATTRIBUTE_ALWAYS_INLINE
298    size_t find(char C, size_t From = 0) const {
299      size_t FindBegin = std::min(From, Length);
300      if (FindBegin < Length) { // Avoid calling memchr with nullptr.
301        // Just forward to memchr, which is faster than a hand-rolled loop.
302        if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
303          return static_cast<const char *>(P) - Data;
304      }
305      return npos;
306    }
307
308    /// Search for the first character \p C in the string, ignoring case.
309    ///
310    /// \returns The index of the first occurrence of \p C, or npos if not
311    /// found.
312    LLVM_NODISCARD
313    size_t find_lower(char C, size_t From = 0) const;
314
315    /// Search for the first character satisfying the predicate \p F
316    ///
317    /// \returns The index of the first character satisfying \p F starting from
318    /// \p From, or npos if not found.
319    LLVM_NODISCARD
320    LLVM_ATTRIBUTE_ALWAYS_INLINE
321    size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
322      StringRef S = drop_front(From);
323      while (!S.empty()) {
324        if (F(S.front()))
325          return size() - S.size();
326        S = S.drop_front();
327      }
328      return npos;
329    }
330
331    /// Search for the first character not satisfying the predicate \p F
332    ///
333    /// \returns The index of the first character not satisfying \p F starting
334    /// from \p From, or npos if not found.
335    LLVM_NODISCARD
336    LLVM_ATTRIBUTE_ALWAYS_INLINE
337    size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
338      return find_if([F](char c) { return !F(c); }, From);
339    }
340
341    /// Search for the first string \p Str in the string.
342    ///
343    /// \returns The index of the first occurrence of \p Str, or npos if not
344    /// found.
345    LLVM_NODISCARD
346    size_t find(StringRef Str, size_t From = 0) const;
347
348    /// Search for the first string \p Str in the string, ignoring case.
349    ///
350    /// \returns The index of the first occurrence of \p Str, or npos if not
351    /// found.
352    LLVM_NODISCARD
353    size_t find_lower(StringRef Str, size_t From = 0) const;
354
355    /// Search for the last character \p C in the string.
356    ///
357    /// \returns The index of the last occurrence of \p C, or npos if not
358    /// found.
359    LLVM_NODISCARD
360    size_t rfind(char C, size_t From = npos) const {
361      From = std::min(From, Length);
362      size_t i = From;
363      while (i != 0) {
364        --i;
365        if (Data[i] == C)
366          return i;
367      }
368      return npos;
369    }
370
371    /// Search for the last character \p C in the string, ignoring case.
372    ///
373    /// \returns The index of the last occurrence of \p C, or npos if not
374    /// found.
375    LLVM_NODISCARD
376    size_t rfind_lower(char C, size_t From = npos) const;
377
378    /// Search for the last string \p Str in the string.
379    ///
380    /// \returns The index of the last occurrence of \p Str, or npos if not
381    /// found.
382    LLVM_NODISCARD
383    size_t rfind(StringRef Str) const;
384
385    /// Search for the last string \p Str in the string, ignoring case.
386    ///
387    /// \returns The index of the last occurrence of \p Str, or npos if not
388    /// found.
389    LLVM_NODISCARD
390    size_t rfind_lower(StringRef Str) const;
391
392    /// Find the first character in the string that is \p C, or npos if not
393    /// found. Same as find.
394    LLVM_NODISCARD
395    size_t find_first_of(char C, size_t From = 0) const {
396      return find(C, From);
397    }
398
399    /// Find the first character in the string that is in \p Chars, or npos if
400    /// not found.
401    ///
402    /// Complexity: O(size() + Chars.size())
403    LLVM_NODISCARD
404    size_t find_first_of(StringRef Chars, size_t From = 0) const;
405
406    /// Find the first character in the string that is not \p C or npos if not
407    /// found.
408    LLVM_NODISCARD
409    size_t find_first_not_of(char C, size_t From = 0) const;
410
411    /// Find the first character in the string that is not in the string
412    /// \p Chars, or npos if not found.
413    ///
414    /// Complexity: O(size() + Chars.size())
415    LLVM_NODISCARD
416    size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
417
418    /// Find the last character in the string that is \p C, or npos if not
419    /// found.
420    LLVM_NODISCARD
421    size_t find_last_of(char C, size_t From = npos) const {
422      return rfind(C, From);
423    }
424
425    /// Find the last character in the string that is in \p C, or npos if not
426    /// found.
427    ///
428    /// Complexity: O(size() + Chars.size())
429    LLVM_NODISCARD
430    size_t find_last_of(StringRef Chars, size_t From = npos) const;
431
432    /// Find the last character in the string that is not \p C, or npos if not
433    /// found.
434    LLVM_NODISCARD
435    size_t find_last_not_of(char C, size_t From = npos) const;
436
437    /// Find the last character in the string that is not in \p Chars, or
438    /// npos if not found.
439    ///
440    /// Complexity: O(size() + Chars.size())
441    LLVM_NODISCARD
442    size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
443
444    /// Return true if the given string is a substring of *this, and false
445    /// otherwise.
446    LLVM_NODISCARD
447    LLVM_ATTRIBUTE_ALWAYS_INLINE
448    bool contains(StringRef Other) const { return find(Other) != npos; }
449
450    /// Return true if the given character is contained in *this, and false
451    /// otherwise.
452    LLVM_NODISCARD
453    LLVM_ATTRIBUTE_ALWAYS_INLINE
454    bool contains(char C) const { return find_first_of(C) != npos; }
455
456    /// Return true if the given string is a substring of *this, and false
457    /// otherwise.
458    LLVM_NODISCARD
459    LLVM_ATTRIBUTE_ALWAYS_INLINE
460    bool contains_lower(StringRef Other) const {
461      return find_lower(Other) != npos;
462    }
463
464    /// Return true if the given character is contained in *this, and false
465    /// otherwise.
466    LLVM_NODISCARD
467    LLVM_ATTRIBUTE_ALWAYS_INLINE
468    bool contains_lower(char C) const { return find_lower(C) != npos; }
469
470    /// @}
471    /// @name Helpful Algorithms
472    /// @{
473
474    /// Return the number of occurrences of \p C in the string.
475    LLVM_NODISCARD
476    size_t count(char C) const {
477      size_t Count = 0;
478      for (size_t i = 0, e = Length; i != e; ++i)
479        if (Data[i] == C)
480          ++Count;
481      return Count;
482    }
483
484    /// Return the number of non-overlapped occurrences of \p Str in
485    /// the string.
486    size_t count(StringRef Str) const;
487
488    /// Parse the current string as an integer of the specified radix.  If
489    /// \p Radix is specified as zero, this does radix autosensing using
490    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
491    ///
492    /// If the string is invalid or if only a subset of the string is valid,
493    /// this returns true to signify the error.  The string is considered
494    /// erroneous if empty or if it overflows T.
495    template <typename T>
496    typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
497    getAsInteger(unsigned Radix, T &Result) const {
498      long long LLVal;
499      if (getAsSignedInteger(*this, Radix, LLVal) ||
500            static_cast<T>(LLVal) != LLVal)
501        return true;
502      Result = LLVal;
503      return false;
504    }
505
506    template <typename T>
507    typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
508    getAsInteger(unsigned Radix, T &Result) const {
509      unsigned long long ULLVal;
510      // The additional cast to unsigned long long is required to avoid the
511      // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
512      // 'unsigned __int64' when instantiating getAsInteger with T = bool.
513      if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
514          static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
515        return true;
516      Result = ULLVal;
517      return false;
518    }
519
520    /// Parse the current string as an integer of the specified radix.  If
521    /// \p Radix is specified as zero, this does radix autosensing using
522    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
523    ///
524    /// If the string does not begin with a number of the specified radix,
525    /// this returns true to signify the error. The string is considered
526    /// erroneous if empty or if it overflows T.
527    /// The portion of the string representing the discovered numeric value
528    /// is removed from the beginning of the string.
529    template <typename T>
530    typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
531    consumeInteger(unsigned Radix, T &Result) {
532      long long LLVal;
533      if (consumeSignedInteger(*this, Radix, LLVal) ||
534          static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
535        return true;
536      Result = LLVal;
537      return false;
538    }
539
540    template <typename T>
541    typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
542    consumeInteger(unsigned Radix, T &Result) {
543      unsigned long long ULLVal;
544      if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
545          static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
546        return true;
547      Result = ULLVal;
548      return false;
549    }
550
551    /// Parse the current string as an integer of the specified \p Radix, or of
552    /// an autosensed radix if the \p Radix given is 0.  The current value in
553    /// \p Result is discarded, and the storage is changed to be wide enough to
554    /// store the parsed integer.
555    ///
556    /// \returns true if the string does not solely consist of a valid
557    /// non-empty number in the appropriate base.
558    ///
559    /// APInt::fromString is superficially similar but assumes the
560    /// string is well-formed in the given radix.
561    bool getAsInteger(unsigned Radix, APInt &Result) const;
562
563    /// Parse the current string as an IEEE double-precision floating
564    /// point value.  The string must be a well-formed double.
565    ///
566    /// If \p AllowInexact is false, the function will fail if the string
567    /// cannot be represented exactly.  Otherwise, the function only fails
568    /// in case of an overflow or underflow.
569    bool getAsDouble(double &Result, bool AllowInexact = true) const;
570
571    /// @}
572    /// @name String Operations
573    /// @{
574
575    // Convert the given ASCII string to lowercase.
576    LLVM_NODISCARD
577    std::string lower() const;
578
579    /// Convert the given ASCII string to uppercase.
580    LLVM_NODISCARD
581    std::string upper() const;
582
583    /// @}
584    /// @name Substring Operations
585    /// @{
586
587    /// Return a reference to the substring from [Start, Start + N).
588    ///
589    /// \param Start The index of the starting character in the substring; if
590    /// the index is npos or greater than the length of the string then the
591    /// empty substring will be returned.
592    ///
593    /// \param N The number of characters to included in the substring. If N
594    /// exceeds the number of characters remaining in the string, the string
595    /// suffix (starting with \p Start) will be returned.
596    LLVM_NODISCARD
597    LLVM_ATTRIBUTE_ALWAYS_INLINE
598    StringRef substr(size_t Start, size_t N = npos) const {
599      Start = std::min(Start, Length);
600      return StringRef(Data + Start, std::min(N, Length - Start));
601    }
602
603    /// Return a StringRef equal to 'this' but with only the first \p N
604    /// elements remaining.  If \p N is greater than the length of the
605    /// string, the entire string is returned.
606    LLVM_NODISCARD
607    LLVM_ATTRIBUTE_ALWAYS_INLINE
608    StringRef take_front(size_t N = 1) const {
609      if (N >= size())
610        return *this;
611      return drop_back(size() - N);
612    }
613
614    /// Return a StringRef equal to 'this' but with only the last \p N
615    /// elements remaining.  If \p N is greater than the length of the
616    /// string, the entire string is returned.
617    LLVM_NODISCARD
618    LLVM_ATTRIBUTE_ALWAYS_INLINE
619    StringRef take_back(size_t N = 1) const {
620      if (N >= size())
621        return *this;
622      return drop_front(size() - N);
623    }
624
625    /// Return the longest prefix of 'this' such that every character
626    /// in the prefix satisfies the given predicate.
627    LLVM_NODISCARD
628    LLVM_ATTRIBUTE_ALWAYS_INLINE
629    StringRef take_while(function_ref<bool(char)> F) const {
630      return substr(0, find_if_not(F));
631    }
632
633    /// Return the longest prefix of 'this' such that no character in
634    /// the prefix satisfies the given predicate.
635    LLVM_NODISCARD
636    LLVM_ATTRIBUTE_ALWAYS_INLINE
637    StringRef take_until(function_ref<bool(char)> F) const {
638      return substr(0, find_if(F));
639    }
640
641    /// Return a StringRef equal to 'this' but with the first \p N elements
642    /// dropped.
643    LLVM_NODISCARD
644    LLVM_ATTRIBUTE_ALWAYS_INLINE
645    StringRef drop_front(size_t N = 1) const {
646      assert(size() >= N && "Dropping more elements than exist");
647      return substr(N);
648    }
649
650    /// Return a StringRef equal to 'this' but with the last \p N elements
651    /// dropped.
652    LLVM_NODISCARD
653    LLVM_ATTRIBUTE_ALWAYS_INLINE
654    StringRef drop_back(size_t N = 1) const {
655      assert(size() >= N && "Dropping more elements than exist");
656      return substr(0, size()-N);
657    }
658
659    /// Return a StringRef equal to 'this', but with all characters satisfying
660    /// the given predicate dropped from the beginning of the string.
661    LLVM_NODISCARD
662    LLVM_ATTRIBUTE_ALWAYS_INLINE
663    StringRef drop_while(function_ref<bool(char)> F) const {
664      return substr(find_if_not(F));
665    }
666
667    /// Return a StringRef equal to 'this', but with all characters not
668    /// satisfying the given predicate dropped from the beginning of the string.
669    LLVM_NODISCARD
670    LLVM_ATTRIBUTE_ALWAYS_INLINE
671    StringRef drop_until(function_ref<bool(char)> F) const {
672      return substr(find_if(F));
673    }
674
675    /// Returns true if this StringRef has the given prefix and removes that
676    /// prefix.
677    LLVM_ATTRIBUTE_ALWAYS_INLINE
678    bool consume_front(StringRef Prefix) {
679      if (!startswith(Prefix))
680        return false;
681
682      *this = drop_front(Prefix.size());
683      return true;
684    }
685
686    /// Returns true if this StringRef has the given suffix and removes that
687    /// suffix.
688    LLVM_ATTRIBUTE_ALWAYS_INLINE
689    bool consume_back(StringRef Suffix) {
690      if (!endswith(Suffix))
691        return false;
692
693      *this = drop_back(Suffix.size());
694      return true;
695    }
696
697    /// Return a reference to the substring from [Start, End).
698    ///
699    /// \param Start The index of the starting character in the substring; if
700    /// the index is npos or greater than the length of the string then the
701    /// empty substring will be returned.
702    ///
703    /// \param End The index following the last character to include in the
704    /// substring. If this is npos or exceeds the number of characters
705    /// remaining in the string, the string suffix (starting with \p Start)
706    /// will be returned. If this is less than \p Start, an empty string will
707    /// be returned.
708    LLVM_NODISCARD
709    LLVM_ATTRIBUTE_ALWAYS_INLINE
710    StringRef slice(size_t Start, size_t End) const {
711      Start = std::min(Start, Length);
712      End = std::min(std::max(Start, End), Length);
713      return StringRef(Data + Start, End - Start);
714    }
715
716    /// Split into two substrings around the first occurrence of a separator
717    /// character.
718    ///
719    /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
720    /// such that (*this == LHS + Separator + RHS) is true and RHS is
721    /// maximal. If \p Separator is not in the string, then the result is a
722    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
723    ///
724    /// \param Separator The character to split on.
725    /// \returns The split substrings.
726    LLVM_NODISCARD
727    std::pair<StringRef, StringRef> split(char Separator) const {
728      size_t Idx = find(Separator);
729      if (Idx == npos)
730        return std::make_pair(*this, StringRef());
731      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
732    }
733
734    /// Split into two substrings around the first occurrence of a separator
735    /// string.
736    ///
737    /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
738    /// such that (*this == LHS + Separator + RHS) is true and RHS is
739    /// maximal. If \p Separator is not in the string, then the result is a
740    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
741    ///
742    /// \param Separator - The string to split on.
743    /// \return - The split substrings.
744    LLVM_NODISCARD
745    std::pair<StringRef, StringRef> split(StringRef Separator) const {
746      size_t Idx = find(Separator);
747      if (Idx == npos)
748        return std::make_pair(*this, StringRef());
749      return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
750    }
751
752    /// Split into substrings around the occurrences of a separator string.
753    ///
754    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
755    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
756    /// elements are added to A.
757    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
758    /// still count when considering \p MaxSplit
759    /// An useful invariant is that
760    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
761    ///
762    /// \param A - Where to put the substrings.
763    /// \param Separator - The string to split on.
764    /// \param MaxSplit - The maximum number of times the string is split.
765    /// \param KeepEmpty - True if empty substring should be added.
766    void split(SmallVectorImpl<StringRef> &A,
767               StringRef Separator, int MaxSplit = -1,
768               bool KeepEmpty = true) const;
769
770    /// Split into substrings around the occurrences of a separator character.
771    ///
772    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
773    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
774    /// elements are added to A.
775    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
776    /// still count when considering \p MaxSplit
777    /// An useful invariant is that
778    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
779    ///
780    /// \param A - Where to put the substrings.
781    /// \param Separator - The string to split on.
782    /// \param MaxSplit - The maximum number of times the string is split.
783    /// \param KeepEmpty - True if empty substring should be added.
784    void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
785               bool KeepEmpty = true) const;
786
787    /// Split into two substrings around the last occurrence of a separator
788    /// character.
789    ///
790    /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
791    /// such that (*this == LHS + Separator + RHS) is true and RHS is
792    /// minimal. If \p Separator is not in the string, then the result is a
793    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
794    ///
795    /// \param Separator - The character to split on.
796    /// \return - The split substrings.
797    LLVM_NODISCARD
798    std::pair<StringRef, StringRef> rsplit(char Separator) const {
799      size_t Idx = rfind(Separator);
800      if (Idx == npos)
801        return std::make_pair(*this, StringRef());
802      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
803    }
804
805    /// Return string with consecutive \p Char characters starting from the
806    /// the left removed.
807    LLVM_NODISCARD
808    StringRef ltrim(char Char) const {
809      return drop_front(std::min(Length, find_first_not_of(Char)));
810    }
811
812    /// Return string with consecutive characters in \p Chars starting from
813    /// the left removed.
814    LLVM_NODISCARD
815    StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
816      return drop_front(std::min(Length, find_first_not_of(Chars)));
817    }
818
819    /// Return string with consecutive \p Char characters starting from the
820    /// right removed.
821    LLVM_NODISCARD
822    StringRef rtrim(char Char) const {
823      return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
824    }
825
826    /// Return string with consecutive characters in \p Chars starting from
827    /// the right removed.
828    LLVM_NODISCARD
829    StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
830      return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
831    }
832
833    /// Return string with consecutive \p Char characters starting from the
834    /// left and right removed.
835    LLVM_NODISCARD
836    StringRef trim(char Char) const {
837      return ltrim(Char).rtrim(Char);
838    }
839
840    /// Return string with consecutive characters in \p Chars starting from
841    /// the left and right removed.
842    LLVM_NODISCARD
843    StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
844      return ltrim(Chars).rtrim(Chars);
845    }
846
847    /// @}
848  };
849
850  /// A wrapper around a string literal that serves as a proxy for constructing
851  /// global tables of StringRefs with the length computed at compile time.
852  /// In order to avoid the invocation of a global constructor, StringLiteral
853  /// should *only* be used in a constexpr context, as such:
854  ///
855  /// constexpr StringLiteral S("test");
856  ///
857  class StringLiteral : public StringRef {
858  public:
859    template <size_t N>
860    constexpr StringLiteral(const char (&Str)[N])
861#if defined(__clang__) && __has_attribute(enable_if)
862#pragma clang diagnostic push
863#pragma clang diagnostic ignored "-Wgcc-compat"
864        __attribute((enable_if(__builtin_strlen(Str) == N - 1,
865                               "invalid string literal")))
866#pragma clang diagnostic pop
867#endif
868        : StringRef(Str, N - 1) {
869    }
870  };
871
872  /// @name StringRef Comparison Operators
873  /// @{
874
875  LLVM_ATTRIBUTE_ALWAYS_INLINE
876  inline bool operator==(StringRef LHS, StringRef RHS) {
877    return LHS.equals(RHS);
878  }
879
880  LLVM_ATTRIBUTE_ALWAYS_INLINE
881  inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
882
883  inline bool operator<(StringRef LHS, StringRef RHS) {
884    return LHS.compare(RHS) == -1;
885  }
886
887  inline bool operator<=(StringRef LHS, StringRef RHS) {
888    return LHS.compare(RHS) != 1;
889  }
890
891  inline bool operator>(StringRef LHS, StringRef RHS) {
892    return LHS.compare(RHS) == 1;
893  }
894
895  inline bool operator>=(StringRef LHS, StringRef RHS) {
896    return LHS.compare(RHS) != -1;
897  }
898
899  inline std::string &operator+=(std::string &buffer, StringRef string) {
900    return buffer.append(string.data(), string.size());
901  }
902
903  /// @}
904
  /// \brief Compute a hash_code for a StringRef.
  ///
  /// Only declared in this header; hash_code itself is merely
  /// forward-declared near the top of the file.
  LLVM_NODISCARD
  hash_code hash_value(StringRef S);
908
  // StringRefs can be treated like a POD type.
  // The primary isPodLike template is only forward-declared here so that the
  // StringRef specialization can be written in this header.
  // NOTE(review): the primary template is presumably defined in another LLVM
  // header -- confirm where, and that users of the trait include it.
  template <typename T> struct isPodLike;
  template <> struct isPodLike<StringRef> { static const bool value = true; };
912
913} // end namespace llvm
914
915#endif // LLVM_ADT_STRINGREF_H
916