StringRef.h revision d5b1f8a8426e82990dafc6e3336fefc6635c8fa4
1//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_ADT_STRINGREF_H
11#define LLVM_ADT_STRINGREF_H
12
13#include <cassert>
14#include <cstring>
15#include <utility>
16#include <string>
17
18namespace llvm {
19  template<typename T>
20  class SmallVectorImpl;
21
22  /// StringRef - Represent a constant reference to a string, i.e. a character
23  /// array and a length, which need not be null terminated.
24  ///
25  /// This class does not own the string data, it is expected to be used in
26  /// situations where the character data resides in some other buffer, whose
27  /// lifetime extends past that of the StringRef. For this reason, it is not in
28  /// general safe to store a StringRef.
29  class StringRef {
30  public:
31    typedef const char *iterator;
32    static const size_t npos = ~size_t(0);
33    typedef size_t size_type;
34
35  private:
36    /// The start of the string, in an external buffer.
37    const char *Data;
38
39    /// The length of the string.
40    size_t Length;
41
42    // Workaround PR5482: nearly all gcc 4.x miscompile StringRef and std::min()
43    // Changing the arg of min to be an integer, instead of a reference to an
44    // integer works around this bug.
45    size_t min(size_t a, size_t b) const
46    {
47      return a < b ? a : b;
48    }
49
50    size_t max(size_t a, size_t b) const
51    {
52      return a > b ? a : b;
53    }
54
55  public:
56    /// @name Constructors
57    /// @{
58
59    /// Construct an empty string ref.
60    /*implicit*/ StringRef() : Data(0), Length(0) {}
61
62    /// Construct a string ref from a cstring.
63    /*implicit*/ StringRef(const char *Str)
64      : Data(Str), Length(::strlen(Str)) {}
65
66    /// Construct a string ref from a pointer and length.
67    /*implicit*/ StringRef(const char *data, size_t length)
68      : Data(data), Length(length) {}
69
70    /// Construct a string ref from an std::string.
71    /*implicit*/ StringRef(const std::string &Str)
72      : Data(Str.data()), Length(Str.length()) {}
73
74    /// @}
75    /// @name Iterators
76    /// @{
77
78    iterator begin() const { return Data; }
79
80    iterator end() const { return Data + Length; }
81
82    /// @}
83    /// @name String Operations
84    /// @{
85
86    /// data - Get a pointer to the start of the string (which may not be null
87    /// terminated).
88    const char *data() const { return Data; }
89
90    /// empty - Check if the string is empty.
91    bool empty() const { return Length == 0; }
92
93    /// size - Get the string size.
94    size_t size() const { return Length; }
95
96    /// front - Get the first character in the string.
97    char front() const {
98      assert(!empty());
99      return Data[0];
100    }
101
102    /// back - Get the last character in the string.
103    char back() const {
104      assert(!empty());
105      return Data[Length-1];
106    }
107
108    /// equals - Check for string equality, this is more efficient than
109    /// compare() when the relative ordering of inequal strings isn't needed.
110    bool equals(StringRef RHS) const {
111      return (Length == RHS.Length &&
112              memcmp(Data, RHS.Data, RHS.Length) == 0);
113    }
114
115    /// equals_lower - Check for string equality, ignoring case.
116    bool equals_lower(StringRef RHS) const {
117      return Length == RHS.Length && compare_lower(RHS) == 0;
118    }
119
120    /// compare - Compare two strings; the result is -1, 0, or 1 if this string
121    /// is lexicographically less than, equal to, or greater than the \arg RHS.
122    int compare(StringRef RHS) const {
123      // Check the prefix for a mismatch.
124      if (int Res = memcmp(Data, RHS.Data, min(Length, RHS.Length)))
125        return Res < 0 ? -1 : 1;
126
127      // Otherwise the prefixes match, so we only need to check the lengths.
128      if (Length == RHS.Length)
129        return 0;
130      return Length < RHS.Length ? -1 : 1;
131    }
132
133    /// compare_lower - Compare two strings, ignoring case.
134    int compare_lower(StringRef RHS) const;
135
136    /// str - Get the contents as an std::string.
137    std::string str() const { return std::string(Data, Length); }
138
139    /// @}
140    /// @name Operator Overloads
141    /// @{
142
143    char operator[](size_t Index) const {
144      assert(Index < Length && "Invalid index!");
145      return Data[Index];
146    }
147
148    /// @}
149    /// @name Type Conversions
150    /// @{
151
152    operator std::string() const {
153      return str();
154    }
155
156    /// @}
157    /// @name String Predicates
158    /// @{
159
160    /// startswith - Check if this string starts with the given \arg Prefix.
161    bool startswith(StringRef Prefix) const {
162      return Length >= Prefix.Length &&
163             memcmp(Data, Prefix.Data, Prefix.Length) == 0;
164    }
165
166    /// endswith - Check if this string ends with the given \arg Suffix.
167    bool endswith(StringRef Suffix) const {
168      return Length >= Suffix.Length &&
169             memcmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
170    }
171
172    /// @}
173    /// @name String Searching
174    /// @{
175
176    /// find - Search for the first character \arg C in the string.
177    ///
178    /// \return - The index of the first occurence of \arg C, or npos if not
179    /// found.
180    size_t find(char C, size_t From = 0) const {
181      for (size_t i = min(From, Length), e = Length; i != e; ++i)
182        if (Data[i] == C)
183          return i;
184      return npos;
185    }
186
187    /// find - Search for the first string \arg Str in the string.
188    ///
189    /// \return - The index of the first occurence of \arg Str, or npos if not
190    /// found.
191    size_t find(StringRef Str, size_t From = 0) const;
192
193    /// rfind - Search for the last character \arg C in the string.
194    ///
195    /// \return - The index of the last occurence of \arg C, or npos if not
196    /// found.
197    size_t rfind(char C, size_t From = npos) const {
198      From = min(From, Length);
199      size_t i = From;
200      while (i != 0) {
201        --i;
202        if (Data[i] == C)
203          return i;
204      }
205      return npos;
206    }
207
208    /// rfind - Search for the last string \arg Str in the string.
209    ///
210    /// \return - The index of the last occurence of \arg Str, or npos if not
211    /// found.
212    size_t rfind(StringRef Str) const;
213
214    /// find_first_of - Find the first character in the string that is \arg C,
215    /// or npos if not found. Same as find.
216    size_type find_first_of(char C, size_t = 0) const { return find(C); }
217
218    /// find_first_of - Find the first character in the string that is in \arg
219    /// Chars, or npos if not found.
220    ///
221    /// Note: O(size() * Chars.size())
222    size_type find_first_of(StringRef Chars, size_t From = 0) const;
223
224    /// find_first_not_of - Find the first character in the string that is not
225    /// \arg C or npos if not found.
226    size_type find_first_not_of(char C, size_t From = 0) const;
227
228    /// find_first_not_of - Find the first character in the string that is not
229    /// in the string \arg Chars, or npos if not found.
230    ///
231    /// Note: O(size() * Chars.size())
232    size_type find_first_not_of(StringRef Chars, size_t From = 0) const;
233
234    /// @}
235    /// @name Helpful Algorithms
236    /// @{
237
238    /// count - Return the number of occurrences of \arg C in the string.
239    size_t count(char C) const {
240      size_t Count = 0;
241      for (size_t i = 0, e = Length; i != e; ++i)
242        if (Data[i] == C)
243          ++Count;
244      return Count;
245    }
246
247    /// count - Return the number of non-overlapped occurrences of \arg Str in
248    /// the string.
249    size_t count(StringRef Str) const;
250
251    /// getAsInteger - Parse the current string as an integer of the specified
252    /// radix.  If Radix is specified as zero, this does radix autosensing using
253    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
254    ///
255    /// If the string is invalid or if only a subset of the string is valid,
256    /// this returns true to signify the error.  The string is considered
257    /// erroneous if empty.
258    ///
259    bool getAsInteger(unsigned Radix, long long &Result) const;
260    bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
261    bool getAsInteger(unsigned Radix, int &Result) const;
262    bool getAsInteger(unsigned Radix, unsigned &Result) const;
263
264    // TODO: Provide overloads for int/unsigned that check for overflow.
265
266    /// @}
267    /// @name Substring Operations
268    /// @{
269
270    /// substr - Return a reference to the substring from [Start, Start + N).
271    ///
272    /// \param Start - The index of the starting character in the substring; if
273    /// the index is npos or greater than the length of the string then the
274    /// empty substring will be returned.
275    ///
276    /// \param N - The number of characters to included in the substring. If N
277    /// exceeds the number of characters remaining in the string, the string
278    /// suffix (starting with \arg Start) will be returned.
279    StringRef substr(size_t Start, size_t N = npos) const {
280      Start = min(Start, Length);
281      return StringRef(Data + Start, min(N, Length - Start));
282    }
283
284    /// slice - Return a reference to the substring from [Start, End).
285    ///
286    /// \param Start - The index of the starting character in the substring; if
287    /// the index is npos or greater than the length of the string then the
288    /// empty substring will be returned.
289    ///
290    /// \param End - The index following the last character to include in the
291    /// substring. If this is npos, or less than \arg Start, or exceeds the
292    /// number of characters remaining in the string, the string suffix
293    /// (starting with \arg Start) will be returned.
294    StringRef slice(size_t Start, size_t End) const {
295      Start = min(Start, Length);
296      End = min(max(Start, End), Length);
297      return StringRef(Data + Start, End - Start);
298    }
299
300    /// split - Split into two substrings around the first occurence of a
301    /// separator character.
302    ///
303    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
304    /// such that (*this == LHS + Separator + RHS) is true and RHS is
305    /// maximal. If \arg Separator is not in the string, then the result is a
306    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
307    ///
308    /// \param Separator - The character to split on.
309    /// \return - The split substrings.
310    std::pair<StringRef, StringRef> split(char Separator) const {
311      size_t Idx = find(Separator);
312      if (Idx == npos)
313        return std::make_pair(*this, StringRef());
314      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
315    }
316
317    /// split - Split into two substrings around the first occurence of a
318    /// separator string.
319    ///
320    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
321    /// such that (*this == LHS + Separator + RHS) is true and RHS is
322    /// maximal. If \arg Separator is not in the string, then the result is a
323    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
324    ///
325    /// \param Separator - The string to split on.
326    /// \return - The split substrings.
327    std::pair<StringRef, StringRef> split(StringRef Separator) const {
328      size_t Idx = find(Separator);
329      if (Idx == npos)
330        return std::make_pair(*this, StringRef());
331      return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
332    }
333
334    /// split - Split into substrings around the occurences of a separator
335    /// string.
336    ///
337    /// Each substring is stored in \arg A. If \arg MaxSplit is >= 0, at most
338    /// \arg MaxSplit splits are done and consequently <= \arg MaxSplit
339    /// elements are added to A.
340    /// If \arg KeepEmpty is false, empty strings are not added to \arg A. They
341    /// still count when considering \arg MaxSplit
342    /// An useful invariant is that
343    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
344    ///
345    /// \param A - Where to put the substrings.
346    /// \param Separator - The string to split on.
347    /// \param MaxSplit - The maximum number of times the string is split.
348    /// \parm KeepEmpty - True if empty substring should be added.
349    void split(SmallVectorImpl<StringRef> &A,
350               StringRef Separator, int MaxSplit = -1,
351               bool KeepEmpty = true) const;
352
353    /// rsplit - Split into two substrings around the last occurence of a
354    /// separator character.
355    ///
356    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
357    /// such that (*this == LHS + Separator + RHS) is true and RHS is
358    /// minimal. If \arg Separator is not in the string, then the result is a
359    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
360    ///
361    /// \param Separator - The character to split on.
362    /// \return - The split substrings.
363    std::pair<StringRef, StringRef> rsplit(char Separator) const {
364      size_t Idx = rfind(Separator);
365      if (Idx == npos)
366        return std::make_pair(*this, StringRef());
367      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
368    }
369
370    /// @}
371  };
372
373  /// @name StringRef Comparison Operators
374  /// @{
375
376  inline bool operator==(StringRef LHS, StringRef RHS) {
377    return LHS.equals(RHS);
378  }
379
380  inline bool operator!=(StringRef LHS, StringRef RHS) {
381    return !(LHS == RHS);
382  }
383
384  inline bool operator<(StringRef LHS, StringRef RHS) {
385    return LHS.compare(RHS) == -1;
386  }
387
388  inline bool operator<=(StringRef LHS, StringRef RHS) {
389    return LHS.compare(RHS) != 1;
390  }
391
392  inline bool operator>(StringRef LHS, StringRef RHS) {
393    return LHS.compare(RHS) == 1;
394  }
395
396  inline bool operator>=(StringRef LHS, StringRef RHS) {
397    return LHS.compare(RHS) != -1;
398  }
399
400  /// @}
401
402}
403
404#endif
405