StringRef.h revision d5b1f8a8426e82990dafc6e3336fefc6635c8fa4
//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_STRINGREF_H
#define LLVM_ADT_STRINGREF_H

#include <cassert>
#include <cstring>
#include <utility>
#include <string>

namespace llvm {
  template<typename T>
  class SmallVectorImpl;

  /// StringRef - Represent a constant reference to a string, i.e. a character
  /// array and a length, which need not be null terminated.
  ///
  /// This class does not own the string data, it is expected to be used in
  /// situations where the character data resides in some other buffer, whose
  /// lifetime extends past that of the StringRef. For this reason, it is not in
  /// general safe to store a StringRef.
  class StringRef {
  public:
    typedef const char *iterator;
    static const size_t npos = ~size_t(0);
    typedef size_t size_type;

  private:
    /// The start of the string, in an external buffer.
    const char *Data;

    /// The length of the string.
    size_t Length;

    // Workaround PR5482: nearly all gcc 4.x miscompile StringRef and std::min()
    // Changing the arg of min to be an integer, instead of a reference to an
    // integer works around this bug.
    size_t min(size_t a, size_t b) const
    {
      return a < b ? a : b;
    }

    size_t max(size_t a, size_t b) const
    {
      return a > b ? a : b;
    }

    /// compareMemory - memcmp() wrapper that is safe for empty ranges.
    ///
    /// A default-constructed StringRef has a null data pointer, and passing a
    /// null pointer to memcmp() is undefined even when the length is zero, so
    /// zero-length comparisons are answered here without calling memcmp().
    static int compareMemory(const char *LHS, const char *RHS, size_t N) {
      if (N == 0)
        return 0;
      return ::memcmp(LHS, RHS, N);
    }

  public:
    /// @name Constructors
    /// @{

    /// Construct an empty string ref.
    /*implicit*/ StringRef() : Data(0), Length(0) {}

    /// Construct a string ref from a cstring. \arg Str must not be null.
    /*implicit*/ StringRef(const char *Str)
      : Data(Str), Length(0) {
      // strlen() on a null pointer is undefined; diagnose it before measuring.
      assert(Str && "StringRef cannot be built from a NULL argument");
      Length = ::strlen(Str);
    }

    /// Construct a string ref from a pointer and length.
    /*implicit*/ StringRef(const char *data, size_t length)
      : Data(data), Length(length) {}

    /// Construct a string ref from an std::string.
    /*implicit*/ StringRef(const std::string &Str)
      : Data(Str.data()), Length(Str.length()) {}

    /// @}
    /// @name Iterators
    /// @{

    iterator begin() const { return Data; }

    iterator end() const { return Data + Length; }

    /// @}
    /// @name String Operations
    /// @{

    /// data - Get a pointer to the start of the string (which may not be null
    /// terminated).
    const char *data() const { return Data; }

    /// empty - Check if the string is empty.
    bool empty() const { return Length == 0; }

    /// size - Get the string size.
    size_t size() const { return Length; }

    /// front - Get the first character in the string.
    char front() const {
      assert(!empty());
      return Data[0];
    }

    /// back - Get the last character in the string.
    char back() const {
      assert(!empty());
      return Data[Length-1];
    }

    /// equals - Check for string equality, this is more efficient than
    /// compare() when the relative ordering of unequal strings isn't needed.
    bool equals(StringRef RHS) const {
      return (Length == RHS.Length &&
              compareMemory(Data, RHS.Data, RHS.Length) == 0);
    }

    /// equals_lower - Check for string equality, ignoring case.
    bool equals_lower(StringRef RHS) const {
      return Length == RHS.Length && compare_lower(RHS) == 0;
    }

    /// compare - Compare two strings; the result is -1, 0, or 1 if this string
    /// is lexicographically less than, equal to, or greater than the \arg RHS.
    int compare(StringRef RHS) const {
      // Check the prefix for a mismatch.
      if (int Res = compareMemory(Data, RHS.Data, min(Length, RHS.Length)))
        return Res < 0 ? -1 : 1;

      // Otherwise the prefixes match, so we only need to check the lengths.
      if (Length == RHS.Length)
        return 0;
      return Length < RHS.Length ? -1 : 1;
    }

    /// compare_lower - Compare two strings, ignoring case.
    int compare_lower(StringRef RHS) const;

    /// str - Get the contents as an std::string.
    std::string str() const {
      // Never hand a null pointer to the std::string constructor.
      if (!Data)
        return std::string();
      return std::string(Data, Length);
    }

    /// @}
    /// @name Operator Overloads
    /// @{

    char operator[](size_t Index) const {
      assert(Index < Length && "Invalid index!");
      return Data[Index];
    }

    /// @}
    /// @name Type Conversions
    /// @{

    operator std::string() const {
      return str();
    }

    /// @}
    /// @name String Predicates
    /// @{

    /// startswith - Check if this string starts with the given \arg Prefix.
    bool startswith(StringRef Prefix) const {
      return Length >= Prefix.Length &&
             compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
    }

    /// endswith - Check if this string ends with the given \arg Suffix.
    bool endswith(StringRef Suffix) const {
      return Length >= Suffix.Length &&
             compareMemory(end() - Suffix.Length, Suffix.Data,
                           Suffix.Length) == 0;
    }

    /// @}
    /// @name String Searching
    /// @{

    /// find - Search for the first character \arg C in the string, starting
    /// at index \arg From (clamped to the string length).
    ///
    /// \return - The index of the first occurrence of \arg C, or npos if not
    /// found.
    size_t find(char C, size_t From = 0) const {
      for (size_t i = min(From, Length), e = Length; i != e; ++i)
        if (Data[i] == C)
          return i;
      return npos;
    }

    /// find - Search for the first string \arg Str in the string.
    ///
    /// \return - The index of the first occurrence of \arg Str, or npos if not
    /// found.
    size_t find(StringRef Str, size_t From = 0) const;

    /// rfind - Search for the last character \arg C in the string, looking
    /// only at indices strictly below \arg From (clamped to the string
    /// length).
    ///
    /// \return - The index of the last occurrence of \arg C, or npos if not
    /// found.
    size_t rfind(char C, size_t From = npos) const {
      From = min(From, Length);
      size_t i = From;
      while (i != 0) {
        --i;
        if (Data[i] == C)
          return i;
      }
      return npos;
    }

    /// rfind - Search for the last string \arg Str in the string.
    ///
    /// \return - The index of the last occurrence of \arg Str, or npos if not
    /// found.
    size_t rfind(StringRef Str) const;

    /// find_first_of - Find the first character in the string that is \arg C,
    /// starting at index \arg From, or npos if not found. Same as find.
    size_type find_first_of(char C, size_t From = 0) const {
      // Forward the start index; it must not be dropped.
      return find(C, From);
    }

    /// find_first_of - Find the first character in the string that is in \arg
    /// Chars, or npos if not found.
    ///
    /// Note: O(size() * Chars.size())
    size_type find_first_of(StringRef Chars, size_t From = 0) const;

    /// find_first_not_of - Find the first character in the string that is not
    /// \arg C or npos if not found.
    size_type find_first_not_of(char C, size_t From = 0) const;

    /// find_first_not_of - Find the first character in the string that is not
    /// in the string \arg Chars, or npos if not found.
    ///
    /// Note: O(size() * Chars.size())
    size_type find_first_not_of(StringRef Chars, size_t From = 0) const;

    /// @}
    /// @name Helpful Algorithms
    /// @{

    /// count - Return the number of occurrences of \arg C in the string.
    size_t count(char C) const {
      size_t Count = 0;
      for (size_t i = 0, e = Length; i != e; ++i)
        if (Data[i] == C)
          ++Count;
      return Count;
    }

    /// count - Return the number of non-overlapped occurrences of \arg Str in
    /// the string.
    size_t count(StringRef Str) const;

    /// getAsInteger - Parse the current string as an integer of the specified
    /// radix.  If Radix is specified as zero, this does radix autosensing using
    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
    ///
    /// If the string is invalid or if only a subset of the string is valid,
    /// this returns true to signify the error.  The string is considered
    /// erroneous if empty.
    ///
    bool getAsInteger(unsigned Radix, long long &Result) const;
    bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
    bool getAsInteger(unsigned Radix, int &Result) const;
    bool getAsInteger(unsigned Radix, unsigned &Result) const;

    // TODO: Provide overloads for int/unsigned that check for overflow.

    /// @}
    /// @name Substring Operations
    /// @{

    /// substr - Return a reference to the substring from [Start, Start + N).
    ///
    /// \param Start - The index of the starting character in the substring; if
    /// the index is npos or greater than the length of the string then the
    /// empty substring will be returned.
    ///
    /// \param N - The number of characters to included in the substring. If N
    /// exceeds the number of characters remaining in the string, the string
    /// suffix (starting with \arg Start) will be returned.
    StringRef substr(size_t Start, size_t N = npos) const {
      Start = min(Start, Length);
      return StringRef(Data + Start, min(N, Length - Start));
    }

    /// slice - Return a reference to the substring from [Start, End).
    ///
    /// \param Start - The index of the starting character in the substring; if
    /// the index is npos or greater than the length of the string then the
    /// empty substring will be returned.
    ///
    /// \param End - The index following the last character to include in the
    /// substring. If this is npos, or exceeds the length of the string, the
    /// string suffix (starting with \arg Start) will be returned. If this is
    /// less than \arg Start, the empty substring will be returned.
    StringRef slice(size_t Start, size_t End) const {
      Start = min(Start, Length);
      End = min(max(Start, End), Length);
      return StringRef(Data + Start, End - Start);
    }

    /// split - Split into two substrings around the first occurrence of a
    /// separator character.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// maximal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The character to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> split(char Separator) const {
      size_t Idx = find(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    }

    /// split - Split into two substrings around the first occurrence of a
    /// separator string.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// maximal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The string to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> split(StringRef Separator) const {
      size_t Idx = find(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
    }

    /// split - Split into substrings around the occurrences of a separator
    /// string.
    ///
    /// Each substring is stored in \arg A. If \arg MaxSplit is >= 0, at most
    /// \arg MaxSplit splits are done and consequently <= \arg MaxSplit
    /// elements are added to A.
    /// If \arg KeepEmpty is false, empty strings are not added to \arg A. They
    /// still count when considering \arg MaxSplit
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    ///
    /// \param A - Where to put the substrings.
    /// \param Separator - The string to split on.
    /// \param MaxSplit - The maximum number of times the string is split.
    /// \param KeepEmpty - True if empty substring should be added.
    void split(SmallVectorImpl<StringRef> &A,
               StringRef Separator, int MaxSplit = -1,
               bool KeepEmpty = true) const;

    /// rsplit - Split into two substrings around the last occurrence of a
    /// separator character.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// minimal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The character to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> rsplit(char Separator) const {
      size_t Idx = rfind(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    }

    /// @}
  };

  /// @name StringRef Comparison Operators
  /// @{

  inline bool operator==(StringRef LHS, StringRef RHS) {
    return LHS.equals(RHS);
  }

  inline bool operator!=(StringRef LHS, StringRef RHS) {
    return !(LHS == RHS);
  }

  inline bool operator<(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) == -1;
  }

  inline bool operator<=(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) != 1;
  }

  inline bool operator>(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) == 1;
  }

  inline bool operator>=(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) != -1;
  }

  /// @}

}

#endif