StringRef.h revision c78c0c99a0fe1703ae72fc51e440aaa8e4e19e91
//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_STRINGREF_H
#define LLVM_ADT_STRINGREF_H

#include <algorithm>
#include <cassert>
#include <cstring>
#include <string>
#include <utility>

namespace llvm {
  template<typename T>
  class SmallVectorImpl;

  /// StringRef - Represent a constant reference to a string, i.e. a character
  /// array and a length, which need not be null terminated.
  ///
  /// This class does not own the string data, it is expected to be used in
  /// situations where the character data resides in some other buffer, whose
  /// lifetime extends past that of the StringRef. For this reason, it is not in
  /// general safe to store a StringRef.
  class StringRef {
  public:
    typedef const char *iterator;
    static const size_t npos = ~size_t(0);
    typedef size_t size_type;

  private:
    /// The start of the string, in an external buffer.
    const char *Data;

    /// The length of the string.
    size_t Length;

    /// compareMemory - memcmp wrapper that is safe for empty ranges.
    ///
    /// A default-constructed StringRef holds a null Data pointer, and passing
    /// a null pointer to memcmp is undefined even when the count is zero, so
    /// the zero-length case is answered here without touching the pointers.
    static int compareMemory(const char *LHS, const char *RHS, size_t N) {
      if (N == 0)
        return 0;
      return ::memcmp(LHS, RHS, N);
    }

  public:
    /// @name Constructors
    /// @{

    /// Construct an empty string ref.
    /*implicit*/ StringRef() : Data(0), Length(0) {}

    /// Construct a string ref from a cstring.
    ///
    /// \param Str - A null terminated string; must not be a null pointer,
    /// because its length is measured with strlen.
    /*implicit*/ StringRef(const char *Str)
      : Data(Str) {
      assert(Str && "StringRef cannot be built from a NULL argument");
      Length = ::strlen(Str);
    }

    /// Construct a string ref from a pointer and length.
    /*implicit*/ StringRef(const char *data, size_t length)
      : Data(data), Length(length) {}

    /// Construct a string ref from an std::string.
    /*implicit*/ StringRef(const std::string &Str)
      : Data(Str.c_str()), Length(Str.length()) {}

    /// @}
    /// @name Iterators
    /// @{

    iterator begin() const { return Data; }

    iterator end() const { return Data + Length; }

    /// @}
    /// @name String Operations
    /// @{

    /// data - Get a pointer to the start of the string (which may not be null
    /// terminated).
    const char *data() const { return Data; }

    /// empty - Check if the string is empty.
    bool empty() const { return Length == 0; }

    /// size - Get the string size.
    size_t size() const { return Length; }

    /// front - Get the first character in the string.
    char front() const {
      assert(!empty());
      return Data[0];
    }

    /// back - Get the last character in the string.
    char back() const {
      assert(!empty());
      return Data[Length-1];
    }

    /// equals - Check for string equality, this is more efficient than
    /// compare() when the relative ordering of unequal strings isn't needed.
    bool equals(StringRef RHS) const {
      return (Length == RHS.Length &&
              compareMemory(Data, RHS.Data, RHS.Length) == 0);
    }

    /// equals_lower - Check for string equality, ignoring case.
    bool equals_lower(StringRef RHS) const {
      return Length == RHS.Length && compare_lower(RHS) == 0;
    }

    /// compare - Compare two strings; the result is -1, 0, or 1 if this string
    /// is lexicographically less than, equal to, or greater than the \arg RHS.
    int compare(StringRef RHS) const {
      // Check the prefix for a mismatch.
      if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
        return Res < 0 ? -1 : 1;

      // Otherwise the prefixes match, so we only need to check the lengths.
      if (Length == RHS.Length)
        return 0;
      return Length < RHS.Length ? -1 : 1;
    }

    /// compare_lower - Compare two strings, ignoring case.
    int compare_lower(StringRef RHS) const;

    /// str - Get the contents as an std::string.
    std::string str() const {
      // A default-constructed StringRef has a null Data pointer; do not hand
      // that to the std::string (pointer, length) constructor.
      if (Data == 0)
        return std::string();
      return std::string(Data, Length);
    }

    /// @}
    /// @name Operator Overloads
    /// @{

    char operator[](size_t Index) const {
      assert(Index < Length && "Invalid index!");
      return Data[Index];
    }

    /// @}
    /// @name Type Conversions
    /// @{

    operator std::string() const {
      return str();
    }

    /// @}
    /// @name String Predicates
    /// @{

    /// startswith - Check if this string starts with the given \arg Prefix.
    bool startswith(StringRef Prefix) const {
      return substr(0, Prefix.Length).equals(Prefix);
    }

    /// endswith - Check if this string ends with the given \arg Suffix.
    bool endswith(StringRef Suffix) const {
      // If Suffix is longer than this string the subtraction wraps around;
      // slice() clamps the start to size(), which yields an empty string that
      // cannot equal the (non-empty) Suffix.
      return slice(size() - Suffix.Length, size()).equals(Suffix);
    }

    /// @}
    /// @name String Searching
    /// @{

    /// find - Search for the first character \arg C in the string, starting
    /// at index \arg From.
    ///
    /// \return - The index of the first occurrence of \arg C, or npos if not
    /// found.
    size_t find(char C, size_t From = 0) const {
      for (size_t i = std::min(From, Length), e = Length; i != e; ++i)
        if (Data[i] == C)
          return i;
      return npos;
    }

    /// find - Search for the first string \arg Str in the string.
    ///
    /// \return - The index of the first occurrence of \arg Str, or npos if not
    /// found.
    size_t find(StringRef Str, size_t From = 0) const;

    /// rfind - Search for the last character \arg C in the string, looking
    /// only at indices below \arg From.
    ///
    /// \return - The index of the last occurrence of \arg C, or npos if not
    /// found.
    size_t rfind(char C, size_t From = npos) const {
      From = std::min(From, Length);
      size_t i = From;
      while (i != 0) {
        --i;
        if (Data[i] == C)
          return i;
      }
      return npos;
    }

    /// rfind - Search for the last string \arg Str in the string.
    ///
    /// \return - The index of the last occurrence of \arg Str, or npos if not
    /// found.
    size_t rfind(StringRef Str) const;

    /// find_first_of - Find the first character in the string that is \arg C,
    /// starting at index \arg From, or npos if not found. Same as find.
    size_type find_first_of(char C, size_t From = 0) const {
      return find(C, From);
    }

    /// find_first_of - Find the first character in the string that is in \arg
    /// Chars, or npos if not found.
    ///
    /// Note: O(size() * Chars.size())
    size_type find_first_of(StringRef Chars, size_t From = 0) const;

    /// find_first_not_of - Find the first character in the string that is not
    /// \arg C or npos if not found.
    size_type find_first_not_of(char C, size_t From = 0) const;

    /// find_first_not_of - Find the first character in the string that is not
    /// in the string \arg Chars, or npos if not found.
    ///
    /// Note: O(size() * Chars.size())
    size_type find_first_not_of(StringRef Chars, size_t From = 0) const;

    /// @}
    /// @name Helpful Algorithms
    /// @{

    /// count - Return the number of occurrences of \arg C in the string.
    size_t count(char C) const {
      size_t Count = 0;
      for (size_t i = 0, e = Length; i != e; ++i)
        if (Data[i] == C)
          ++Count;
      return Count;
    }

    /// count - Return the number of non-overlapped occurrences of \arg Str in
    /// the string.
    size_t count(StringRef Str) const;

    /// getAsInteger - Parse the current string as an integer of the specified
    /// radix.  If Radix is specified as zero, this does radix autosensing using
    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
    ///
    /// If the string is invalid or if only a subset of the string is valid,
    /// this returns true to signify the error.  The string is considered
    /// erroneous if empty.
    ///
    bool getAsInteger(unsigned Radix, long long &Result) const;
    bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
    bool getAsInteger(unsigned Radix, int &Result) const;
    bool getAsInteger(unsigned Radix, unsigned &Result) const;

    // TODO: Provide overloads for int/unsigned that check for overflow.

    /// @}
    /// @name Substring Operations
    /// @{

    /// substr - Return a reference to the substring from [Start, Start + N).
    ///
    /// \param Start - The index of the starting character in the substring; if
    /// the index is npos or greater than the length of the string then the
    /// empty substring will be returned.
    ///
    /// \param N - The number of characters to be included in the substring. If
    /// N exceeds the number of characters remaining in the string, the string
    /// suffix (starting with \arg Start) will be returned.
    StringRef substr(size_t Start, size_t N = npos) const {
      Start = std::min(Start, Length);
      return StringRef(Data + Start, std::min(N, Length - Start));
    }

    /// slice - Return a reference to the substring from [Start, End).
    ///
    /// \param Start - The index of the starting character in the substring; if
    /// the index is npos or greater than the length of the string then the
    /// empty substring will be returned.
    ///
    /// \param End - The index following the last character to include in the
    /// substring. If this is npos, or exceeds the length of the string, the
    /// string suffix (starting with \arg Start) will be returned. If it is
    /// less than \arg Start it is treated as \arg Start, so the empty
    /// substring will be returned.
    StringRef slice(size_t Start, size_t End) const {
      Start = std::min(Start, Length);
      End = std::min(std::max(Start, End), Length);
      return StringRef(Data + Start, End - Start);
    }

    /// split - Split into two substrings around the first occurrence of a
    /// separator character.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// maximal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The character to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> split(char Separator) const {
      size_t Idx = find(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    }

    /// split - Split into two substrings around the first occurrence of a
    /// separator string.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// maximal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The string to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> split(StringRef Separator) const {
      size_t Idx = find(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
    }

    /// split - Split into substrings around the occurrences of a separator
    /// string.
    ///
    /// Each substring is stored in \arg A. If \arg MaxSplit is >= 0, at most
    /// \arg MaxSplit splits are done and consequently <= \arg MaxSplit
    /// elements are added to A.
    /// If \arg KeepEmpty is false, empty strings are not added to \arg A. They
    /// still count when considering \arg MaxSplit
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    ///
    /// \param A - Where to put the substrings.
    /// \param Separator - The string to split on.
    /// \param MaxSplit - The maximum number of times the string is split.
    /// \param KeepEmpty - True if empty substring should be added.
    void split(SmallVectorImpl<StringRef> &A,
               StringRef Separator, int MaxSplit = -1,
               bool KeepEmpty = true) const;

    /// rsplit - Split into two substrings around the last occurrence of a
    /// separator character.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// minimal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The character to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> rsplit(char Separator) const {
      size_t Idx = rfind(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    }

    /// @}
  };

  /// @name StringRef Comparison Operators
  /// @{

  inline bool operator==(StringRef LHS, StringRef RHS) {
    return LHS.equals(RHS);
  }

  inline bool operator!=(StringRef LHS, StringRef RHS) {
    return !(LHS == RHS);
  }

  inline bool operator<(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) == -1;
  }

  inline bool operator<=(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) != 1;
  }

  inline bool operator>(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) == 1;
  }

  inline bool operator>=(StringRef LHS, StringRef RHS) {
    return LHS.compare(RHS) != -1;
  }

  /// @}

} // end namespace llvm

#endif