SourceManager.h revision b6427f821de8cce1566fb6e755143ea0918d5543
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileEntry; 32class IdentifierTokenInfo; 33 34/// SrcMgr - Private classes that are part of the SourceManager implementation. 35/// 36namespace SrcMgr { 37 /// ContentCache - Once instance of this struct is kept for every file 38 /// loaded or used. This object owns the MemoryBuffer object. 39 struct ContentCache { 40 /// Reference to the file entry. This reference does not own 41 /// the FileEntry object. It is possible for this to be NULL if 42 /// the ContentCache encapsulates an imaginary text buffer. 43 const FileEntry* Entry; 44 45 /// Buffer - The actual buffer containing the characters from the input 46 /// file. This is owned by the ContentCache object. 47 const llvm::MemoryBuffer* Buffer; 48 49 /// SourceLineCache - A new[]'d array of offsets for each source line. This 50 /// is lazily computed. This is owned by the ContentCache object. 51 unsigned* SourceLineCache; 52 53 /// NumLines - The number of lines in this ContentCache. This is only valid 54 /// if SourceLineCache is non-null. 55 unsigned NumLines; 56 57 ContentCache(const FileEntry* e = NULL) 58 : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {} 59 60 ~ContentCache(); 61 62 /// The copy ctor does not allow copies where source object has either 63 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory 64 /// is not transfered, so this is a logical error. 65 ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) { 66 Entry = RHS.Entry; 67 68 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL 69 && "Passed ContentCache object cannot own a buffer."); 70 71 NumLines = RHS.NumLines; 72 } 73 74 private: 75 // Disable assignments. 76 ContentCache& operator=(const ContentCache& RHS); 77 }; 78 79 /// FileIDInfo - Information about a FileID, basically just the logical file 80 /// that it represents and include stack information. A File SourceLocation 81 /// is a byte offset from the start of this. 82 /// 83 /// FileID's are used to compute the location of a character in memory as well 84 /// as the logical source location, which can be differ from the physical 85 /// location. It is different when #line's are active or when macros have 86 /// been expanded. 87 /// 88 /// Each FileID has include stack information, indicating where it came from. 89 /// For the primary translation unit, it comes from SourceLocation() aka 0. 90 /// This information encodes the #include chain that a token was instantiated 91 /// from. 92 /// 93 /// FileIDInfos contain a "ContentCache *", describing the source file, 94 /// and a Chunk number, which allows a SourceLocation to index into very 95 /// large files (those which there are not enough FilePosBits to address). 96 /// 97 struct FileIDInfo { 98 private: 99 /// IncludeLoc - The location of the #include that brought in this file. 100 /// This SourceLocation object has an invalid SLOC for the main file. 101 SourceLocation IncludeLoc; 102 103 /// ChunkNo - Really large buffers are broken up into chunks that are 104 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 105 /// chunk number of this FileID. 106 unsigned ChunkNo; 107 108 /// Content - Information about the source buffer itself. 109 const ContentCache* Content; 110 111 public: 112 /// get - Return a FileIDInfo object. 113 static FileIDInfo get(SourceLocation IL, unsigned CN, 114 const ContentCache *Con) { 115 FileIDInfo X; 116 X.IncludeLoc = IL; 117 X.ChunkNo = CN; 118 X.Content = Con; 119 return X; 120 } 121 122 SourceLocation getIncludeLoc() const { return IncludeLoc; } 123 unsigned getChunkNo() const { return ChunkNo; } 124 const ContentCache* getContentCache() const { return Content; } 125 }; 126 127 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 128 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 129 /// instantiated, and the PhysicalLoc - where the actual character data for 130 /// the token came from. An actual macro SourceLocation stores deltas from 131 /// these positions. 132 class MacroIDInfo { 133 SourceLocation VirtualLoc, PhysicalLoc; 134 public: 135 SourceLocation getVirtualLoc() const { return VirtualLoc; } 136 SourceLocation getPhysicalLoc() const { return PhysicalLoc; } 137 138 /// get - Return a MacroID for a macro expansion. VL specifies 139 /// the instantiation location (where the macro is expanded), and PL 140 /// specifies the physical location (where the characters from the token 141 /// come from). Both VL and PL refer to normal File SLocs. 142 static MacroIDInfo get(SourceLocation VL, SourceLocation PL) { 143 MacroIDInfo X; 144 X.VirtualLoc = VL; 145 X.PhysicalLoc = PL; 146 return X; 147 } 148 }; 149} // end SrcMgr namespace. 150} // end clang namespace 151 152namespace std { 153template <> struct less<clang::SrcMgr::ContentCache> { 154 inline bool operator()(const clang::SrcMgr::ContentCache& L, 155 const clang::SrcMgr::ContentCache& R) const { 156 return L.Entry < R.Entry; 157 } 158}; 159} // end std namespace 160 161namespace clang { 162 163/// SourceManager - This file handles loading and caching of source files into 164/// memory. This object owns the MemoryBuffer objects for all of the loaded 165/// files and assigns unique FileID's for each unique #include chain. 166/// 167/// The SourceManager can be queried for information about SourceLocation 168/// objects, turning them into either physical or logical locations. Physical 169/// locations represent where the bytes corresponding to a token came from and 170/// logical locations represent where the location is in the user's view. In 171/// the case of a macro expansion, for example, the physical location indicates 172/// where the expanded token came from and the logical location specifies where 173/// it was expanded. Logical locations are also influenced by #line directives, 174/// etc. 175class SourceManager { 176 /// FileInfos - Memoized information about all of the files tracked by this 177 /// SourceManager. This set allows us to merge ContentCache entries based 178 /// on their FileEntry*. All ContentCache objects will thus have unique, 179 /// non-null, FileEntry pointers. 180 std::set<SrcMgr::ContentCache> FileInfos; 181 182 /// MemBufferInfos - Information about various memory buffers that we have 183 /// read in. This is a list, instead of a vector, because we need pointers to 184 /// the ContentCache objects to be stable. All FileEntry* within the 185 /// stored ContentCache objects are NULL, as they do not refer to a file. 186 std::list<SrcMgr::ContentCache> MemBufferInfos; 187 188 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 189 /// entries are off by one. 190 std::vector<SrcMgr::FileIDInfo> FileIDs; 191 192 /// MacroIDs - Information about each MacroID. 193 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 194 195 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 196 /// method which is used to speedup getLineNumber calls to nearby locations. 197 unsigned LastLineNoFileIDQuery; 198 SrcMgr::ContentCache *LastLineNoContentCache; 199 unsigned LastLineNoFilePos; 200 unsigned LastLineNoResult; 201 202public: 203 SourceManager() : LastLineNoFileIDQuery(~0U) {} 204 ~SourceManager() {} 205 206 void clearIDTables() { 207 FileIDs.clear(); 208 MacroIDs.clear(); 209 LastLineNoFileIDQuery = ~0U; 210 LastLineNoContentCache = 0; 211 } 212 213 /// createFileID - Create a new FileID that represents the specified file 214 /// being #included from the specified IncludePosition. This returns 0 on 215 /// error and translates NULL into standard input. 216 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ 217 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 218 if (IR == 0) return 0; // Error opening file? 219 return createFileID(IR, IncludePos); 220 } 221 222 /// createFileIDForMemBuffer - Create a new FileID that represents the 223 /// specified memory buffer. This does no caching of the buffer and takes 224 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 225 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 226 return createFileID(createMemBufferContentCache(Buffer), SourceLocation()); 227 } 228 229 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 230 /// that a token at Loc should actually be referenced from InstantiationLoc. 231 SourceLocation getInstantiationLoc(SourceLocation Loc, 232 SourceLocation InstantiationLoc); 233 234 /// getBuffer - Return the buffer for the specified FileID. 235 /// 236 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 237 return getContentCache(FileID)->Buffer; 238 } 239 240 /// getBufferData - Return a pointer to the start and end of the character 241 /// data for the specified FileID. 242 std::pair<const char*, const char*> getBufferData(unsigned FileID) const; 243 244 /// getIncludeLoc - Return the location of the #include for the specified 245 /// SourceLocation. If this is a macro expansion, this transparently figures 246 /// out which file includes the file being expanded into. 247 SourceLocation getIncludeLoc(SourceLocation ID) const { 248 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc(); 249 } 250 251 /// getCharacterData - Return a pointer to the start of the specified location 252 /// in the appropriate MemoryBuffer. 253 const char *getCharacterData(SourceLocation SL) const; 254 255 /// getColumnNumber - Return the column # for the specified file position. 256 /// This is significantly cheaper to compute than the line number. This 257 /// returns zero if the column number isn't known. This may only be called on 258 /// a file sloc, so you must choose a physical or logical location before 259 /// calling this method. 260 unsigned getColumnNumber(SourceLocation Loc) const; 261 262 unsigned getPhysicalColumnNumber(SourceLocation Loc) const { 263 return getColumnNumber(getPhysicalLoc(Loc)); 264 } 265 unsigned getLogicalColumnNumber(SourceLocation Loc) const { 266 return getColumnNumber(getLogicalLoc(Loc)); 267 } 268 269 270 /// getLineNumber - Given a SourceLocation, return the physical line number 271 /// for the position indicated. This requires building and caching a table of 272 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 273 /// about to emit a diagnostic. 274 unsigned getLineNumber(SourceLocation Loc); 275 276 unsigned getLogicalLineNumber(SourceLocation Loc) { 277 return getLineNumber(getLogicalLoc(Loc)); 278 } 279 unsigned getPhysicalLineNumber(SourceLocation Loc) { 280 return getLineNumber(getPhysicalLoc(Loc)); 281 } 282 283 /// getSourceName - This method returns the name of the file or buffer that 284 /// the SourceLocation specifies. This can be modified with #line directives, 285 /// etc. 286 const char *getSourceName(SourceLocation Loc) const; 287 288 /// Given a SourceLocation object, return the logical location referenced by 289 /// the ID. This logical location is subject to #line directives, etc. 290 SourceLocation getLogicalLoc(SourceLocation Loc) const { 291 // File locations are both physical and logical. 292 if (Loc.isFileID()) return Loc; 293 294 return MacroIDs[Loc.getMacroID()].getVirtualLoc(); 295 } 296 297 /// getPhysicalLoc - Given a SourceLocation object, return the physical 298 /// location referenced by the ID. 299 SourceLocation getPhysicalLoc(SourceLocation Loc) const { 300 // File locations are both physical and logical. 301 if (Loc.isFileID()) return Loc; 302 303 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc(); 304 return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs()); 305 } 306 307 /// getContentCacheForLoc - Return the ContentCache for the physloc of the 308 /// specified SourceLocation, if one exists. 309 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 310 Loc = getPhysicalLoc(Loc); 311 unsigned FileID = Loc.getFileID(); 312 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 313 return FileIDs[FileID-1].getContentCache(); 314 } 315 316 /// getFileEntryForLoc - Return the FileEntry record for the physloc of the 317 /// specified SourceLocation, if one exists. 318 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 319 return getContentCacheForLoc(Loc)->Entry; 320 } 321 322 /// getDecomposedFileLoc - Decompose the specified file location into a raw 323 /// FileID + Offset pair. The first element is the FileID, the second is the 324 /// offset from the start of the buffer of the location. 325 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 326 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 327 328 // TODO: Add a flag "is first chunk" to SLOC. 329 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 330 331 // If this file has been split up into chunks, factor in the chunk number 332 // that the FileID references. 333 unsigned ChunkNo = FIDInfo->getChunkNo(); 334 unsigned Offset = Loc.getRawFilePos(); 335 Offset += (ChunkNo << SourceLocation::FilePosBits); 336 337 return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset); 338 } 339 340 /// PrintStats - Print statistics to stderr. 341 /// 342 void PrintStats() const; 343 344private: 345 /// createFileID - Create a new fileID for the specified ContentCache and 346 /// include position. This works regardless of whether the ContentCache 347 /// corresponds to a file or some other input source. 348 unsigned createFileID(const SrcMgr::ContentCache* File, 349 SourceLocation IncludePos); 350 351 /// getContentCache - Create or return a cached ContentCache for the specified 352 /// file. This returns null on failure. 353 const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile); 354 355 /// createMemBufferContentCache - Create a new ContentCache for the specified 356 /// memory buffer. 357 const SrcMgr::ContentCache* 358 createMemBufferContentCache(const llvm::MemoryBuffer* Buf); 359 360 const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const { 361 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 362 return &FileIDs[FileID-1]; 363 } 364 365 const SrcMgr::ContentCache *getContentCache(unsigned FileID) const { 366 return getContentCache(getFIDInfo(FileID)); 367 } 368 369 /// Return the ContentCache structure for the specified FileID. 370 /// This is always the physical reference for the ID. 371 const SrcMgr::ContentCache* 372 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 373 return FIDInfo->getContentCache(); 374 } 375 376 /// getFullFilePos - This (efficient) method returns the offset from the start 377 /// of the file that the specified physical SourceLocation represents. This 378 /// returns the location of the physical character data, not the logical file 379 /// position. 380 unsigned getFullFilePos(SourceLocation PhysLoc) const { 381 return getDecomposedFileLoc(PhysLoc).second; 382 } 383}; 384 385 386} // end namespace clang 387 388#endif 389