// SourceManager.h revision 78d85f53b093867bbb0123f016956178eea7343e
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileEntry; 32class IdentifierTokenInfo; 33 34/// SrcMgr - Private classes that are part of the SourceManager implementation. 35/// 36namespace SrcMgr { 37 /// ContentCache - Once instance of this struct is kept for every file 38 /// loaded or used. This object owns the MemoryBuffer object. 39 struct ContentCache { 40 /// Reference to the file entry. This reference does not own 41 /// the FileEntry object. It is possible for this to be NULL if 42 /// the ContentCache encapsulates an imaginary text buffer. 43 const FileEntry* Entry; 44 45 /// Buffer - The actual buffer containing the characters from the input 46 /// file. This is owned by the FileInfo object. 47 const llvm::MemoryBuffer* Buffer; 48 49 /// SourceLineCache - A new[]'d array of offsets for each source line. This 50 /// is lazily computed. This is owned by the FileInfo object. 51 unsigned* SourceLineCache; 52 53 /// NumLines - The number of lines in this FileInfo. This is only valid if 54 /// SourceLineCache is non-null. 
55 unsigned NumLines; 56 57 ContentCache(const FileEntry* e = NULL) 58 : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {} 59 60 ~ContentCache(); 61 }; 62 63 /// FileIDInfo - Information about a FileID, basically just the logical file 64 /// that it represents and include stack information. A File SourceLocation 65 /// is a byte offset from the start of this. 66 /// 67 /// FileID's are used to compute the location of a character in memory as well 68 /// as the logical source location, which can be differ from the physical 69 /// location. It is different when #line's are active or when macros have 70 /// been expanded. 71 /// 72 /// Each FileID has include stack information, indicating where it came from. 73 /// For the primary translation unit, it comes from SourceLocation() aka 0. 74 /// This information encodes the #include chain that a token was instantiated 75 /// from. 76 /// 77 /// FileIDInfos contain a "InfoRec *", describing the source file, and a Chunk 78 /// number, which allows a SourceLocation to index into very large files 79 /// (those which there are not enough FilePosBits to address). 80 /// 81 struct FileIDInfo { 82 private: 83 /// IncludeLoc - The location of the #include that brought in this file. 84 /// This SourceLocation object has an invalid SLOC for the main file. 85 SourceLocation IncludeLoc; 86 87 /// ChunkNo - Really large buffers are broken up into chunks that are 88 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 89 /// chunk number of this FileID. 90 unsigned ChunkNo; 91 92 /// Content - Information about the source buffer itself. 93 const ContentCache* Content; 94 95 public: 96 /// get - Return a FileIDInfo object. 
97 static FileIDInfo get(SourceLocation IL, unsigned CN, 98 const ContentCache *Con) { 99 FileIDInfo X; 100 X.IncludeLoc = IL; 101 X.ChunkNo = CN; 102 X.Content = Con; 103 return X; 104 } 105 106 SourceLocation getIncludeLoc() const { return IncludeLoc; } 107 unsigned getChunkNo() const { return ChunkNo; } 108 const ContentCache* getContentCache() const { return Content; } 109 }; 110 111 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 112 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 113 /// instantiated, and the PhysicalLoc - where the actual character data for 114 /// the token came from. An actual macro SourceLocation stores deltas from 115 /// these positions. 116 class MacroIDInfo { 117 SourceLocation InstantiationLoc, PhysicalLoc; 118 public: 119 SourceLocation getInstantiationLoc() const { return InstantiationLoc; } 120 SourceLocation getPhysicalLoc() const { return PhysicalLoc; } 121 122 /// get - Return a MacroID for a macro expansion. IL specifies 123 /// the instantiation location, and PL specifies the physical location 124 /// (where the characters from the token come from). Both IL and PL refer 125 /// to normal File SLocs. 126 static MacroIDInfo get(SourceLocation IL, SourceLocation PL) { 127 MacroIDInfo X; 128 X.InstantiationLoc = IL; 129 X.PhysicalLoc = PL; 130 return X; 131 } 132 }; 133} // end SrcMgr namespace. 134} // end clang namespace 135 136namespace std { 137template <> struct less<clang::SrcMgr::ContentCache> { 138 inline bool operator()(const clang::SrcMgr::ContentCache& L, 139 const clang::SrcMgr::ContentCache& R) const { 140 return L.Entry < R.Entry; 141 } 142}; 143} // end std namespace 144 145namespace clang { 146 147/// SourceManager - This file handles loading and caching of source files into 148/// memory. This object owns the MemoryBuffer objects for all of the loaded 149/// files and assigns unique FileID's for each unique #include chain. 
150/// 151/// The SourceManager can be queried for information about SourceLocation 152/// objects, turning them into either physical or logical locations. Physical 153/// locations represent where the bytes corresponding to a token came from and 154/// logical locations represent where the location is in the user's view. In 155/// the case of a macro expansion, for example, the physical location indicates 156/// where the expanded token came from and the logical location specifies where 157/// it was expanded. Logical locations are also influenced by #line directives, 158/// etc. 159class SourceManager { 160 /// FileInfos - Memoized information about all of the files tracked by this 161 /// SourceManager. 162 std::set<SrcMgr::ContentCache> FileInfos; 163 164 /// MemBufferInfos - Information about various memory buffers that we have 165 /// read in. This is a list, instead of a vector, because we need pointers to 166 /// the FileInfo objects to be stable. 167 std::list<SrcMgr::ContentCache> MemBufferInfos; 168 169 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 170 /// entries are off by one. 171 std::vector<SrcMgr::FileIDInfo> FileIDs; 172 173 /// MacroIDs - Information about each MacroID. 174 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 175 176 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 177 /// method which is used to speedup getLineNumber calls to nearby locations. 178 unsigned LastLineNoFileIDQuery; 179 SrcMgr::ContentCache *LastLineNoContentCache; 180 unsigned LastLineNoFilePos; 181 unsigned LastLineNoResult; 182 183public: 184 SourceManager() : LastLineNoFileIDQuery(~0U) {} 185 ~SourceManager() {} 186 187 void clearIDTables() { 188 FileIDs.clear(); 189 MacroIDs.clear(); 190 LastLineNoFileIDQuery = ~0U; 191 LastLineNoContentCache = 0; 192 } 193 194 /// createFileID - Create a new FileID that represents the specified file 195 /// being #included from the specified IncludePosition. 
This returns 0 on 196 /// error and translates NULL into standard input. 197 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ 198 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 199 if (IR == 0) return 0; // Error opening file? 200 return createFileID(IR, IncludePos); 201 } 202 203 /// createFileIDForMemBuffer - Create a new FileID that represents the 204 /// specified memory buffer. This does no caching of the buffer and takes 205 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 206 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 207 return createFileID(createMemBufferContentCache(Buffer), SourceLocation()); 208 } 209 210 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 211 /// that a token at Loc should actually be referenced from InstantiationLoc. 212 SourceLocation getInstantiationLoc(SourceLocation Loc, 213 SourceLocation InstantiationLoc); 214 215 /// getBuffer - Return the buffer for the specified FileID. 216 /// 217 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 218 return getContentCache(FileID)->Buffer; 219 } 220 221 /// getBufferData - Return a pointer to the start and end of the character 222 /// data for the specified FileID. 223 std::pair<const char*, const char*> getBufferData(unsigned FileID) const; 224 225 /// getIncludeLoc - Return the location of the #include for the specified 226 /// SourceLocation. If this is a macro expansion, this transparently figures 227 /// out which file includes the file being expanded into. 228 SourceLocation getIncludeLoc(SourceLocation ID) const { 229 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc(); 230 } 231 232 /// getCharacterData - Return a pointer to the start of the specified location 233 /// in the appropriate MemoryBuffer. 
234 const char *getCharacterData(SourceLocation SL) const; 235 236 /// getColumnNumber - Return the column # for the specified file position. 237 /// This is significantly cheaper to compute than the line number. This 238 /// returns zero if the column number isn't known. This may only be called on 239 /// a file sloc, so you must choose a physical or logical location before 240 /// calling this method. 241 unsigned getColumnNumber(SourceLocation Loc) const; 242 243 unsigned getPhysicalColumnNumber(SourceLocation Loc) const { 244 return getColumnNumber(getPhysicalLoc(Loc)); 245 } 246 unsigned getLogicalColumnNumber(SourceLocation Loc) const { 247 return getColumnNumber(getLogicalLoc(Loc)); 248 } 249 250 251 /// getLineNumber - Given a SourceLocation, return the physical line number 252 /// for the position indicated. This requires building and caching a table of 253 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 254 /// about to emit a diagnostic. 255 unsigned getLineNumber(SourceLocation Loc); 256 257 unsigned getLogicalLineNumber(SourceLocation Loc) { 258 return getLineNumber(getLogicalLoc(Loc)); 259 } 260 unsigned getPhysicalLineNumber(SourceLocation Loc) { 261 return getLineNumber(getPhysicalLoc(Loc)); 262 } 263 264 /// getSourceName - This method returns the name of the file or buffer that 265 /// the SourceLocation specifies. This can be modified with #line directives, 266 /// etc. 267 const char *getSourceName(SourceLocation Loc) const; 268 269 /// Given a SourceLocation object, return the logical location referenced by 270 /// the ID. This logical location is subject to #line directives, etc. 271 SourceLocation getLogicalLoc(SourceLocation Loc) const { 272 // File locations are both physical and logical. 
273 if (Loc.isFileID()) return Loc; 274 275 SourceLocation ILoc = MacroIDs[Loc.getMacroID()].getInstantiationLoc(); 276 return ILoc.getFileLocWithOffset(Loc.getMacroLogOffs()); 277 } 278 279 /// getPhysicalLoc - Given a SourceLocation object, return the physical 280 /// location referenced by the ID. 281 SourceLocation getPhysicalLoc(SourceLocation Loc) const { 282 // File locations are both physical and logical. 283 if (Loc.isFileID()) return Loc; 284 285 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc(); 286 return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs()); 287 } 288 289 /// getContentCacheForLoc - Return the ContentCache for the physloc of the 290 /// specified SourceLocation, if one exists. 291 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 292 Loc = getPhysicalLoc(Loc); 293 unsigned FileID = Loc.getFileID(); 294 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 295 return FileIDs[FileID-1].getContentCache(); 296 } 297 298 /// getFileEntryForLoc - Return the FileEntry record for the physloc of the 299 /// specified SourceLocation, if one exists. 300 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 301 return getContentCacheForLoc(Loc)->Entry; 302 } 303 304 /// getDecomposedFileLoc - Decompose the specified file location into a raw 305 /// FileID + Offset pair. The first element is the FileID, the second is the 306 /// offset from the start of the buffer of the location. 307 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 308 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 309 310 // TODO: Add a flag "is first chunk" to SLOC. 311 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 312 313 // If this file has been split up into chunks, factor in the chunk number 314 // that the FileID references. 
315 unsigned ChunkNo = FIDInfo->getChunkNo(); 316 unsigned Offset = Loc.getRawFilePos(); 317 Offset += (ChunkNo << SourceLocation::FilePosBits); 318 319 return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset); 320 } 321 322 /// PrintStats - Print statistics to stderr. 323 /// 324 void PrintStats() const; 325 326private: 327 /// createFileID - Create a new fileID for the specified ContentCache and 328 /// include position. This works regardless of whether the ContentCache 329 /// corresponds to a file or some other input source. 330 unsigned createFileID(const SrcMgr::ContentCache* File, 331 SourceLocation IncludePos); 332 333 /// getContentCache - Create or return a cached ContentCache for the specified 334 /// file. This returns null on failure. 335 const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile); 336 337 /// createMemBufferContentCache - Create a new ContentCache for the specified 338 /// memory buffer. 339 const SrcMgr::ContentCache* 340 createMemBufferContentCache(const llvm::MemoryBuffer* Buf); 341 342 const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const { 343 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 344 return &FileIDs[FileID-1]; 345 } 346 347 const SrcMgr::ContentCache *getContentCache(unsigned FileID) const { 348 return getContentCache(getFIDInfo(FileID)); 349 } 350 351 /// Return the ContentCache structure for the specified FileID. 352 /// This is always the physical reference for the ID. 353 const SrcMgr::ContentCache* 354 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 355 return FIDInfo->getContentCache(); 356 } 357 358 /// getFullFilePos - This (efficient) method returns the offset from the start 359 /// of the file that the specified physical SourceLocation represents. This 360 /// returns the location of the physical character data, not the logical file 361 /// position. 
362 unsigned getFullFilePos(SourceLocation PhysLoc) const { 363 return getDecomposedFileLoc(PhysLoc).second; 364 } 365}; 366 367 368} // end namespace clang 369 370#endif 371