SourceManager.h revision 3457e8cbaa8a6fec5d69173450655fe0bc38634b
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include <vector> 19#include <map> 20#include <list> 21#include <cassert> 22 23namespace llvm { 24class MemoryBuffer; 25} 26 27namespace clang { 28 29class SourceManager; 30class FileEntry; 31class IdentifierTokenInfo; 32 33/// SrcMgr - Private classes that are part of the SourceManager implementation. 34/// 35namespace SrcMgr { 36 /// FileInfo - Once instance of this struct is kept for every file loaded or 37 /// used. This object owns the MemoryBuffer object. 38 struct FileInfo { 39 /// Buffer - The actual buffer containing the characters from the input 40 /// file. 41 const llvm::MemoryBuffer *Buffer; 42 43 /// SourceLineCache - A new[]'d array of offsets for each source line. This 44 /// is lazily computed. 45 /// 46 unsigned *SourceLineCache; 47 48 /// NumLines - The number of lines in this FileInfo. This is only valid if 49 /// SourceLineCache is non-null. 50 unsigned NumLines; 51 }; 52 53 typedef std::pair<const FileEntry * const, FileInfo> InfoRec; 54 55 /// FileIDInfo - Information about a FileID, basically just the logical file 56 /// that it represents and include stack information. A File SourceLocation 57 /// is a byte offset from the start of this. 58 /// 59 /// FileID's are used to compute the location of a character in memory as well 60 /// as the logical source location, which can be differ from the physical 61 /// location. It is different when #line's are active or when macros have 62 /// been expanded. 63 /// 64 /// Each FileID has include stack information, indicating where it came from. 65 /// For the primary translation unit, it comes from SourceLocation() aka 0. 66 /// This information encodes the #include chain that a token was instantiated 67 /// from. 68 /// 69 /// FileIDInfos contain a "InfoRec *", describing the source file, and a Chunk 70 /// number, which allows a SourceLocation to index into very large files 71 /// (those which there are not enough FilePosBits to address). 72 /// 73 struct FileIDInfo { 74 private: 75 /// IncludeLoc - The location of the #include that brought in this file. 76 /// This SourceLocation object has an invalid SLOC for the main file. 77 SourceLocation IncludeLoc; 78 79 /// ChunkNo - Really large buffers are broken up into chunks that are 80 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 81 /// chunk number of this FileID. 82 unsigned ChunkNo; 83 84 /// FileInfo - Information about the source buffer itself. 85 /// 86 const InfoRec *Info; 87 public: 88 89 /// get - Return a FileIDInfo object. 90 static FileIDInfo get(SourceLocation IL, unsigned CN, const InfoRec *Inf) { 91 FileIDInfo X; 92 X.IncludeLoc = IL; 93 X.ChunkNo = CN; 94 X.Info = Inf; 95 return X; 96 } 97 98 SourceLocation getIncludeLoc() const { return IncludeLoc; } 99 unsigned getChunkNo() const { return ChunkNo; } 100 const InfoRec *getInfo() const { return Info; } 101 }; 102 103 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 104 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 105 /// instantiated, and the PhysicalLoc - where the actual character data for 106 /// the token came from. An actual macro SourceLocation stores deltas from 107 /// these positions. 108 class MacroIDInfo { 109 SourceLocation InstantiationLoc, PhysicalLoc; 110 public: 111 SourceLocation getInstantiationLoc() const { return InstantiationLoc; } 112 SourceLocation getPhysicalLoc() const { return PhysicalLoc; } 113 114 /// get - Return a MacroID for a macro expansion. IL specifies 115 /// the instantiation location, and PL specifies the physical location 116 /// (where the characters from the token come from). Both IL and PL refer 117 /// to normal File SLocs. 118 static MacroIDInfo get(SourceLocation IL, SourceLocation PL) { 119 MacroIDInfo X; 120 X.InstantiationLoc = IL; 121 X.PhysicalLoc = PL; 122 return X; 123 } 124 }; 125} // end SrcMgr namespace. 126 127 128/// SourceManager - This file handles loading and caching of source files into 129/// memory. This object owns the MemoryBuffer objects for all of the loaded 130/// files and assigns unique FileID's for each unique #include chain. 131/// 132/// The SourceManager can be queried for information about SourceLocation 133/// objects, turning them into either physical or logical locations. Physical 134/// locations represent where the bytes corresponding to a token came from and 135/// logical locations represent where the location is in the user's view. In 136/// the case of a macro expansion, for example, the physical location indicates 137/// where the expanded token came from and the logical location specifies where 138/// it was expanded. Logical locations are also influenced by #line directives, 139/// etc. 140class SourceManager { 141 /// FileInfos - Memoized information about all of the files tracked by this 142 /// SourceManager. 143 std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos; 144 145 /// MemBufferInfos - Information about various memory buffers that we have 146 /// read in. This is a list, instead of a vector, because we need pointers to 147 /// the FileInfo objects to be stable. 148 std::list<SrcMgr::InfoRec> MemBufferInfos; 149 150 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 151 /// entries are off by one. 152 std::vector<SrcMgr::FileIDInfo> FileIDs; 153 154 /// MacroIDs - Information about each MacroID. 155 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 156 157 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 158 /// method which is used to speedup getLineNumber calls to nearby locations. 159 unsigned LastLineNoFileIDQuery; 160 SrcMgr::FileInfo *LastLineNoFileInfo; 161 unsigned LastLineNoFilePos; 162 unsigned LastLineNoResult; 163public: 164 SourceManager() : LastLineNoFileIDQuery(~0U) {} 165 ~SourceManager(); 166 167 void clearIDTables() { 168 FileIDs.clear(); 169 MacroIDs.clear(); 170 LastLineNoFileIDQuery = ~0U; 171 LastLineNoFileInfo = 0; 172 } 173 174 /// createFileID - Create a new FileID that represents the specified file 175 /// being #included from the specified IncludePosition. This returns 0 on 176 /// error and translates NULL into standard input. 177 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ 178 const SrcMgr::InfoRec *IR = getInfoRec(SourceFile); 179 if (IR == 0) return 0; // Error opening file? 180 return createFileID(IR, IncludePos); 181 } 182 183 /// createFileIDForMemBuffer - Create a new FileID that represents the 184 /// specified memory buffer. This does no caching of the buffer and takes 185 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 186 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 187 return createFileID(createMemBufferInfoRec(Buffer), SourceLocation()); 188 } 189 190 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 191 /// that a token at Loc should actually be referenced from InstantiationLoc. 192 SourceLocation getInstantiationLoc(SourceLocation Loc, 193 SourceLocation InstantiationLoc); 194 195 /// getBuffer - Return the buffer for the specified FileID. 196 /// 197 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 198 return getFileInfo(FileID)->Buffer; 199 } 200 201 /// getBufferData - Return a pointer to the start and end of the character 202 /// data for the specified FileID. 203 std::pair<const char*, const char*> getBufferData(unsigned FileID) const; 204 205 /// getIncludeLoc - Return the location of the #include for the specified 206 /// SourceLocation. If this is a macro expansion, this transparently figures 207 /// out which file includes the file being expanded into. 208 SourceLocation getIncludeLoc(SourceLocation ID) const { 209 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc(); 210 } 211 212 /// getCharacterData - Return a pointer to the start of the specified location 213 /// in the appropriate MemoryBuffer. 214 const char *getCharacterData(SourceLocation SL) const; 215 216 /// getColumnNumber - Return the column # for the specified file position. 217 /// This is significantly cheaper to compute than the line number. This 218 /// returns zero if the column number isn't known. This may only be called on 219 /// a file sloc, so you must choose a physical or logical location before 220 /// calling this method. 221 unsigned getColumnNumber(SourceLocation Loc) const; 222 223 unsigned getPhysicalColumnNumber(SourceLocation Loc) const { 224 return getColumnNumber(getPhysicalLoc(Loc)); 225 } 226 unsigned getLogicalColumnNumber(SourceLocation Loc) const { 227 return getColumnNumber(getLogicalLoc(Loc)); 228 } 229 230 231 /// getLineNumber - Given a SourceLocation, return the physical line number 232 /// for the position indicated. This requires building and caching a table of 233 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 234 /// about to emit a diagnostic. 235 unsigned getLineNumber(SourceLocation Loc); 236 237 unsigned getLogicalLineNumber(SourceLocation Loc) { 238 return getLineNumber(getLogicalLoc(Loc)); 239 } 240 unsigned getPhysicalLineNumber(SourceLocation Loc) { 241 return getLineNumber(getPhysicalLoc(Loc)); 242 } 243 244 /// getSourceName - This method returns the name of the file or buffer that 245 /// the SourceLocation specifies. This can be modified with #line directives, 246 /// etc. 247 const char *getSourceName(SourceLocation Loc) const; 248 249 /// Given a SourceLocation object, return the logical location referenced by 250 /// the ID. This logical location is subject to #line directives, etc. 251 SourceLocation getLogicalLoc(SourceLocation Loc) const { 252 // File locations are both physical and logical. 253 if (Loc.isFileID()) return Loc; 254 255 SourceLocation ILoc = MacroIDs[Loc.getMacroID()].getInstantiationLoc(); 256 return ILoc.getFileLocWithOffset(Loc.getMacroLogOffs()); 257 } 258 259 /// getPhysicalLoc - Given a SourceLocation object, return the physical 260 /// location referenced by the ID. 261 SourceLocation getPhysicalLoc(SourceLocation Loc) const { 262 // File locations are both physical and logical. 263 if (Loc.isFileID()) return Loc; 264 265 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc(); 266 return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs()); 267 } 268 269 /// getFileEntryForLoc - Return the FileEntry record for the physloc of the 270 /// specified SourceLocation, if one exists. 271 const FileEntry *getFileEntryForLoc(SourceLocation Loc) const { 272 Loc = getPhysicalLoc(Loc); 273 unsigned FileID = Loc.getFileID(); 274 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 275 return FileIDs[FileID-1].getInfo()->first; 276 } 277 278 /// getDecomposedFileLoc - Decompose the specified file location into a raw 279 /// FileID + Offset pair. The first element is the FileID, the second is the 280 /// offset from the start of the buffer of the location. 281 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 282 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 283 284 // TODO: Add a flag "is first chunk" to SLOC. 285 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 286 287 // If this file has been split up into chunks, factor in the chunk number 288 // that the FileID references. 289 unsigned ChunkNo = FIDInfo->getChunkNo(); 290 unsigned Offset = Loc.getRawFilePos(); 291 Offset += (ChunkNo << SourceLocation::FilePosBits); 292 293 return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset); 294 } 295 296 /// PrintStats - Print statistics to stderr. 297 /// 298 void PrintStats() const; 299private: 300 /// createFileID - Create a new fileID for the specified InfoRec and include 301 /// position. This works regardless of whether the InfoRec corresponds to a 302 /// file or some other input source. 303 unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos); 304 305 /// getInfoRec - Create or return a cached FileInfo for the specified file. 306 /// This returns null on failure. 307 const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile); 308 309 /// createMemBufferInfoRec - Create a new info record for the specified memory 310 /// buffer. This does no caching. 311 const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf); 312 313 const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const { 314 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 315 return &FileIDs[FileID-1]; 316 } 317 318 const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const { 319 return getInfoRec(getFIDInfo(FileID)); 320 } 321 322 SrcMgr::FileInfo *getFileInfo(unsigned FileID) const { 323 if (const SrcMgr::InfoRec *IR = getInfoRec(FileID)) 324 return const_cast<SrcMgr::FileInfo *>(&IR->second); 325 return 0; 326 } 327 328 /// Return the InfoRec structure for the specified FileID. This is always the 329 /// physical reference for the ID. 330 const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const { 331 return FIDInfo->getInfo(); 332 } 333 334 335 /// getFullFilePos - This (efficient) method returns the offset from the start 336 /// of the file that the specified physical SourceLocation represents. This 337 /// returns the location of the physical character data, not the logical file 338 /// position. 339 unsigned getFullFilePos(SourceLocation PhysLoc) const { 340 return getDecomposedFileLoc(PhysLoc).second; 341 } 342}; 343 344 345} // end namespace clang 346 347#endif 348