SourceManager.h revision 5f016e2cb5d11daeb237544de1c5d59f20fe1a6e
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include <vector> 19#include <map> 20#include <list> 21 22namespace llvm { 23class MemoryBuffer; 24} 25 26namespace clang { 27 28class SourceManager; 29class FileEntry; 30class IdentifierTokenInfo; 31 32/// SrcMgr - Private classes that are part of the SourceManager implementation. 33/// 34namespace SrcMgr { 35 /// FileInfo - Once instance of this struct is kept for every file loaded or 36 /// used. This object owns the MemoryBuffer object. 37 struct FileInfo { 38 /// Buffer - The actual buffer containing the characters from the input 39 /// file. 40 const llvm::MemoryBuffer *Buffer; 41 42 /// SourceLineCache - A new[]'d array of offsets for each source line. This 43 /// is lazily computed. 44 /// 45 unsigned *SourceLineCache; 46 47 /// NumLines - The number of lines in this FileInfo. This is only valid if 48 /// SourceLineCache is non-null. 49 unsigned NumLines; 50 }; 51 52 typedef std::pair<const FileEntry * const, FileInfo> InfoRec; 53 54 /// FileIDInfo - Information about a FileID, basically just the logical file 55 /// that it represents and include stack information. A SourceLocation is a 56 /// byte offset from the start of this. 57 /// 58 /// FileID's are used to compute the location of a character in memory as well 59 /// as the logical source location, which can be differ from the physical 60 /// location. It is different when #line's are active or when macros have 61 /// been expanded. 62 /// 63 /// Each FileID has include stack information, indicating where it came from. 64 /// For the primary translation unit, it comes from SourceLocation() aka 0. 65 /// 66 /// There are three types of FileID's: 67 /// 1. Normal MemoryBuffer (file). These are represented by a "InfoRec *", 68 /// describing the source file, and a Chunk number, which factors into 69 /// the SourceLocation's offset from the start of the buffer. 70 /// 2. Macro Expansions. These indicate that the logical location is 71 /// totally different than the physical location. The logical source 72 /// location is specified by the IncludeLoc. The physical location is 73 /// the FilePos of the token's SourceLocation combined with the FileID 74 /// from MacroTokenFileID. 75 /// 76 struct FileIDInfo { 77 enum FileIDType { 78 NormalBuffer, 79 MacroExpansion 80 }; 81 82 /// The type of this FileID. 83 FileIDType IDType; 84 85 /// IncludeLoc - The location of the #include that brought in this file. 86 /// This SourceLocation object has a FileId of 0 for the main file. 87 SourceLocation IncludeLoc; 88 89 /// This union is discriminated by IDType. 90 /// 91 union { 92 struct NormalBufferInfo { 93 /// ChunkNo - Really large buffers are broken up into chunks that are 94 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 95 /// chunk number of this FileID. 96 unsigned ChunkNo; 97 98 /// FileInfo - Information about the source buffer itself. 99 /// 100 const InfoRec *Info; 101 } NormalBuffer; 102 103 /// MacroTokenFileID - This is the File ID that contains the characters 104 /// that make up the expanded token. 105 unsigned MacroTokenFileID; 106 } u; 107 108 /// getNormalBuffer - Return a FileIDInfo object for a normal buffer 109 /// reference. 110 static FileIDInfo getNormalBuffer(SourceLocation IL, unsigned CN, 111 const InfoRec *Inf) { 112 FileIDInfo X; 113 X.IDType = NormalBuffer; 114 X.IncludeLoc = IL; 115 X.u.NormalBuffer.ChunkNo = CN; 116 X.u.NormalBuffer.Info = Inf; 117 return X; 118 } 119 120 /// getMacroExpansion - Return a FileID for a macro expansion. IL specifies 121 /// the instantiation location, and MacroFID specifies the FileID that the 122 /// token's characters come from. 123 static FileIDInfo getMacroExpansion(SourceLocation IL, 124 unsigned MacroFID) { 125 FileIDInfo X; 126 X.IDType = MacroExpansion; 127 X.IncludeLoc = IL; 128 X.u.MacroTokenFileID = MacroFID; 129 return X; 130 } 131 132 unsigned getNormalBufferChunkNo() const { 133 assert(IDType == NormalBuffer && "Not a normal buffer!"); 134 return u.NormalBuffer.ChunkNo; 135 } 136 137 const InfoRec *getNormalBufferInfo() const { 138 assert(IDType == NormalBuffer && "Not a normal buffer!"); 139 return u.NormalBuffer.Info; 140 } 141 }; 142} // end SrcMgr namespace. 143 144 145/// SourceManager - This file handles loading and caching of source files into 146/// memory. This object owns the MemoryBuffer objects for all of the loaded 147/// files and assigns unique FileID's for each unique #include chain. 148/// 149/// The SourceManager can be queried for information about SourceLocation 150/// objects, turning them into either physical or logical locations. Physical 151/// locations represent where the bytes corresponding to a token came from and 152/// logical locations represent where the location is in the user's view. In 153/// the case of a macro expansion, for example, the physical location indicates 154/// where the expanded token came from and the logical location specifies where 155/// it was expanded. Logical locations are also influenced by #line directives, 156/// etc. 157class SourceManager { 158 /// FileInfos - Memoized information about all of the files tracked by this 159 /// SourceManager. 160 std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos; 161 162 /// MemBufferInfos - Information about various memory buffers that we have 163 /// read in. This is a list, instead of a vector, because we need pointers to 164 /// the FileInfo objects to be stable. 165 std::list<SrcMgr::InfoRec> MemBufferInfos; 166 167 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 168 /// entries are off by one. 169 std::vector<SrcMgr::FileIDInfo> FileIDs; 170 171 /// LastInstantiationLoc_* - Cache the last instantiation request for fast 172 /// lookup. Macros often want many tokens instantated at the same location. 173 SourceLocation LastInstantiationLoc_InstantLoc; 174 unsigned LastInstantiationLoc_MacroFID; 175 unsigned LastInstantiationLoc_Result; 176public: 177 SourceManager() { LastInstantiationLoc_MacroFID = ~0U; } 178 ~SourceManager(); 179 180 /// createFileID - Create a new FileID that represents the specified file 181 /// being #included from the specified IncludePosition. This returns 0 on 182 /// error and translates NULL into standard input. 183 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ 184 const SrcMgr::InfoRec *IR = getInfoRec(SourceFile); 185 if (IR == 0) return 0; // Error opening file? 186 return createFileID(IR, IncludePos); 187 } 188 189 /// createFileIDForMemBuffer - Create a new FileID that represents the 190 /// specified memory buffer. This does no caching of the buffer and takes 191 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 192 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 193 return createFileID(createMemBufferInfoRec(Buffer), SourceLocation()); 194 } 195 196 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 197 /// that a token from physloc PhysLoc should actually be referenced from 198 /// InstantiationLoc. 199 SourceLocation getInstantiationLoc(SourceLocation PhysLoc, 200 SourceLocation InstantiationLoc); 201 202 /// getBuffer - Return the buffer for the specified FileID. 203 /// 204 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 205 return getFileInfo(FileID)->Buffer; 206 } 207 208 /// getIncludeLoc - Return the location of the #include for the specified 209 /// FileID. 210 SourceLocation getIncludeLoc(unsigned FileID) const; 211 212 /// getFilePos - This (efficient) method returns the offset from the start of 213 /// the file that the specified SourceLocation represents. This returns the 214 /// location of the physical character data, not the logical file position. 215 unsigned getFilePos(SourceLocation Loc) const { 216 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 217 218 // For Macros, the physical loc is specified by the MacroTokenFileID. 219 if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) 220 FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1]; 221 222 // If this file has been split up into chunks, factor in the chunk number 223 // that the FileID references. 224 unsigned ChunkNo = FIDInfo->getNormalBufferChunkNo(); 225 return Loc.getRawFilePos() + (ChunkNo << SourceLocation::FilePosBits); 226 } 227 228 /// getCharacterData - Return a pointer to the start of the specified location 229 /// in the appropriate MemoryBuffer. 230 const char *getCharacterData(SourceLocation SL) const; 231 232 /// getColumnNumber - Return the column # for the specified include position. 233 /// this is significantly cheaper to compute than the line number. This 234 /// returns zero if the column number isn't known. 235 unsigned getColumnNumber(SourceLocation Loc) const; 236 237 /// getLineNumber - Given a SourceLocation, return the physical line number 238 /// for the position indicated. This requires building and caching a table of 239 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 240 /// about to emit a diagnostic. 241 unsigned getLineNumber(SourceLocation Loc); 242 243 /// getSourceFilePos - This method returns the *logical* offset from the start 244 /// of the file that the specified SourceLocation represents. This returns 245 /// the location of the *logical* character data, not the physical file 246 /// position. In the case of macros, for example, this returns where the 247 /// macro was instantiated, not where the characters for the macro can be 248 /// found. 249 unsigned getSourceFilePos(SourceLocation Loc) const; 250 251 /// getSourceName - This method returns the name of the file or buffer that 252 /// the SourceLocation specifies. This can be modified with #line directives, 253 /// etc. 254 std::string getSourceName(SourceLocation Loc); 255 256 /// getFileEntryForFileID - Return the FileEntry record for the specified 257 /// FileID if one exists. 258 const FileEntry *getFileEntryForFileID(unsigned FileID) const { 259 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 260 return FileIDs[FileID-1].getNormalBufferInfo()->first; 261 } 262 263 /// Given a SourceLocation object, return the logical location referenced by 264 /// the ID. This logical location is subject to #line directives, etc. 265 SourceLocation getLogicalLoc(SourceLocation Loc) const { 266 if (Loc.getFileID() == 0) return Loc; 267 268 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 269 if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) 270 return FIDInfo->IncludeLoc; 271 return Loc; 272 } 273 274 /// getPhysicalLoc - Given a SourceLocation object, return the physical 275 /// location referenced by the ID. 276 SourceLocation getPhysicalLoc(SourceLocation Loc) const { 277 if (Loc.getFileID() == 0) return Loc; 278 279 // For Macros, the physical loc is specified by the MacroTokenFileID. 280 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 281 if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) 282 return SourceLocation(FIDInfo->u.MacroTokenFileID, 283 Loc.getRawFilePos()); 284 return Loc; 285 } 286 287 /// PrintStats - Print statistics to stderr. 288 /// 289 void PrintStats() const; 290private: 291 /// createFileID - Create a new fileID for the specified InfoRec and include 292 /// position. This works regardless of whether the InfoRec corresponds to a 293 /// file or some other input source. 294 unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos); 295 296 /// getInfoRec - Create or return a cached FileInfo for the specified file. 297 /// This returns null on failure. 298 const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile); 299 300 /// createMemBufferInfoRec - Create a new info record for the specified memory 301 /// buffer. This does no caching. 302 const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf); 303 304 const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const { 305 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 306 return &FileIDs[FileID-1]; 307 } 308 309 /// Return the InfoRec structure for the specified FileID. This is always the 310 /// physical reference for the ID. 311 const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const { 312 // For Macros, the physical loc is specified by the MacroTokenFileID. 313 if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) 314 FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1]; 315 return FIDInfo->getNormalBufferInfo(); 316 } 317 const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const { 318 return getInfoRec(getFIDInfo(FileID)); 319 } 320 321 SrcMgr::FileInfo *getFileInfo(const SrcMgr::FileIDInfo *FIDInfo) const { 322 if (const SrcMgr::InfoRec *IR = getInfoRec(FIDInfo)) 323 return const_cast<SrcMgr::FileInfo *>(&IR->second); 324 return 0; 325 } 326 SrcMgr::FileInfo *getFileInfo(unsigned FileID) const { 327 if (const SrcMgr::InfoRec *IR = getInfoRec(FileID)) 328 return const_cast<SrcMgr::FileInfo *>(&IR->second); 329 return 0; 330 } 331 SrcMgr::FileInfo *getFileInfo(const FileEntry *SourceFile) { 332 if (const SrcMgr::InfoRec *IR = getInfoRec(SourceFile)) 333 return const_cast<SrcMgr::FileInfo *>(&IR->second); 334 return 0; 335 } 336}; 337 338 339} // end namespace clang 340 341#endif 342