clang  3.7.0
RewriteMacros.cpp
Go to the documentation of this file.
1 //===--- RewriteMacros.cpp - Rewrite macros into their expansions ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code rewrites macro invocations into their expansions. This gives you
11 // a macro expanded file that retains comments and #includes.
12 //
13 //===----------------------------------------------------------------------===//
14 
17 #include "clang/Lex/Preprocessor.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include <cstdio>
22 #include <memory>
23 
24 using namespace clang;
25 
26 /// isSameToken - Return true if the two specified tokens start have the same
27 /// content.
28 static bool isSameToken(Token &RawTok, Token &PPTok) {
29  // If two tokens have the same kind and the same identifier info, they are
30  // obviously the same.
31  if (PPTok.getKind() == RawTok.getKind() &&
32  PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
33  return true;
34 
35  // Otherwise, if they are different but have the same identifier info, they
36  // are also considered to be the same. This allows keywords and raw lexed
37  // identifiers with the same name to be treated the same.
38  if (PPTok.getIdentifierInfo() &&
39  PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
40  return true;
41 
42  return false;
43 }
44 
45 
46 /// GetNextRawTok - Return the next raw token in the stream, skipping over
47 /// comments if ReturnComment is false.
48 static const Token &GetNextRawTok(const std::vector<Token> &RawTokens,
49  unsigned &CurTok, bool ReturnComment) {
50  assert(CurTok < RawTokens.size() && "Overran eof!");
51 
52  // If the client doesn't want comments and we have one, skip it.
53  if (!ReturnComment && RawTokens[CurTok].is(tok::comment))
54  ++CurTok;
55 
56  return RawTokens[CurTok++];
57 }
58 
59 
60 /// LexRawTokensFromMainFile - Lets all the raw tokens from the main file into
61 /// the specified vector.
63  std::vector<Token> &RawTokens) {
65 
66  // Create a lexer to lex all the tokens of the main file in raw mode. Even
67  // though it is in raw mode, it will not return comments.
68  const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID());
69  Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts());
70 
71  // Switch on comment lexing because we really do want them.
72  RawLex.SetCommentRetentionState(true);
73 
74  Token RawTok;
75  do {
76  RawLex.LexFromRawLexer(RawTok);
77 
78  // If we have an identifier with no identifier info for our raw token, look
79  // up the indentifier info. This is important for equality comparison of
80  // identifier tokens.
81  if (RawTok.is(tok::raw_identifier))
82  PP.LookUpIdentifierInfo(RawTok);
83 
84  RawTokens.push_back(RawTok);
85  } while (RawTok.isNot(tok::eof));
86 }
87 
88 
89 /// RewriteMacrosInInput - Implement -rewrite-macros mode.
90 void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) {
92 
93  Rewriter Rewrite;
94  Rewrite.setSourceMgr(SM, PP.getLangOpts());
95  RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID());
96 
97  std::vector<Token> RawTokens;
98  LexRawTokensFromMainFile(PP, RawTokens);
99  unsigned CurRawTok = 0;
100  Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
101 
102 
103  // Get the first preprocessing token.
104  PP.EnterMainSourceFile();
105  Token PPTok;
106  PP.Lex(PPTok);
107 
108  // Preprocess the input file in parallel with raw lexing the main file. Ignore
109  // all tokens that are preprocessed from a file other than the main file (e.g.
110  // a header). If we see tokens that are in the preprocessed file but not the
111  // lexed file, we have a macro expansion. If we see tokens in the lexed file
112  // that aren't in the preprocessed view, we have macros that expand to no
113  // tokens, or macro arguments etc.
114  while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) {
115  SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation());
116 
117  // If PPTok is from a different source file, ignore it.
118  if (!SM.isWrittenInMainFile(PPLoc)) {
119  PP.Lex(PPTok);
120  continue;
121  }
122 
123  // If the raw file hits a preprocessor directive, they will be extra tokens
124  // in the raw file that don't exist in the preprocsesed file. However, we
125  // choose to preserve them in the output file and otherwise handle them
126  // specially.
127  if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) {
128  // If this is a #warning directive or #pragma mark (GNU extensions),
129  // comment the line out.
130  if (RawTokens[CurRawTok].is(tok::identifier)) {
131  const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo();
132  if (II->getName() == "warning") {
133  // Comment out #warning.
134  RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
135  } else if (II->getName() == "pragma" &&
136  RawTokens[CurRawTok+1].is(tok::identifier) &&
137  (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() ==
138  "mark")) {
139  // Comment out #pragma mark.
140  RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
141  }
142  }
143 
144  // Otherwise, if this is a #include or some other directive, just leave it
145  // in the file by skipping over the line.
146  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
147  while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof))
148  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
149  continue;
150  }
151 
152  // Okay, both tokens are from the same file. Get their offsets from the
153  // start of the file.
154  unsigned PPOffs = SM.getFileOffset(PPLoc);
155  unsigned RawOffs = SM.getFileOffset(RawTok.getLocation());
156 
157  // If the offsets are the same and the token kind is the same, ignore them.
158  if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) {
159  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
160  PP.Lex(PPTok);
161  continue;
162  }
163 
164  // If the PP token is farther along than the raw token, something was
165  // deleted. Comment out the raw token.
166  if (RawOffs <= PPOffs) {
167  // Comment out a whole run of tokens instead of bracketing each one with
168  // comments. Add a leading space if RawTok didn't have one.
169  bool HasSpace = RawTok.hasLeadingSpace();
170  RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]);
171  unsigned EndPos;
172 
173  do {
174  EndPos = RawOffs+RawTok.getLength();
175 
176  RawTok = GetNextRawTok(RawTokens, CurRawTok, true);
177  RawOffs = SM.getFileOffset(RawTok.getLocation());
178 
179  if (RawTok.is(tok::comment)) {
180  // Skip past the comment.
181  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
182  break;
183  }
184 
185  } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() &&
186  (PPOffs != RawOffs || !isSameToken(RawTok, PPTok)));
187 
188  RB.InsertTextBefore(EndPos, "*/");
189  continue;
190  }
191 
192  // Otherwise, there was a replacement an expansion. Insert the new token
193  // in the output buffer. Insert the whole run of new tokens at once to get
194  // them in the right order.
195  unsigned InsertPos = PPOffs;
196  std::string Expansion;
197  while (PPOffs < RawOffs) {
198  Expansion += ' ' + PP.getSpelling(PPTok);
199  PP.Lex(PPTok);
200  PPLoc = SM.getExpansionLoc(PPTok.getLocation());
201  PPOffs = SM.getFileOffset(PPLoc);
202  }
203  Expansion += ' ';
204  RB.InsertTextBefore(InsertPos, Expansion);
205  }
206 
207  // Get the buffer corresponding to MainFileID. If we haven't changed it, then
208  // we are done.
209  if (const RewriteBuffer *RewriteBuf =
210  Rewrite.getRewriteBufferFor(SM.getMainFileID())) {
211  //printf("Changed:\n");
212  *OS << std::string(RewriteBuf->begin(), RewriteBuf->end());
213  } else {
214  fprintf(stderr, "No changes\n");
215  }
216  OS->flush();
217 }
bool isAtStartOfLine() const
Definition: Token.h:261
SourceManager & getSourceManager() const
Definition: Preprocessor.h:682
Defines the SourceManager interface.
const RewriteBuffer * getRewriteBufferFor(FileID FID) const
Definition: Rewriter.h:170
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:265
RewriteBuffer & getEditBuffer(FileID FID)
Definition: Rewriter.cpp:225
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
static bool isSameToken(Token &RawTok, Token &PPTok)
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:679
void RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS)
RewriteMacrosInInput - Implement -rewrite-macros mode.
tok::TokenKind getKind() const
Definition: Token.h:90
SourceManager & SM
StringRef getName() const
Return the actual identifier string.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc...
Defines the clang::Preprocessor interface.
void InsertTextAfter(unsigned OrigOffset, StringRef Str)
Definition: RewriteBuffer.h:80
bool isWrittenInMainFile(SourceLocation Loc) const
Returns true if the spelling location for the given location is in the main file buffer.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
static void LexRawTokensFromMainFile(Preprocessor &PP, std::vector< Token > &RawTokens)
void setSourceMgr(SourceManager &SM, const LangOptions &LO)
Definition: Rewriter.h:60
Encodes a location in the source. The SourceManager can decode this to get at the full include stack...
void Lex(Token &Result)
Lex the next token for this preprocessor.
FileID getMainFileID() const
Returns the FileID of the main source file.
bool is(tok::TokenKind K) const
Definition: Token.h:95
void InsertTextBefore(unsigned OrigOffset, StringRef Str)
Definition: RewriteBuffer.h:73
static const Token & GetNextRawTok(const std::vector< Token > &RawTokens, unsigned &CurTok, bool ReturnComment)
void SetCommentRetentionState(bool Mode)
Definition: Lexer.h:187
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
unsigned getLength() const
Definition: Token.h:127
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
This class handles loading and caching of source files into memory.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:96
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177