clang  3.7.0
TokenLexer.cpp
Go to the documentation of this file.
1 //===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the TokenLexer interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Lex/TokenLexer.h"
17 #include "clang/Lex/MacroArgs.h"
18 #include "clang/Lex/MacroInfo.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "llvm/ADT/SmallString.h"
21 using namespace clang;
22 
23 
24 /// Create a TokenLexer for the specified macro with the specified actual
25 /// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
27  MacroArgs *Actuals) {
28  // If the client is reusing a TokenLexer, make sure to free any memory
29  // associated with it.
30  destroy();
31 
32  Macro = MI;
33  ActualArgs = Actuals;
34  CurToken = 0;
35 
36  ExpandLocStart = Tok.getLocation();
37  ExpandLocEnd = ELEnd;
38  AtStartOfLine = Tok.isAtStartOfLine();
39  HasLeadingSpace = Tok.hasLeadingSpace();
40  NextTokGetsSpace = false;
41  Tokens = &*Macro->tokens_begin();
42  OwnsTokens = false;
43  DisableMacroExpansion = false;
44  NumTokens = Macro->tokens_end()-Macro->tokens_begin();
45  MacroExpansionStart = SourceLocation();
46 
48  MacroStartSLocOffset = SM.getNextLocalOffset();
49 
50  if (NumTokens > 0) {
51  assert(Tokens[0].getLocation().isValid());
52  assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
53  "Macro defined in macro?");
54  assert(ExpandLocStart.isValid());
55 
56  // Reserve a source location entry chunk for the length of the macro
57  // definition. Tokens that get lexed directly from the definition will
58  // have their locations pointing inside this chunk. This is to avoid
59  // creating separate source location entries for each token.
60  MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
61  MacroDefLength = Macro->getDefinitionLength(SM);
62  MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
63  ExpandLocStart,
64  ExpandLocEnd,
65  MacroDefLength);
66  }
67 
68  // If this is a function-like macro, expand the arguments and change
69  // Tokens to point to the expanded tokens.
70  if (Macro->isFunctionLike() && Macro->getNumArgs())
71  ExpandFunctionArguments();
72 
73  // Mark the macro as currently disabled, so that it is not recursively
74  // expanded. The macro must be disabled only after argument pre-expansion of
75  // function-like macro arguments occurs.
76  Macro->DisableMacro();
77 }
78 
79 
80 
81 /// Create a TokenLexer for the specified token stream. This does not
82 /// take ownership of the specified token vector.
83 void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
84  bool disableMacroExpansion, bool ownsTokens) {
85  // If the client is reusing a TokenLexer, make sure to free any memory
86  // associated with it.
87  destroy();
88 
89  Macro = nullptr;
90  ActualArgs = nullptr;
91  Tokens = TokArray;
92  OwnsTokens = ownsTokens;
93  DisableMacroExpansion = disableMacroExpansion;
94  NumTokens = NumToks;
95  CurToken = 0;
96  ExpandLocStart = ExpandLocEnd = SourceLocation();
97  AtStartOfLine = false;
98  HasLeadingSpace = false;
99  NextTokGetsSpace = false;
100  MacroExpansionStart = SourceLocation();
101 
102  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
103  // returned unmodified.
104  if (NumToks != 0) {
105  AtStartOfLine = TokArray[0].isAtStartOfLine();
106  HasLeadingSpace = TokArray[0].hasLeadingSpace();
107  }
108 }
109 
110 
111 void TokenLexer::destroy() {
112  // If this was a function-like macro that actually uses its arguments, delete
113  // the expanded tokens.
114  if (OwnsTokens) {
115  delete [] Tokens;
116  Tokens = nullptr;
117  OwnsTokens = false;
118  }
119 
120  // TokenLexer owns its formal arguments.
121  if (ActualArgs) ActualArgs->destroy(PP);
122 }
123 
124 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
125  SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
126  unsigned MacroArgNo, Preprocessor &PP) {
127  // Is the macro argument __VA_ARGS__?
128  if (!Macro->isVariadic() || MacroArgNo != Macro->getNumArgs()-1)
129  return false;
130 
131  // In Microsoft-compatibility mode, a comma is removed in the expansion
132  // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
133  // not supported by gcc.
134  if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
135  return false;
136 
137  // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
138  // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
139  // named arguments, where it remains. In all other modes, including C99
140  // with GNU extensions, it is removed regardless of named arguments.
141  // Microsoft also appears to support this extension, unofficially.
142  if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
143  && Macro->getNumArgs() < 2)
144  return false;
145 
146  // Is a comma available to be removed?
147  if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
148  return false;
149 
150  // Issue an extension diagnostic for the paste operator.
151  if (HasPasteOperator)
152  PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
153 
154  // Remove the comma.
155  ResultToks.pop_back();
156 
157  // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
158  // then removal of the comma should produce a placemarker token (in C99
159  // terms) which we model by popping off the previous ##, giving us a plain
160  // "X" when __VA_ARGS__ is empty.
161  if (!ResultToks.empty() && ResultToks.back().is(tok::hashhash))
162  ResultToks.pop_back();
163 
164  // Never add a space, even if the comma, ##, or arg had a space.
165  NextTokGetsSpace = false;
166  return true;
167 }
168 
169 /// Expand the arguments of a function-like macro so that we can quickly
170 /// return preexpanded tokens from Tokens.
171 void TokenLexer::ExpandFunctionArguments() {
172 
173  SmallVector<Token, 128> ResultToks;
174 
175  // Loop through 'Tokens', expanding them into ResultToks. Keep
176  // track of whether we change anything. If not, no need to keep them. If so,
177  // we install the newly expanded sequence as the new 'Tokens' list.
178  bool MadeChange = false;
179 
180  for (unsigned i = 0, e = NumTokens; i != e; ++i) {
181  // If we found the stringify operator, get the argument stringified. The
182  // preprocessor already verified that the following token is a macro name
183  // when the #define was parsed.
184  const Token &CurTok = Tokens[i];
185  if (i != 0 && !Tokens[i-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
186  NextTokGetsSpace = true;
187 
188  if (CurTok.isOneOf(tok::hash, tok::hashat)) {
189  int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
190  assert(ArgNo != -1 && "Token following # is not an argument?");
191 
192  SourceLocation ExpansionLocStart =
193  getExpansionLocForMacroDefLoc(CurTok.getLocation());
194  SourceLocation ExpansionLocEnd =
195  getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation());
196 
197  Token Res;
198  if (CurTok.is(tok::hash)) // Stringify
199  Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
200  ExpansionLocStart,
201  ExpansionLocEnd);
202  else {
203  // 'charify': don't bother caching these.
204  Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
205  PP, true,
206  ExpansionLocStart,
207  ExpansionLocEnd);
208  }
210 
211  // The stringified/charified string leading space flag gets set to match
212  // the #/#@ operator.
213  if (NextTokGetsSpace)
215 
216  ResultToks.push_back(Res);
217  MadeChange = true;
218  ++i; // Skip arg name.
219  NextTokGetsSpace = false;
220  continue;
221  }
222 
223  // Find out if there is a paste (##) operator before or after the token.
224  bool NonEmptyPasteBefore =
225  !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
226  bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash);
227  bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);
228  assert(!NonEmptyPasteBefore || PasteBefore);
229 
230  // Otherwise, if this is not an argument token, just add the token to the
231  // output buffer.
232  IdentifierInfo *II = CurTok.getIdentifierInfo();
233  int ArgNo = II ? Macro->getArgumentNum(II) : -1;
234  if (ArgNo == -1) {
235  // This isn't an argument, just add it.
236  ResultToks.push_back(CurTok);
237 
238  if (NextTokGetsSpace) {
239  ResultToks.back().setFlag(Token::LeadingSpace);
240  NextTokGetsSpace = false;
241  } else if (PasteBefore && !NonEmptyPasteBefore)
242  ResultToks.back().clearFlag(Token::LeadingSpace);
243 
244  continue;
245  }
246 
247  // An argument is expanded somehow, the result is different than the
248  // input.
249  MadeChange = true;
250 
251  // Otherwise, this is a use of the argument.
252 
253  // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
254  // are no trailing commas if __VA_ARGS__ is empty.
255  if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
256  MaybeRemoveCommaBeforeVaArgs(ResultToks,
257  /*HasPasteOperator=*/false,
258  Macro, ArgNo, PP))
259  continue;
260 
261  // If it is not the LHS/RHS of a ## operator, we must pre-expand the
262  // argument and substitute the expanded tokens into the result. This is
263  // C99 6.10.3.1p1.
264  if (!PasteBefore && !PasteAfter) {
265  const Token *ResultArgToks;
266 
267  // Only preexpand the argument if it could possibly need it. This
268  // avoids some work in common cases.
269  const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
270  if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
271  ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0];
272  else
273  ResultArgToks = ArgTok; // Use non-preexpanded tokens.
274 
275  // If the arg token expanded into anything, append it.
276  if (ResultArgToks->isNot(tok::eof)) {
277  unsigned FirstResult = ResultToks.size();
278  unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
279  ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
280 
281  // In Microsoft-compatibility mode, we follow MSVC's preprocessing
282  // behavior by not considering single commas from nested macro
283  // expansions as argument separators. Set a flag on the token so we can
284  // test for this later when the macro expansion is processed.
285  if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
286  ResultToks.back().is(tok::comma))
287  ResultToks.back().setFlag(Token::IgnoredComma);
288 
289  // If the '##' came from expanding an argument, turn it into 'unknown'
290  // to avoid pasting.
291  for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) {
292  Token &Tok = ResultToks[i];
293  if (Tok.is(tok::hashhash))
294  Tok.setKind(tok::unknown);
295  }
296 
297  if(ExpandLocStart.isValid()) {
298  updateLocForMacroArgTokens(CurTok.getLocation(),
299  ResultToks.begin()+FirstResult,
300  ResultToks.end());
301  }
302 
303  // If any tokens were substituted from the argument, the whitespace
304  // before the first token should match the whitespace of the arg
305  // identifier.
306  ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
307  NextTokGetsSpace);
308  NextTokGetsSpace = false;
309  }
310  continue;
311  }
312 
313  // Okay, we have a token that is either the LHS or RHS of a paste (##)
314  // argument. It gets substituted as its non-pre-expanded tokens.
315  const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
316  unsigned NumToks = MacroArgs::getArgLength(ArgToks);
317  if (NumToks) { // Not an empty argument?
318  // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
319  // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
320  // the expander trys to paste ',' with the first token of the __VA_ARGS__
321  // expansion.
322  if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
323  ResultToks[ResultToks.size()-2].is(tok::comma) &&
324  (unsigned)ArgNo == Macro->getNumArgs()-1 &&
325  Macro->isVariadic()) {
326  // Remove the paste operator, report use of the extension.
327  PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
328  }
329 
330  ResultToks.append(ArgToks, ArgToks+NumToks);
331 
332  // If the '##' came from expanding an argument, turn it into 'unknown'
333  // to avoid pasting.
334  for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size();
335  i != e; ++i) {
336  Token &Tok = ResultToks[i];
337  if (Tok.is(tok::hashhash))
338  Tok.setKind(tok::unknown);
339  }
340 
341  if (ExpandLocStart.isValid()) {
342  updateLocForMacroArgTokens(CurTok.getLocation(),
343  ResultToks.end()-NumToks, ResultToks.end());
344  }
345 
346  // If this token (the macro argument) was supposed to get leading
347  // whitespace, transfer this information onto the first token of the
348  // expansion.
349  //
350  // Do not do this if the paste operator occurs before the macro argument,
351  // as in "A ## MACROARG". In valid code, the first token will get
352  // smooshed onto the preceding one anyway (forming AMACROARG). In
353  // assembler-with-cpp mode, invalid pastes are allowed through: in this
354  // case, we do not want the extra whitespace to be added. For example,
355  // we want ". ## foo" -> ".foo" not ". foo".
356  if (NextTokGetsSpace)
357  ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace);
358 
359  NextTokGetsSpace = false;
360  continue;
361  }
362 
363  // If an empty argument is on the LHS or RHS of a paste, the standard (C99
364  // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
365  // implement this by eating ## operators when a LHS or RHS expands to
366  // empty.
367  if (PasteAfter) {
368  // Discard the argument token and skip (don't copy to the expansion
369  // buffer) the paste operator after it.
370  ++i;
371  continue;
372  }
373 
374  // If this is on the RHS of a paste operator, we've already copied the
375  // paste operator to the ResultToks list, unless the LHS was empty too.
376  // Remove it.
377  assert(PasteBefore);
378  if (NonEmptyPasteBefore) {
379  assert(ResultToks.back().is(tok::hashhash));
380  ResultToks.pop_back();
381  }
382 
383  // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
384  // and if the macro had at least one real argument, and if the token before
385  // the ## was a comma, remove the comma. This is a GCC extension which is
386  // disabled when using -std=c99.
387  if (ActualArgs->isVarargsElidedUse())
388  MaybeRemoveCommaBeforeVaArgs(ResultToks,
389  /*HasPasteOperator=*/true,
390  Macro, ArgNo, PP);
391 
392  continue;
393  }
394 
395  // If anything changed, install this as the new Tokens list.
396  if (MadeChange) {
397  assert(!OwnsTokens && "This would leak if we already own the token list");
398  // This is deleted in the dtor.
399  NumTokens = ResultToks.size();
400  // The tokens will be added to Preprocessor's cache and will be removed
401  // when this TokenLexer finishes lexing them.
402  Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
403 
404  // The preprocessor cache of macro expanded tokens owns these tokens,not us.
405  OwnsTokens = false;
406  }
407 }
408 
409 /// \brief Checks if two tokens form wide string literal.
410 static bool isWideStringLiteralFromMacro(const Token &FirstTok,
411  const Token &SecondTok) {
412  return FirstTok.is(tok::identifier) &&
413  FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
414  SecondTok.stringifiedInMacro();
415 }
416 
417 /// Lex - Lex and return a token from this macro stream.
418 ///
419 bool TokenLexer::Lex(Token &Tok) {
420  // Lexing off the end of the macro, pop this macro off the expansion stack.
421  if (isAtEnd()) {
422  // If this is a macro (not a token stream), mark the macro enabled now
423  // that it is no longer being expanded.
424  if (Macro) Macro->EnableMacro();
425 
426  Tok.startToken();
427  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
428  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
429  if (CurToken == 0)
431  return PP.HandleEndOfTokenLexer(Tok);
432  }
433 
435 
436  // If this is the first token of the expanded result, we inherit spacing
437  // properties later.
438  bool isFirstToken = CurToken == 0;
439 
440  // Get the next token to return.
441  Tok = Tokens[CurToken++];
442 
443  bool TokenIsFromPaste = false;
444 
445  // If this token is followed by a token paste (##) operator, paste the tokens!
446  // Note that ## is a normal token when not expanding a macro.
447  if (!isAtEnd() && Macro &&
448  (Tokens[CurToken].is(tok::hashhash) ||
449  // Special processing of L#x macros in -fms-compatibility mode.
450  // Microsoft compiler is able to form a wide string literal from
451  // 'L#macro_arg' construct in a function-like macro.
452  (PP.getLangOpts().MSVCCompat &&
453  isWideStringLiteralFromMacro(Tok, Tokens[CurToken])))) {
454  // When handling the microsoft /##/ extension, the final token is
455  // returned by PasteTokens, not the pasted token.
456  if (PasteTokens(Tok))
457  return true;
458 
459  TokenIsFromPaste = true;
460  }
461 
462  // The token's current location indicate where the token was lexed from. We
463  // need this information to compute the spelling of the token, but any
464  // diagnostics for the expanded token should appear as if they came from
465  // ExpansionLoc. Pull this information together into a new SourceLocation
466  // that captures all of this.
467  if (ExpandLocStart.isValid() && // Don't do this for token streams.
468  // Check that the token's location was not already set properly.
469  SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
470  SourceLocation instLoc;
471  if (Tok.is(tok::comment)) {
472  instLoc = SM.createExpansionLoc(Tok.getLocation(),
473  ExpandLocStart,
474  ExpandLocEnd,
475  Tok.getLength());
476  } else {
477  instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
478  }
479 
480  Tok.setLocation(instLoc);
481  }
482 
483  // If this is the first token, set the lexical properties of the token to
484  // match the lexical properties of the macro identifier.
485  if (isFirstToken) {
486  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
487  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
488  } else {
489  // If this is not the first token, we may still need to pass through
490  // leading whitespace if we've expanded a macro.
491  if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
492  if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
493  }
494  AtStartOfLine = false;
495  HasLeadingSpace = false;
496 
497  // Handle recursive expansion!
498  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
499  // Change the kind of this identifier to the appropriate token kind, e.g.
500  // turning "for" into a keyword.
501  IdentifierInfo *II = Tok.getIdentifierInfo();
502  Tok.setKind(II->getTokenID());
503 
504  // If this identifier was poisoned and from a paste, emit an error. This
505  // won't be handled by Preprocessor::HandleIdentifier because this is coming
506  // from a macro expansion.
507  if (II->isPoisoned() && TokenIsFromPaste) {
508  PP.HandlePoisonedIdentifier(Tok);
509  }
510 
511  if (!DisableMacroExpansion && II->isHandleIdentifierCase())
512  return PP.HandleIdentifier(Tok);
513  }
514 
515  // Otherwise, return a normal token.
516  return true;
517 }
518 
519 /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
520 /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
521 /// are more ## after it, chomp them iteratively. Return the result as Tok.
522 /// If this returns true, the caller should immediately return the token.
523 bool TokenLexer::PasteTokens(Token &Tok) {
524  // MSVC: If previous token was pasted, this must be a recovery from an invalid
525  // paste operation. Ignore spaces before this token to mimic MSVC output.
526  // Required for generating valid UUID strings in some MS headers.
527  if (PP.getLangOpts().MicrosoftExt && (CurToken >= 2) &&
528  Tokens[CurToken - 2].is(tok::hashhash))
530 
531  SmallString<128> Buffer;
532  const char *ResultTokStrPtr = nullptr;
533  SourceLocation StartLoc = Tok.getLocation();
534  SourceLocation PasteOpLoc;
535  do {
536  // Consume the ## operator if any.
537  PasteOpLoc = Tokens[CurToken].getLocation();
538  if (Tokens[CurToken].is(tok::hashhash))
539  ++CurToken;
540  assert(!isAtEnd() && "No token on the RHS of a paste operator!");
541 
542  // Get the RHS token.
543  const Token &RHS = Tokens[CurToken];
544 
545  // Allocate space for the result token. This is guaranteed to be enough for
546  // the two tokens.
547  Buffer.resize(Tok.getLength() + RHS.getLength());
548 
549  // Get the spelling of the LHS token in Buffer.
550  const char *BufPtr = &Buffer[0];
551  bool Invalid = false;
552  unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid);
553  if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
554  memcpy(&Buffer[0], BufPtr, LHSLen);
555  if (Invalid)
556  return true;
557 
558  BufPtr = Buffer.data() + LHSLen;
559  unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
560  if (Invalid)
561  return true;
562  if (RHSLen && BufPtr != &Buffer[LHSLen])
563  // Really, we want the chars in Buffer!
564  memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
565 
566  // Trim excess space.
567  Buffer.resize(LHSLen+RHSLen);
568 
569  // Plop the pasted result (including the trailing newline and null) into a
570  // scratch buffer where we can lex it.
571  Token ResultTokTmp;
572  ResultTokTmp.startToken();
573 
574  // Claim that the tmp token is a string_literal so that we can get the
575  // character pointer back from CreateString in getLiteralData().
576  ResultTokTmp.setKind(tok::string_literal);
577  PP.CreateString(Buffer, ResultTokTmp);
578  SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
579  ResultTokStrPtr = ResultTokTmp.getLiteralData();
580 
581  // Lex the resultant pasted token into Result.
582  Token Result;
583 
584  if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
585  // Common paste case: identifier+identifier = identifier. Avoid creating
586  // a lexer and other overhead.
587  PP.IncrementPasteCounter(true);
588  Result.startToken();
589  Result.setKind(tok::raw_identifier);
590  Result.setRawIdentifierData(ResultTokStrPtr);
591  Result.setLocation(ResultTokLoc);
592  Result.setLength(LHSLen+RHSLen);
593  } else {
594  PP.IncrementPasteCounter(false);
595 
596  assert(ResultTokLoc.isFileID() &&
597  "Should be a raw location into scratch buffer");
599  FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
600 
601  bool Invalid = false;
602  const char *ScratchBufStart
603  = SourceMgr.getBufferData(LocFileID, &Invalid).data();
604  if (Invalid)
605  return false;
606 
607  // Make a lexer to lex this string from. Lex just this one token.
608  // Make a lexer object so that we lex and expand the paste result.
609  Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
610  PP.getLangOpts(), ScratchBufStart,
611  ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
612 
613  // Lex a token in raw mode. This way it won't look up identifiers
614  // automatically, lexing off the end will return an eof token, and
615  // warnings are disabled. This returns true if the result token is the
616  // entire buffer.
617  bool isInvalid = !TL.LexFromRawLexer(Result);
618 
619  // If we got an EOF token, we didn't form even ONE token. For example, we
620  // did "/ ## /" to get "//".
621  isInvalid |= Result.is(tok::eof);
622 
623  // If pasting the two tokens didn't form a full new token, this is an
624  // error. This occurs with "x ## +" and other stuff. Return with Tok
625  // unmodified and with RHS as the next token to lex.
626  if (isInvalid) {
627  // Test for the Microsoft extension of /##/ turning into // here on the
628  // error path.
629  if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) &&
630  RHS.is(tok::slash)) {
631  HandleMicrosoftCommentPaste(Tok);
632  return true;
633  }
634 
635  // Do not emit the error when preprocessing assembler code.
636  if (!PP.getLangOpts().AsmPreprocessor) {
637  // Explicitly convert the token location to have proper expansion
638  // information so that the user knows where it came from.
640  SourceLocation Loc =
641  SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
642  // If we're in microsoft extensions mode, downgrade this from a hard
643  // error to an extension that defaults to an error. This allows
644  // disabling it.
645  PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
646  : diag::err_pp_bad_paste)
647  << Buffer;
648  }
649 
650  // An error has occurred so exit loop.
651  break;
652  }
653 
654  // Turn ## into 'unknown' to avoid # ## # from looking like a paste
655  // operator.
656  if (Result.is(tok::hashhash))
657  Result.setKind(tok::unknown);
658  }
659 
660  // Transfer properties of the LHS over the Result.
663 
664  // Finally, replace LHS with the result, consume the RHS, and iterate.
665  ++CurToken;
666  Tok = Result;
667  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
668 
669  SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();
670 
671  // The token's current location indicate where the token was lexed from. We
672  // need this information to compute the spelling of the token, but any
673  // diagnostics for the expanded token should appear as if the token was
674  // expanded from the full ## expression. Pull this information together into
675  // a new SourceLocation that captures all of this.
676  SourceManager &SM = PP.getSourceManager();
677  if (StartLoc.isFileID())
678  StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
679  if (EndLoc.isFileID())
680  EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
681  FileID MacroFID = SM.getFileID(MacroExpansionStart);
682  while (SM.getFileID(StartLoc) != MacroFID)
683  StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
684  while (SM.getFileID(EndLoc) != MacroFID)
685  EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
686 
687  Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
688  Tok.getLength()));
689 
690  // Now that we got the result token, it will be subject to expansion. Since
691  // token pasting re-lexes the result token in raw mode, identifier information
692  // isn't looked up. As such, if the result is an identifier, look up id info.
693  if (Tok.is(tok::raw_identifier)) {
694  // Look up the identifier info for the token. We disabled identifier lookup
695  // by saying we're skipping contents, so we need to do this manually.
696  PP.LookUpIdentifierInfo(Tok);
697  }
698  return false;
699 }
700 
701 /// isNextTokenLParen - If the next token lexed will pop this macro off the
702 /// expansion stack, return 2. If the next unexpanded token is a '(', return
703 /// 1, otherwise return 0.
705  // Out of tokens?
706  if (isAtEnd())
707  return 2;
708  return Tokens[CurToken].is(tok::l_paren);
709 }
710 
711 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
712 /// preprocessor directive.
714  return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
715 }
716 
717 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
718 /// together to form a comment that comments out everything in the current
719 /// macro, other active macros, and anything left on the current physical
720 /// source line of the expanded buffer. Handle this by returning the
721 /// first token on the next line.
722 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {
723  // We 'comment out' the rest of this macro by just ignoring the rest of the
724  // tokens that have not been lexed yet, if any.
725 
726  // Since this must be a macro, mark the macro enabled now that it is no longer
727  // being expanded.
728  assert(Macro && "Token streams can't paste comments");
729  Macro->EnableMacro();
730 
732 }
733 
734 /// \brief If \arg loc is a file ID and points inside the current macro
735 /// definition, returns the appropriate source location pointing at the
736 /// macro expansion source location entry, otherwise it returns an invalid
737 /// SourceLocation.
739 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
740  assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
741  "Not appropriate for token streams");
742  assert(loc.isValid() && loc.isFileID());
743 
744  SourceManager &SM = PP.getSourceManager();
745  assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
746  "Expected loc to come from the macro definition");
747 
748  unsigned relativeOffset = 0;
749  SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
750  return MacroExpansionStart.getLocWithOffset(relativeOffset);
751 }
752 
753 /// \brief Finds the tokens that are consecutive (from the same FileID)
754 /// creates a single SLocEntry, and assigns SourceLocations to each token that
755 /// point to that SLocEntry. e.g for
756 /// assert(foo == bar);
757 /// There will be a single SLocEntry for the "foo == bar" chunk and locations
758 /// for the 'foo', '==', 'bar' tokens will point inside that chunk.
759 ///
760 /// \arg begin_tokens will be updated to a position past all the found
761 /// consecutive tokens.
763  SourceLocation InstLoc,
764  Token *&begin_tokens,
765  Token * end_tokens) {
766  assert(begin_tokens < end_tokens);
767 
768  SourceLocation FirstLoc = begin_tokens->getLocation();
769  SourceLocation CurLoc = FirstLoc;
770 
771  // Compare the source location offset of tokens and group together tokens that
772  // are close, even if their locations point to different FileIDs. e.g.
773  //
774  // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
775  // ^ ^
776  // |bar foo cake| (one SLocEntry chunk for all tokens)
777  //
778  // we can perform this "merge" since the token's spelling location depends
779  // on the relative offset.
780 
781  Token *NextTok = begin_tokens + 1;
782  for (; NextTok < end_tokens; ++NextTok) {
783  SourceLocation NextLoc = NextTok->getLocation();
784  if (CurLoc.isFileID() != NextLoc.isFileID())
785  break; // Token from different kind of FileID.
786 
787  int RelOffs;
788  if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
789  break; // Token from different local/loaded location.
790  // Check that token is not before the previous token or more than 50
791  // "characters" away.
792  if (RelOffs < 0 || RelOffs > 50)
793  break;
794  CurLoc = NextLoc;
795  }
796 
797  // For the consecutive tokens, find the length of the SLocEntry to contain
798  // all of them.
799  Token &LastConsecutiveTok = *(NextTok-1);
800  int LastRelOffs = 0;
801  SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
802  &LastRelOffs);
803  unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();
804 
805  // Create a macro expansion SLocEntry that will "contain" all of the tokens.
806  SourceLocation Expansion =
807  SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
808 
809  // Change the location of the tokens from the spelling location to the new
810  // expanded location.
811  for (; begin_tokens < NextTok; ++begin_tokens) {
812  Token &Tok = *begin_tokens;
813  int RelOffs = 0;
814  SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
815  Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
816  }
817 }
818 
819 /// \brief Creates SLocEntries and updates the locations of macro argument
820 /// tokens to their new expanded locations.
821 ///
822 /// \param ArgIdDefLoc the location of the macro argument id inside the macro
823 /// definition.
824 /// \param Tokens the macro argument tokens to update.
825 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
826  Token *begin_tokens,
827  Token *end_tokens) {
828  SourceManager &SM = PP.getSourceManager();
829 
830  SourceLocation InstLoc =
831  getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
832 
833  while (begin_tokens < end_tokens) {
834  // If there's only one token just create a SLocEntry for it.
835  if (end_tokens - begin_tokens == 1) {
836  Token &Tok = *begin_tokens;
838  InstLoc,
839  Tok.getLength()));
840  return;
841  }
842 
843  updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
844  }
845 }
846 
847 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
848  AtStartOfLine = Result.isAtStartOfLine();
849  HasLeadingSpace = Result.hasLeadingSpace();
850 }
bool isAtStartOfLine() const
Definition: Token.h:261
SourceManager & getSourceManager() const
Definition: Preprocessor.h:682
bool isPoisoned() const
Return true if this token has been poisoned.
void DisableMacro()
Definition: MacroInfo.h:263
void Init(Token &Tok, SourceLocation ILEnd, MacroInfo *MI, MacroArgs *ActualArgs)
Definition: TokenLexer.cpp:26
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:252
unsigned isNextTokenLParen() const
Definition: TokenLexer.cpp:704
Defines the SourceManager interface.
unsigned getNextLocalOffset() const
static void updateConsecutiveMacroArgTokens(SourceManager &SM, SourceLocation InstLoc, Token *&begin_tokens, Token *end_tokens)
Finds the tokens that are consecutive (from the same FileID), creates a single SLocEntry for them, and assigns to each token a SourceLocation that points into that SLocEntry. E.g. for assert(foo == bar); there will be a single SLocEntry for the "foo == bar" chunk, and the locations for the 'foo', '==', 'bar' tokens will point inside that chunk.
Definition: TokenLexer.cpp:762
bool isParsingPreprocessorDirective() const
Definition: TokenLexer.cpp:713
Defines the clang::MacroInfo and clang::MacroDirective classes.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:265
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:234
bool isVarargsElidedUse() const
Definition: MacroArgs.h:107
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
bool stringifiedInMacro() const
Definition: Token.h:297
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:207
bool isFileID() const
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:679
void setKind(tok::TokenKind K)
Definition: Token.h:91
bool Lex(Token &Tok)
Lex - Lex and return a token from this macro stream.
Definition: TokenLexer.cpp:419
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
tok::TokenKind getTokenID() const
const std::vector< Token > & getPreExpArgument(unsigned Arg, const MacroInfo *MI, Preprocessor &PP)
Definition: MacroArgs.cpp:147
void destroy(Preprocessor &PP)
Definition: MacroArgs.cpp:73
const Token * getUnexpArgument(unsigned Arg) const
Definition: MacroArgs.cpp:113
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:240
bool isVariadic() const
Definition: MacroInfo.h:205
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
const Token & getStringifiedArgument(unsigned ArgNo, Preprocessor &PP, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
Definition: MacroArgs.cpp:297
static bool isWideStringLiteralFromMacro(const Token &FirstTok, const Token &SecondTok)
Checks if two tokens form wide string literal.
Definition: TokenLexer.cpp:410
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
Definition: MacroArgs.cpp:194
SourceManager & SM
SourceManager & SourceMgr
Definition: Format.cpp:1205
unsigned getNumArgs() const
Definition: MacroInfo.h:180
SourceLocation createMacroArgExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLoc, unsigned TokLength)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
Defines the clang::Preprocessor interface.
int getArgumentNum(const IdentifierInfo *Arg) const
Return the argument number of the specified identifier, or -1 if the identifier is not a formal argum...
Definition: MacroInfo.h:187
void HandleMicrosoftCommentPaste(Token &Tok)
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
unsigned getDefinitionLength(SourceManager &SM) const
Get length in characters of the macro definition.
Definition: MacroInfo.h:129
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
The result type of a method or function.
bool isBeforeInSLocAddrSpace(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the "source location address space".
const char * getLiteralData() const
Definition: Token.h:215
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
Encodes a location in the source. The SourceManager can decode this to get at the full include stack...
void setLength(unsigned Len)
Definition: Token.h:133
bool isValid() const
Return true if this is a valid SourceLocation object.
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
Definition: MacroArgs.cpp:131
bool is(tok::TokenKind K) const
Definition: Token.h:95
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isInSLocAddrSpace(SourceLocation Loc, SourceLocation Start, unsigned Length, unsigned *RelativeOffset=nullptr) const
Returns true if Loc is inside the [Start, +Length) chunk of the source location address space...
std::pair< SourceLocation, SourceLocation > getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
static unsigned getArgLength(const Token *ArgPtr)
Definition: MacroArgs.cpp:103
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool isFunctionLike() const
Definition: MacroInfo.h:197
Encapsulates the data about a macro definition (e.g. its tokens).
Definition: MacroInfo.h:34
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
bool isInSameSLocAddrSpace(SourceLocation LHS, SourceLocation RHS, int *RelativeOffset) const
Return true if both LHS and RHS are in the local source location address space or the loaded one...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
tokens_iterator tokens_end() const
Definition: MacroInfo.h:241
static bool isInvalid(SourceLocation Loc, bool *Invalid)
void HandlePoisonedIdentifier(Token &Tok)
Display reason for poisoned identifier.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
unsigned getLength() const
Definition: Token.h:127
void setLocation(SourceLocation L)
Definition: Token.h:132
void EnableMacro()
Definition: MacroInfo.h:258
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:239
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:118
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:96
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177