clang  3.7.0
Preprocessor.cpp
Go to the documentation of this file.
1 //===--- Preprocessor.cpp - C Language Family Preprocessor Implementation -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 // -H - Print the name of each header file used.
16 // -d[DNI] - Dump various things.
17 // -fworking-directory - #line's with preprocessor's working dir.
18 // -fpreprocessed
19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 // -W*
21 // -w
22 //
23 // Messages to emit:
24 // "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Basic/TargetInfo.h"
35 #include "clang/Lex/HeaderSearch.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
57 
59  DiagnosticsEngine &diags, LangOptions &opts,
60  SourceManager &SM, HeaderSearch &Headers,
61  ModuleLoader &TheModuleLoader,
62  IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63  TranslationUnitKind TUKind)
64  : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65  FileMgr(Headers.getFileMgr()), SourceMgr(SM),
66  ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
67  TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
68  Identifiers(opts, IILookup),
69  PragmaHandlers(new PragmaNamespace(StringRef())),
70  IncrementalProcessing(false), TUKind(TUKind),
71  CodeComplete(nullptr), CodeCompletionFile(nullptr),
72  CodeCompletionOffset(0), LastTokenWasAt(false),
73  ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
74  MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
75  CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
76  Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState),
77  MacroArgCache(nullptr), Record(nullptr),
78  MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
79  OwnsHeaderSearch = OwnsHeaders;
80 
81  CounterValue = 0; // __COUNTER__ starts at 0.
82 
83  // Clear stats.
84  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
85  NumIf = NumElse = NumEndif = 0;
86  NumEnteredSourceFiles = 0;
87  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
88  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
89  MaxIncludeStackDepth = 0;
90  NumSkipped = 0;
91 
92  // Default to discarding comments.
93  KeepComments = false;
94  KeepMacroComments = false;
95  SuppressIncludeNotFoundError = false;
96 
97  // Macro expansion is enabled.
98  DisableMacroExpansion = false;
99  MacroExpansionInDirectivesOverride = false;
100  InMacroArgs = false;
101  InMacroArgPreExpansion = false;
102  NumCachedTokenLexers = 0;
103  PragmasEnabled = true;
104  ParsingIfOrElifDirective = false;
105  PreprocessedOutput = false;
106 
107  CachedLexPos = 0;
108 
109  // We haven't read anything from the external source.
110  ReadMacrosFromExternalSource = false;
111 
112  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
113  // This gets unpoisoned where it is allowed.
114  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
115  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
116 
117  // Initialize the pragma handlers.
118  RegisterBuiltinPragmas();
119 
120  // Initialize builtin macros like __LINE__ and friends.
121  RegisterBuiltinMacros();
122 
123  if(LangOpts.Borland) {
124  Ident__exception_info = getIdentifierInfo("_exception_info");
125  Ident___exception_info = getIdentifierInfo("__exception_info");
126  Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
127  Ident__exception_code = getIdentifierInfo("_exception_code");
128  Ident___exception_code = getIdentifierInfo("__exception_code");
129  Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
130  Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
131  Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
132  Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
133  } else {
134  Ident__exception_info = Ident__exception_code = nullptr;
135  Ident__abnormal_termination = Ident___exception_info = nullptr;
136  Ident___exception_code = Ident___abnormal_termination = nullptr;
137  Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
138  Ident_AbnormalTermination = nullptr;
139  }
140 }
141 
143  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
144 
145  IncludeMacroStack.clear();
146 
147  // Destroy any macro definitions.
148  while (MacroInfoChain *I = MIChainHead) {
149  MIChainHead = I->Next;
150  I->~MacroInfoChain();
151  }
152 
153  // Free any cached macro expanders.
154  // This populates MacroArgCache, so all TokenLexers need to be destroyed
155  // before the code below that frees up the MacroArgCache list.
156  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
157  CurTokenLexer.reset();
158 
159  while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
160  DeserialMIChainHead = I->Next;
161  I->~DeserializedMacroInfoChain();
162  }
163 
164  // Free any cached MacroArgs.
165  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
166  ArgList = ArgList->deallocate();
167 
168  // Delete the header search info, if we own it.
169  if (OwnsHeaderSearch)
170  delete &HeaderInfo;
171 }
172 
174  assert((!this->Target || this->Target == &Target) &&
175  "Invalid override of target information");
176  this->Target = &Target;
177 
178  // Initialize information about built-ins.
179  BuiltinInfo.InitializeTarget(Target);
180  HeaderInfo.setTarget(Target);
181 }
182 
184  NumEnteredSourceFiles = 0;
185 
186  // Reset pragmas
187  PragmaHandlersBackup = std::move(PragmaHandlers);
188  PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
189  RegisterBuiltinPragmas();
190 
191  // Reset PredefinesFileID
192  PredefinesFileID = FileID();
193 }
194 
196  NumEnteredSourceFiles = 1;
197 
198  PragmaHandlers = std::move(PragmaHandlersBackup);
199 }
200 
202  PTH.reset(pm);
203  FileMgr.addStatCache(PTH->createStatCache());
204 }
205 
206 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
207  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
208  << getSpelling(Tok) << "'";
209 
210  if (!DumpFlags) return;
211 
212  llvm::errs() << "\t";
213  if (Tok.isAtStartOfLine())
214  llvm::errs() << " [StartOfLine]";
215  if (Tok.hasLeadingSpace())
216  llvm::errs() << " [LeadingSpace]";
217  if (Tok.isExpandDisabled())
218  llvm::errs() << " [ExpandDisabled]";
219  if (Tok.needsCleaning()) {
220  const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
221  llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
222  << "']";
223  }
224 
225  llvm::errs() << "\tLoc=<";
226  DumpLocation(Tok.getLocation());
227  llvm::errs() << ">";
228 }
229 
231  Loc.dump(SourceMgr);
232 }
233 
234 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
235  llvm::errs() << "MACRO: ";
236  for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
238  llvm::errs() << " ";
239  }
240  llvm::errs() << "\n";
241 }
242 
244  llvm::errs() << "\n*** Preprocessor Stats:\n";
245  llvm::errs() << NumDirectives << " directives found:\n";
246  llvm::errs() << " " << NumDefined << " #define.\n";
247  llvm::errs() << " " << NumUndefined << " #undef.\n";
248  llvm::errs() << " #include/#include_next/#import:\n";
249  llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
250  llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
251  llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
252  llvm::errs() << " " << NumElse << " #else/#elif.\n";
253  llvm::errs() << " " << NumEndif << " #endif.\n";
254  llvm::errs() << " " << NumPragma << " #pragma.\n";
255  llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
256 
257  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
258  << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
259  << NumFastMacroExpanded << " on the fast path.\n";
260  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
261  << " token paste (##) operations performed, "
262  << NumFastTokenPaste << " on the fast path.\n";
263 
264  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
265 
266  llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
267  llvm::errs() << "\n Macro Expanded Tokens: "
268  << llvm::capacity_in_bytes(MacroExpandedTokens);
269  llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
270  // FIXME: List information for all submodules.
271  llvm::errs() << "\n Macros: "
272  << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
273  llvm::errs() << "\n #pragma push_macro Info: "
274  << llvm::capacity_in_bytes(PragmaPushMacroInfo);
275  llvm::errs() << "\n Poison Reasons: "
276  << llvm::capacity_in_bytes(PoisonReasons);
277  llvm::errs() << "\n Comment Handlers: "
278  << llvm::capacity_in_bytes(CommentHandlers) << "\n";
279 }
280 
282 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
283  if (IncludeExternalMacros && ExternalSource &&
284  !ReadMacrosFromExternalSource) {
285  ReadMacrosFromExternalSource = true;
286  ExternalSource->ReadDefinedMacros();
287  }
288 
289  // Make sure we cover all macros in visible modules.
290  for (const ModuleMacro &Macro : ModuleMacros)
291  CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
292 
293  return CurSubmoduleState->Macros.begin();
294 }
295 
297  return BP.getTotalMemory()
298  + llvm::capacity_in_bytes(MacroExpandedTokens)
299  + Predefines.capacity() /* Predefines buffer. */
300  // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
301  // and ModuleMacros.
302  + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
303  + llvm::capacity_in_bytes(PragmaPushMacroInfo)
304  + llvm::capacity_in_bytes(PoisonReasons)
305  + llvm::capacity_in_bytes(CommentHandlers);
306 }
307 
309 Preprocessor::macro_end(bool IncludeExternalMacros) const {
310  if (IncludeExternalMacros && ExternalSource &&
311  !ReadMacrosFromExternalSource) {
312  ReadMacrosFromExternalSource = true;
313  ExternalSource->ReadDefinedMacros();
314  }
315 
316  return CurSubmoduleState->Macros.end();
317 }
318 
319 /// \brief Compares macro tokens with a specified token value sequence.
320 static bool MacroDefinitionEquals(const MacroInfo *MI,
322  return Tokens.size() == MI->getNumTokens() &&
323  std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
324 }
325 
327  SourceLocation Loc,
329  SourceLocation BestLocation;
330  StringRef BestSpelling;
332  I != E; ++I) {
334  Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
335  if (!Def || !Def.getMacroInfo())
336  continue;
337  if (!Def.getMacroInfo()->isObjectLike())
338  continue;
340  continue;
341  SourceLocation Location = Def.getLocation();
342  // Choose the macro defined latest.
343  if (BestLocation.isInvalid() ||
344  (Location.isValid() &&
345  SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
346  BestLocation = Location;
347  BestSpelling = I->first->getName();
348  }
349  }
350  return BestSpelling;
351 }
352 
354  if (CurLexer)
355  CurLexerKind = CLK_Lexer;
356  else if (CurPTHLexer)
357  CurLexerKind = CLK_PTHLexer;
358  else if (CurTokenLexer)
359  CurLexerKind = CLK_TokenLexer;
360  else
361  CurLexerKind = CLK_CachingLexer;
362 }
363 
365  unsigned CompleteLine,
366  unsigned CompleteColumn) {
367  assert(File);
368  assert(CompleteLine && CompleteColumn && "Starts from 1:1");
369  assert(!CodeCompletionFile && "Already set");
370 
371  using llvm::MemoryBuffer;
372 
373  // Load the actual file's contents.
374  bool Invalid = false;
375  const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
376  if (Invalid)
377  return true;
378 
379  // Find the byte position of the truncation point.
380  const char *Position = Buffer->getBufferStart();
381  for (unsigned Line = 1; Line < CompleteLine; ++Line) {
382  for (; *Position; ++Position) {
383  if (*Position != '\r' && *Position != '\n')
384  continue;
385 
386  // Eat \r\n or \n\r as a single line.
387  if ((Position[1] == '\r' || Position[1] == '\n') &&
388  Position[0] != Position[1])
389  ++Position;
390  ++Position;
391  break;
392  }
393  }
394 
395  Position += CompleteColumn - 1;
396 
397  // If pointing inside the preamble, adjust the position at the beginning of
398  // the file after the preamble.
399  if (SkipMainFilePreamble.first &&
401  if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
402  Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
403  }
404 
405  if (Position > Buffer->getBufferEnd())
406  Position = Buffer->getBufferEnd();
407 
408  CodeCompletionFile = File;
409  CodeCompletionOffset = Position - Buffer->getBufferStart();
410 
411  std::unique_ptr<MemoryBuffer> NewBuffer =
412  MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
413  Buffer->getBufferIdentifier());
414  char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
415  char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
416  *NewPos = '\0';
417  std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
418  SourceMgr.overrideFileContents(File, std::move(NewBuffer));
419 
420  return false;
421 }
422 
424  if (CodeComplete)
425  CodeComplete->CodeCompleteNaturalLanguage();
427 }
428 
429 /// getSpelling - This method is used to get the spelling of a token into a
430 /// SmallVector. Note that the returned StringRef may not point to the
431 /// supplied buffer if a copy can be avoided.
432 StringRef Preprocessor::getSpelling(const Token &Tok,
433  SmallVectorImpl<char> &Buffer,
434  bool *Invalid) const {
435  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
436  if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
437  // Try the fast path.
438  if (const IdentifierInfo *II = Tok.getIdentifierInfo())
439  return II->getName();
440  }
441 
442  // Resize the buffer if we need to copy into it.
443  if (Tok.needsCleaning())
444  Buffer.resize(Tok.getLength());
445 
446  const char *Ptr = Buffer.data();
447  unsigned Len = getSpelling(Tok, Ptr, Invalid);
448  return StringRef(Ptr, Len);
449 }
450 
451 /// CreateString - Plop the specified string into a scratch buffer and return a
452 /// location for it. If specified, the source location provides a source
453 /// location for the token.
454 void Preprocessor::CreateString(StringRef Str, Token &Tok,
455  SourceLocation ExpansionLocStart,
456  SourceLocation ExpansionLocEnd) {
457  Tok.setLength(Str.size());
458 
459  const char *DestPtr;
460  SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
461 
462  if (ExpansionLocStart.isValid())
463  Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
464  ExpansionLocEnd, Str.size());
465  Tok.setLocation(Loc);
466 
467  // If this is a raw identifier or a literal token, set the pointer data.
468  if (Tok.is(tok::raw_identifier))
469  Tok.setRawIdentifierData(DestPtr);
470  else if (Tok.isLiteral())
471  Tok.setLiteralData(DestPtr);
472 }
473 
475  if (getLangOpts().CurrentModule.empty())
476  return nullptr;
477 
478  return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
479 }
480 
481 //===----------------------------------------------------------------------===//
482 // Preprocessor Initialization Methods
483 //===----------------------------------------------------------------------===//
484 
485 
486 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
487 /// which implicitly adds the builtin defines etc.
489  // We do not allow the preprocessor to reenter the main file. Doing so will
490  // cause FileID's to accumulate information from both runs (e.g. #line
491  // information) and predefined macros aren't guaranteed to be set properly.
492  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
493  FileID MainFileID = SourceMgr.getMainFileID();
494 
495  // If MainFileID is loaded it means we loaded an AST file, no need to enter
496  // a main file.
497  if (!SourceMgr.isLoadedFileID(MainFileID)) {
498  // Enter the main file source buffer.
499  EnterSourceFile(MainFileID, nullptr, SourceLocation());
500 
501  // If we've been asked to skip bytes in the main file (e.g., as part of a
502  // precompiled preamble), do so now.
503  if (SkipMainFilePreamble.first > 0)
504  CurLexer->SkipBytes(SkipMainFilePreamble.first,
505  SkipMainFilePreamble.second);
506 
507  // Tell the header info that the main file was entered. If the file is later
508  // #imported, it won't be re-entered.
509  if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
510  HeaderInfo.IncrementIncludeCount(FE);
511  }
512 
513  // Preprocess Predefines to populate the initial preprocessor state.
514  std::unique_ptr<llvm::MemoryBuffer> SB =
515  llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
516  assert(SB && "Cannot create predefined source buffer");
517  FileID FID = SourceMgr.createFileID(std::move(SB));
518  assert(!FID.isInvalid() && "Could not create FileID for predefines?");
519  setPredefinesFileID(FID);
520 
521  // Start parsing the predefines.
522  EnterSourceFile(FID, nullptr, SourceLocation());
523 }
524 
526  // Notify the client that we reached the end of the source file.
527  if (Callbacks)
528  Callbacks->EndOfMainFile();
529 }
530 
531 //===----------------------------------------------------------------------===//
532 // Lexer Event Handling.
533 //===----------------------------------------------------------------------===//
534 
535 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
536 /// identifier information for the token and install it into the token,
537 /// updating the token kind accordingly.
539  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
540 
541  // Look up this token, see if it is a macro, or if it is a language keyword.
542  IdentifierInfo *II;
543  if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
544  // No cleaning needed, just use the characters from the lexed buffer.
545  II = getIdentifierInfo(Identifier.getRawIdentifier());
546  } else {
547  // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
548  SmallString<64> IdentifierBuffer;
549  StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
550 
551  if (Identifier.hasUCN()) {
552  SmallString<64> UCNIdentifierBuffer;
553  expandUCNs(UCNIdentifierBuffer, CleanedStr);
554  II = getIdentifierInfo(UCNIdentifierBuffer);
555  } else {
556  II = getIdentifierInfo(CleanedStr);
557  }
558  }
559 
560  // Update the token info (identifier info and appropriate token kind).
561  Identifier.setIdentifierInfo(II);
562  Identifier.setKind(II->getTokenID());
563 
564  return II;
565 }
566 
567 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
568  PoisonReasons[II] = DiagID;
569 }
570 
572  assert(Ident__exception_code && Ident__exception_info);
573  assert(Ident___exception_code && Ident___exception_info);
574  Ident__exception_code->setIsPoisoned(Poison);
575  Ident___exception_code->setIsPoisoned(Poison);
576  Ident_GetExceptionCode->setIsPoisoned(Poison);
577  Ident__exception_info->setIsPoisoned(Poison);
578  Ident___exception_info->setIsPoisoned(Poison);
579  Ident_GetExceptionInfo->setIsPoisoned(Poison);
580  Ident__abnormal_termination->setIsPoisoned(Poison);
581  Ident___abnormal_termination->setIsPoisoned(Poison);
582  Ident_AbnormalTermination->setIsPoisoned(Poison);
583 }
584 
586  assert(Identifier.getIdentifierInfo() &&
587  "Can't handle identifiers without identifier info!");
588  llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
589  PoisonReasons.find(Identifier.getIdentifierInfo());
590  if(it == PoisonReasons.end())
591  Diag(Identifier, diag::err_pp_used_poisoned_id);
592  else
593  Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
594 }
595 
596 /// \brief Returns a diagnostic message kind for reporting a future keyword as
597 /// appropriate for the identifier and specified language.
599  const LangOptions &LangOpts) {
600  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
601 
602  if (LangOpts.CPlusPlus)
603  return llvm::StringSwitch<diag::kind>(II.getName())
604 #define CXX11_KEYWORD(NAME, FLAGS) \
605  .Case(#NAME, diag::warn_cxx11_keyword)
606 #include "clang/Basic/TokenKinds.def"
607  ;
608 
609  llvm_unreachable(
610  "Keyword not known to come from a newer Standard or proposed Standard");
611 }
612 
613 /// HandleIdentifier - This callback is invoked when the lexer reads an
614 /// identifier. This callback looks up the identifier in the map and/or
615 /// potentially macro expands it or turns it into a named token (like 'for').
616 ///
617 /// Note that callers of this method are guarded by checking the
618 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
619 /// IdentifierInfo methods that compute these properties will need to change to
620 /// match.
622  assert(Identifier.getIdentifierInfo() &&
623  "Can't handle identifiers without identifier info!");
624 
625  IdentifierInfo &II = *Identifier.getIdentifierInfo();
626 
627  // If the information about this identifier is out of date, update it from
628  // the external source.
629  // We have to treat __VA_ARGS__ in a special way, since it gets
630  // serialized with isPoisoned = true, but our preprocessor may have
631  // unpoisoned it if we're defining a C99 macro.
632  if (II.isOutOfDate()) {
633  bool CurrentIsPoisoned = false;
634  if (&II == Ident__VA_ARGS__)
635  CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
636 
637  ExternalSource->updateOutOfDateIdentifier(II);
638  Identifier.setKind(II.getTokenID());
639 
640  if (&II == Ident__VA_ARGS__)
641  II.setIsPoisoned(CurrentIsPoisoned);
642  }
643 
644  // If this identifier was poisoned, and if it was not produced from a macro
645  // expansion, emit an error.
646  if (II.isPoisoned() && CurPPLexer) {
647  HandlePoisonedIdentifier(Identifier);
648  }
649 
650  // If this is a macro to be expanded, do it.
651  if (MacroDefinition MD = getMacroDefinition(&II)) {
652  auto *MI = MD.getMacroInfo();
653  assert(MI && "macro definition with no macro info?");
654  if (!DisableMacroExpansion) {
655  if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
656  // C99 6.10.3p10: If the preprocessing token immediately after the
657  // macro name isn't a '(', this macro should not be expanded.
658  if (!MI->isFunctionLike() || isNextPPTokenLParen())
659  return HandleMacroExpandedIdentifier(Identifier, MD);
660  } else {
661  // C99 6.10.3.4p2 says that a disabled macro may never again be
662  // expanded, even if it's in a context where it could be expanded in the
663  // future.
664  Identifier.setFlag(Token::DisableExpand);
665  if (MI->isObjectLike() || isNextPPTokenLParen())
666  Diag(Identifier, diag::pp_disabled_macro_expansion);
667  }
668  }
669  }
670 
671  // If this identifier is a keyword in a newer Standard or proposed Standard,
672  // produce a warning. Don't warn if we're not considering macro expansion,
673  // since this identifier might be the name of a macro.
674  // FIXME: This warning is disabled in cases where it shouldn't be, like
675  // "#define constexpr constexpr", "int constexpr;"
676  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
677  Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
678  << II.getName();
679  // Don't diagnose this keyword again in this translation unit.
680  II.setIsFutureCompatKeyword(false);
681  }
682 
683  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
684  // then we act as if it is the actual operator and not the textual
685  // representation of it.
686  if (II.isCPlusPlusOperatorKeyword())
687  Identifier.setIdentifierInfo(nullptr);
688 
689  // If this is an extension token, diagnose its use.
690  // We avoid diagnosing tokens that originate from macro definitions.
691  // FIXME: This warning is disabled in cases where it shouldn't be,
692  // like "#define TY typeof", "TY(1) x".
693  if (II.isExtensionToken() && !DisableMacroExpansion)
694  Diag(Identifier, diag::ext_token_used);
695 
696  // If this is the 'import' contextual keyword following an '@', note
697  // that the next token indicates a module name.
698  //
699  // Note that we do not treat 'import' as a contextual
700  // keyword when we're in a caching lexer, because caching lexers only get
701  // used in contexts where import declarations are disallowed.
702  if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
703  !DisableMacroExpansion &&
704  (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
705  CurLexerKind != CLK_CachingLexer) {
706  ModuleImportLoc = Identifier.getLocation();
707  ModuleImportPath.clear();
708  ModuleImportExpectsIdentifier = true;
709  CurLexerKind = CLK_LexAfterModuleImport;
710  }
711  return true;
712 }
713 
714 void Preprocessor::Lex(Token &Result) {
715  // We loop here until a lex function retuns a token; this avoids recursion.
716  bool ReturnedToken;
717  do {
718  switch (CurLexerKind) {
719  case CLK_Lexer:
720  ReturnedToken = CurLexer->Lex(Result);
721  break;
722  case CLK_PTHLexer:
723  ReturnedToken = CurPTHLexer->Lex(Result);
724  break;
725  case CLK_TokenLexer:
726  ReturnedToken = CurTokenLexer->Lex(Result);
727  break;
728  case CLK_CachingLexer:
729  CachingLex(Result);
730  ReturnedToken = true;
731  break;
732  case CLK_LexAfterModuleImport:
733  LexAfterModuleImport(Result);
734  ReturnedToken = true;
735  break;
736  }
737  } while (!ReturnedToken);
738 
739  LastTokenWasAt = Result.is(tok::at);
740 }
741 
742 
743 /// \brief Lex a token following the 'import' contextual keyword.
744 ///
746  // Figure out what kind of lexer we actually have.
748 
749  // Lex the next token.
750  Lex(Result);
751 
752  // The token sequence
753  //
754  // import identifier (. identifier)*
755  //
756  // indicates a module import directive. We already saw the 'import'
757  // contextual keyword, so now we're looking for the identifiers.
758  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
759  // We expected to see an identifier here, and we did; continue handling
760  // identifiers.
761  ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
762  Result.getLocation()));
763  ModuleImportExpectsIdentifier = false;
764  CurLexerKind = CLK_LexAfterModuleImport;
765  return;
766  }
767 
768  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
769  // see the next identifier.
770  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
771  ModuleImportExpectsIdentifier = true;
772  CurLexerKind = CLK_LexAfterModuleImport;
773  return;
774  }
775 
776  // If we have a non-empty module path, load the named module.
777  if (!ModuleImportPath.empty()) {
778  Module *Imported = nullptr;
779  if (getLangOpts().Modules) {
780  Imported = TheModuleLoader.loadModule(ModuleImportLoc,
781  ModuleImportPath,
783  /*IsIncludeDirective=*/false);
784  if (Imported)
785  makeModuleVisible(Imported, ModuleImportLoc);
786  }
787  if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
788  Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
789  }
790 }
791 
793  CurSubmoduleState->VisibleModules.setVisible(
794  M, Loc, [](Module *) {},
795  [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
796  // FIXME: Include the path in the diagnostic.
797  // FIXME: Include the import location for the conflicting module.
798  Diag(ModuleImportLoc, diag::warn_module_conflict)
799  << Path[0]->getFullModuleName()
800  << Conflict->getFullModuleName()
801  << Message;
802  });
803 
804  // Add this module to the imports list of the currently-built submodule.
805  if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
806  BuildingSubmoduleStack.back().M->Imports.insert(M);
807 }
808 
809 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
810  const char *DiagnosticTag,
811  bool AllowMacroExpansion) {
812  // We need at least one string literal.
813  if (Result.isNot(tok::string_literal)) {
814  Diag(Result, diag::err_expected_string_literal)
815  << /*Source='in...'*/0 << DiagnosticTag;
816  return false;
817  }
818 
819  // Lex string literal tokens, optionally with macro expansion.
820  SmallVector<Token, 4> StrToks;
821  do {
822  StrToks.push_back(Result);
823 
824  if (Result.hasUDSuffix())
825  Diag(Result, diag::err_invalid_string_udl);
826 
827  if (AllowMacroExpansion)
828  Lex(Result);
829  else
830  LexUnexpandedToken(Result);
831  } while (Result.is(tok::string_literal));
832 
833  // Concatenate and parse the strings.
834  StringLiteralParser Literal(StrToks, *this);
835  assert(Literal.isAscii() && "Didn't allow wide strings in");
836 
837  if (Literal.hadError)
838  return false;
839 
840  if (Literal.Pascal) {
841  Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
842  << /*Source='in...'*/0 << DiagnosticTag;
843  return false;
844  }
845 
846  String = Literal.GetString();
847  return true;
848 }
849 
851  assert(Tok.is(tok::numeric_constant));
852  SmallString<8> IntegerBuffer;
853  bool NumberInvalid = false;
854  StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
855  if (NumberInvalid)
856  return false;
857  NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
858  if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
859  return false;
860  llvm::APInt APVal(64, 0);
861  if (Literal.GetIntegerValue(APVal))
862  return false;
863  Lex(Tok);
864  Value = APVal.getLimitedValue();
865  return true;
866 }
867 
869  assert(Handler && "NULL comment handler");
870  assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
871  CommentHandlers.end() && "Comment handler already registered");
872  CommentHandlers.push_back(Handler);
873 }
874 
876  std::vector<CommentHandler *>::iterator Pos
877  = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
878  assert(Pos != CommentHandlers.end() && "Comment handler not registered");
879  CommentHandlers.erase(Pos);
880 }
881 
883  bool AnyPendingTokens = false;
884  for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
885  HEnd = CommentHandlers.end();
886  H != HEnd; ++H) {
887  if ((*H)->HandleComment(*this, Comment))
888  AnyPendingTokens = true;
889  }
890  if (!AnyPendingTokens || getCommentRetentionState())
891  return false;
892  Lex(result);
893  return true;
894 }
895 
897 
899 
901 
903  if (Record)
904  return;
905 
906  Record = new PreprocessingRecord(getSourceManager());
907  addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
908 }
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens. If there are multiple macro...
bool isAtStartOfLine() const
Definition: Token.h:261
SourceManager & getSourceManager() const
Definition: Preprocessor.h:682
bool isPoisoned() const
Return true if this token has been poisoned.
int Position
void FinalizeForModelFile()
Cleanup after model file parsing.
bool isLoadedFileID(FileID FID) const
Returns true if FID came from a PCH/Module.
Defines the clang::FileManager interface and associated types.
Defines the SourceManager interface.
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Definition: Preprocessor.h:922
static const Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:22
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
Defines the FileSystemStatCache interface.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
bool isObjectLike() const
Definition: MacroInfo.h:198
virtual void CodeCompleteNaturalLanguage()
Callback invoked when performing code completion in a part of the file where we expect natural langua...
Defines the clang::MacroInfo and clang::MacroDirective classes.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:265
A description of the current definition of a macro.
Definition: MacroInfo.h:564
void LexAfterModuleImport(Token &Result)
Lex a token following the 'import' contextual keyword.
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:293
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:234
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:280
MacroMap::const_iterator macro_iterator
Definition: Preprocessor.h:897
void setCodeCompletionReached()
Note that we hit the code-completion point.
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions...
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value. Floating point literals and user defined literals are rejected.
void setPTHManager(PTHManager *pm)
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
Represents a macro directive exported by a module.
Definition: MacroInfo.h:498
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:207
SourceLocation getLocation() const
Definition: MacroInfo.h:472
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:679
void setTarget(const TargetInfo &Target)
Set the target information for the header search, if not already known.
void setKind(tok::TokenKind K)
Definition: Token.h:91
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:48
Describes a module or submodule.
Definition: Basic/Module.h:49
bool SetCodeCompletionPoint(const FileEntry *File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
bool getCommentRetentionState() const
Definition: Preprocessor.h:721
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:135
HeaderSearch & getHeaderSearchInfo() const
Definition: Preprocessor.h:683
bool isFutureCompatKeyword() const
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:240
void dump(const SourceManager &SM) const
tok::TokenKind getKind() const
Definition: Token.h:90
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
bool isInvalid() const
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/CurPTHLexer/ CurTokenLexer pointers...
Encapsulates the information needed to find the file referenced by a #include or #include_next, (sub-)framework lookup, etc.
Definition: HeaderSearch.h:151
StringRef getRawIdentifier() const
Definition: Token.h:203
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
Provides lookups to, and iteration over, IdentifierInfo objects.
SourceManager & SM
Exposes information about the current target.
StringRef getName() const
Return the actual identifier string.
void makeModuleVisible(Module *M, SourceLocation Loc)
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter, int LoadedID=0, unsigned LoadedOffset=0)
Create a new FileID that represents the specified file being #included from the specified IncludePosi...
virtual void ReadDefinedMacros()=0
Read the set of macros defined by this external macro source.
SourceManager & SourceMgr
Definition: Format.cpp:1205
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc...
Defines the clang::Preprocessor interface.
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix. ...
Definition: Token.h:290
void setIsPoisoned(bool Value=true)
static bool MacroDefinitionEquals(const MacroInfo *MI, ArrayRef< TokenValue > Tokens)
Compares macro tokens with a specified token value sequence.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
unsigned getNumTokens() const
Return the number of tokens that this macro expands to.
Definition: MacroInfo.h:232
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
void IncrementIncludeCount(const FileEntry *File)
Increment the count for the number of times the specified FileEntry has been entered.
Definition: HeaderSearch.h:455
Preprocessor(IntrusiveRefCntPtr< PreprocessorOptions > PPOpts, DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
Module * lookupModule(StringRef ModuleName, bool AllowSearch=true)
Look up a module: search for a module with the given name.
void overrideFileContents(const FileEntry *SourceFile, llvm::MemoryBuffer *Buffer, bool DoNotFree)
Override the contents of the given source file by providing an already-allocated buffer.
size_t getTotalMemory() const
#define false
Definition: stdbool.h:33
void addStatCache(std::unique_ptr< FileSystemStatCache > statCache, bool AtBeginning=false)
Installs the provided FileSystemStatCache object within the FileManager.
Definition: FileManager.cpp:68
Encodes a location in the source. The SourceManager can decode this to get at the full include stack...
void setLength(unsigned Len)
Definition: Token.h:133
AnnotatedLine & Line
StringRef GetString() const
bool isValid() const
Return true if this is a valid SourceLocation object.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
Definition: Preprocessor.h:805
macro_iterator macro_end(bool IncludeExternalMacros=true) const
All of the names in this module are hidden.
Definition: Basic/Module.h:207
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:53
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:186
void Lex(Token &Result)
Lex the next token for this preprocessor.
llvm::MemoryBuffer * getMemoryBufferForFile(const FileEntry *File, bool *Invalid=nullptr)
Retrieve the memory buffer associated with the given file.
SmallVector< FormatToken *, 16 > Tokens
Definition: Format.cpp:1214
const Token & getReplacementToken(unsigned Tok) const
Definition: MacroInfo.h:234
FileID getMainFileID() const
Returns the FileID of the main source file.
bool is(tok::TokenKind K) const
Definition: Token.h:95
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, const LangOptions &LangOpts)
Returns a diagnostic message kind for reporting a future keyword as appropriate for the identifier an...
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path, Module::NameVisibilityKind Visibility, bool IsInclusionDirective)=0
Attempt to load the given module.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
Abstract interface for a module loader.
Definition: ModuleLoader.h:56
void PoisonSEHIdentifiers(bool Poison=true)
void setLiteralData(const char *Ptr)
Definition: Token.h:219
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
Encapsulates the data about a macro definition (e.g. its tokens).
Definition: MacroInfo.h:34
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
bool isInvalid() const
virtual void updateOutOfDateIdentifier(IdentifierInfo &II)=0
Update an out-of-date identifier.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
void HandlePoisonedIdentifier(Token &Tok)
Display reason for poisoned identifier.
#define CXX11_KEYWORD(NAME, FLAGS)
bool HandleComment(Token &Token, SourceRange Comment)
void DumpMacro(const MacroInfo &MI) const
TranslationUnitKind
Describes the kind of translation unit being processed.
Definition: LangOptions.h:163
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:43
void Initialize(const TargetInfo &Target)
Initialize the preprocessor using information about the target.
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:25
Defines the clang::TargetInfo interface.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Abstract base class that describes a handler that will receive source ranges for each of the comments...
unsigned getLength() const
Definition: Token.h:127
void DumpLocation(SourceLocation Loc) const
void setLocation(SourceLocation L)
Definition: Token.h:132
#define true
Definition: stdbool.h:32
A trivial tuple used to represent a source range.
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6...
Definition: Token.h:269
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:773
This class handles loading and caching of source files into memory.
bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir, SourceLocation Loc)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177