14 #ifndef LLVM_CLANG_LEX_LEXER_H
15 #define LLVM_CLANG_LEX_LEXER_H
19 #include "llvm/ADT/SmallVector.h"
24 class DiagnosticsEngine;
27 class DiagnosticBuilder;
/// Overrides the virtual anchor() inherited from a base class (the base class
/// is not visible in this excerpt).
/// NOTE(review): presumably the usual out-of-line "anchor" method used to pin
/// the class's vtable to a single translation unit — confirm.
47 void anchor()
override;
// First byte of the buffer. The buffer accessor in this class exposes the
// buffer as StringRef(BufferStart, BufferEnd - BufferStart).
51 const char *BufferStart;
// One past the last byte of the buffer: BufferEnd - BufferStart is used as the
// buffer length, and BufferPtr is compared against BufferEnd elsewhere in the
// class.
52 const char *BufferEnd;
// Small mode value. Per the visible setters and predicates:
//   0 - neither comments nor whitespace are retained,
//   1 - written by the comment-retention setter (Mode ? 1 : 0),
//   2 - written by the keep-whitespace setter (Val ? 2 : 0).
// The predicates test `> 0` (true for 1 and 2) and `> 1` (true for 2 only).
68 unsigned char ExtendedTokenMode;
// Position within [BufferStart, BufferEnd]. FormTokenWithChars computes a
// token length as TokEnd - BufferPtr, and cutOffLexing() sets it to BufferEnd.
// NOTE(review): presumably the current lexing position — confirm.
77 const char *BufferPtr;
// NOTE(review): no use of this flag is visible in this excerpt; by name it
// presumably records whether the lexer is at the start of a physical source
// line — confirm against the implementation.
83 bool IsAtPhysicalStartOfLine;
// NOTE(review): no use of this flag is visible in this excerpt; by name it
// presumably records that an empty macro preceded the current token — confirm.
87 bool HasLeadingEmptyMacro;
/// Copy assignment is explicitly deleted: one Lexer cannot be assigned from
/// another.
93 void operator=(
const Lexer &) =
delete;
/// Declaration only; the definition is not part of this excerpt.
///
/// \param BufStart pointer to the start of a character buffer.
/// \param BufPtr   pointer into that buffer.
/// \param BufEnd   pointer to the end of that buffer.
///
/// NOTE(review): the parameter names mirror the BufferStart / BufferPtr /
/// BufferEnd members, so this presumably initializes them — confirm.
96 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
109 const char *BufStart,
const char *BufPtr,
const char *BufEnd);
159 return BufferPtr == BufferEnd;
167 return ExtendedTokenMode > 1;
174 "Can only retain whitespace in raw mode or -traditional-cpp");
175 ExtendedTokenMode = Val ? 2 : 0;
181 return ExtendedTokenMode > 0;
189 "Can't play with comment retention state when retaining whitespace");
190 ExtendedTokenMode = Mode ? 1 : 0;
203 return StringRef(BufferStart, BufferEnd - BufferStart);
/// Static helper that takes a StringRef and returns a new std::string.
///
/// \param Str     input text.
/// \param Charify optional flag, false by default.
///
/// NOTE(review): the definition is not visible here; by name, Charify
/// presumably selects character-literal rather than string-literal escaping —
/// confirm before relying on it.
231 static std::string
Stringify(StringRef Str,
bool Charify =
false);
251 bool *Invalid =
nullptr);
261 bool *Invalid =
nullptr);
275 bool *invalid =
nullptr);
290 bool IgnoreWhiteSpace =
false);
400 bool *Invalid =
nullptr);
430 unsigned MaxLines = 0);
441 bool SkipTrailingWhitespaceAndNewLine);
452 if (isObviouslySimpleCharacter(Ptr[0])) {
458 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
/// Internal lexing entry point (declaration only). Writes into \p Result and
/// receives TokAtPhysicalStartOfLine by value.
/// NOTE(review): the meaning of the bool return value is not visible in this
/// excerpt — confirm against the definition.
468 bool LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine);
/// Declaration only. Takes the token being formed, a 32-bit code point \p C and
/// a position \p CurPtr in the buffer.
/// NOTE(review): by name, checks whether C is Unicode whitespace; the return
/// convention is not visible here — confirm.
470 bool CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
const char *CurPtr);
/// Declaration only. Same parameter shape as CheckUnicodeWhitespace: the token
/// being formed, a 32-bit code point \p C and a buffer position \p CurPtr.
/// NOTE(review): presumably lexes a token starting at a non-ASCII code point —
/// confirm.
475 bool LexUnicode(
Token &
Result, uint32_t
C,
const char *CurPtr);
482 void FormTokenWithChars(
Token &
Result,
const char *TokEnd,
484 unsigned TokLen = TokEnd-BufferPtr;
/// Declaration only; takes no arguments.
/// NOTE(review): despite the predicate-style name the return type is unsigned,
/// not bool, so callers presumably distinguish more than two outcomes — check
/// the definition for the exact value convention.
494 unsigned isNextPPTokenLParen();
518 static bool isObviouslySimpleCharacter(
char C) {
519 return C !=
'?' && C !=
'\\';
526 inline char getAndAdvanceChar(
const char *&Ptr,
Token &Tok) {
529 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
532 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
541 const char *ConsumeChar(
const char *Ptr,
unsigned Size,
Token &Tok) {
549 getCharAndSizeSlow(Ptr, Size, &Tok);
557 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
560 if (isObviouslySimpleCharacter(Ptr[0])) {
566 return getCharAndSizeSlow(Ptr, Size);
/// Out-of-line slow path behind the inline character readers in this class.
/// getAndAdvanceChar and getCharAndSize call it only after their
/// isObviouslySimpleCharacter(Ptr[0]) fast-path check; ConsumeChar calls it
/// as well.
///
/// \param Ptr  position in the buffer to read from.
/// \param Size non-const reference shared with the caller.
///             NOTE(review): presumably receives the number of bytes the
///             returned character occupies — confirm.
/// \param Tok  optional token, nullptr by default. getCharAndSize omits it;
///             getAndAdvanceChar and ConsumeChar pass the address of their
///             token.
571 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
572 Token *Tok =
nullptr);
/// Static helper (declaration only) that takes a buffer position \p P and
/// returns an unsigned value.
/// NOTE(review): by name, the byte length of an escaped newline at P;
/// the result when there is none is not visible here — confirm.
577 static unsigned getEscapedNewLineSize(
const char *
P);
/// Static helper (declaration only) that takes a buffer position \p P and
/// returns a buffer position.
/// NOTE(review): by name, advances past any escaped newlines at P — confirm.
582 static const char *SkipEscapedNewLines(
const char *
P);
/// Static slow-path reader that takes LangOptions instead of a Token. The
/// visible inline wrapper returns its result after an
/// isObviouslySimpleCharacter(Ptr[0]) check.
/// NOTE(review): "NoWarn" suggests it emits no diagnostics — confirm.
586 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
587 const LangOptions &LangOpts);
/// Declaration only.
/// \param Bytes       an unsigned byte count.
/// \param StartOfLine a bool passed by value.
/// NOTE(review): presumably advances the lexer position by Bytes and records
/// the start-of-line state — confirm against the definition.
592 void SkipBytes(
unsigned Bytes,
bool StartOfLine);
/// Declaration only; takes the token \p Result by non-const reference.
/// NOTE(review): by name, copies start-of-line / leading-space information onto
/// Result — confirm.
594 void PropagateLineStartLeadingSpaceInfo(
Token &Result);
/// Declaration only. Takes the token being formed, a buffer position and a flag
/// saying whether the literal is a string literal; returns a buffer position.
/// NOTE(review): "UDSuffix" presumably means a user-defined-literal suffix, with
/// the return value pointing past it — confirm.
596 const char *LexUDSuffix(
Token &Result,
const char *CurPtr,
597 bool IsStringLiteral);
// Per-token-kind lexing helpers (declarations only). Each takes the token being
// formed (\p Result) and a position in the buffer (\p CurPtr) and returns bool.
// Several declarations below have further parameters that are not visible in
// this excerpt.
// NOTE(review): the meaning of the bool result is not visible here — confirm
// against the definitions before relying on it.
600 bool LexIdentifier (
Token &Result,
const char *CurPtr);
601 bool LexNumericConstant (
Token &Result,
const char *CurPtr);
602 bool LexStringLiteral (
Token &Result,
const char *CurPtr,
604 bool LexRawStringLiteral (
Token &Result,
const char *CurPtr,
606 bool LexAngledStringLiteral(
Token &Result,
const char *CurPtr);
607 bool LexCharConstant (
Token &Result,
const char *CurPtr,
609 bool LexEndOfFile (
Token &Result,
const char *CurPtr);
// Whitespace / comment helpers (declarations only). The three Skip* functions
// take TokAtPhysicalStartOfLine by non-const reference, so they can update the
// caller's flag; SaveLineComment does not take it.
// NOTE(review): the meaning of the bool results is not visible in this
// excerpt — confirm against the definitions.
610 bool SkipWhitespace (
Token &Result,
const char *CurPtr,
611 bool &TokAtPhysicalStartOfLine);
612 bool SkipLineComment (
Token &Result,
const char *CurPtr,
613 bool &TokAtPhysicalStartOfLine);
614 bool SkipBlockComment (
Token &Result,
const char *CurPtr,
615 bool &TokAtPhysicalStartOfLine);
616 bool SaveLineComment (
Token &Result,
const char *CurPtr);
// Conflict-marker helpers (declarations only); each inspects a position in the
// buffer and returns bool.
// NOTE(review): presumably these recognize version-control merge conflict
// markers — confirm against the definitions.
618 bool IsStartOfConflictMarker(
const char *CurPtr);
619 bool HandleEndOfConflictMarker(
const char *CurPtr);
// Const query on a buffer position; does not modify the lexer.
// NOTE(review): by name, true when CurPtr is the code-completion point —
// confirm.
621 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// Moves BufferPtr to BufferEnd, so any later BufferPtr == BufferEnd
/// comparison is true. Nothing else is modified.
622 void cutOffLexing() { BufferPtr = BufferEnd; }
/// Declaration only; takes a buffer position and the language options.
/// NOTE(review): by name, tests whether the text at Start is a hexadecimal
/// literal — confirm.
624 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
/// Declaration only. CurPtr is a reference to a pointer, so the callee can
/// advance the caller's position; Tok is a pointer parameter.
/// NOTE(review): "UCN" presumably means universal character name, with the
/// uint32_t result being the code point read; the failure value and whether
/// Tok may be null are not visible here — confirm.
639 uint32_t tryReadUCN(
const char *&CurPtr,
const char *SlashLoc,
Token *Tok);
/// Declaration only (the remaining parameters are not visible in this excerpt).
/// CurPtr is taken by reference, so the callee can advance it.
652 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
/// Declaration only. CurPtr is taken by reference, so the callee can advance
/// it.
/// NOTE(review): by name, consumes one UTF-8 encoded identifier character when
/// possible and reports success — confirm.
660 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
SourceLocation getEnd() const
static std::pair< unsigned, bool > ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
bool LexFromRawLexer(Token &Result)
StringRef getBuffer() const
Gets source code buffer.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void resetExtendedTokenMode()
SourceLocation getSourceLocation() override
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Character, const SourceManager &SM, const LangOptions &LangOpts)
A little helper class used to produce diagnostics.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
Defines the clang::LangOptions interface.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
The result type of a method or function.
static CharSourceRange getCharRange(SourceRange R)
const SourceRange & getAsRange() const
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token. Return false if the end o...
Encodes a location in the source. The SourceManager can decode this to get at the full include stack...
void setLength(unsigned Len)
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
SourceLocation getBegin() const
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool inKeepCommentMode() const
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range. This allows the source range to be used by components that don't have access to the lexer and thus can't find the end of the range for themselves.
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
const LangOptions & getLangOpts() const
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
Not within a conflict marker.
void SetCommentRetentionState(bool Mode)
bool isKeepWhitespaceMode() const
void setLocation(SourceLocation L)
A trivial tuple used to represent a source range.
Defines the PreprocessorLexer interface.
void SetKeepWhitespaceMode(bool Val)
This class handles loading and caching of source files into memory.
const char * getBufferLocation() const
Return the current location in the buffer.
SourceLocation getFileLoc() const
static std::string Stringify(StringRef Str, bool Charify=false)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.