22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/ConvertUTF.h"
27 #include "llvm/Support/MemoryBuffer.h"
29 using namespace clang;
38 return II->getObjCKeywordID() == objcKey;
/// Out-of-line definition of Lexer::anchor(). The body is intentionally
/// empty: the function takes no arguments, returns nothing and does no work.
// NOTE(review): presumably this exists only to give the class an out-of-line
// member definition in this file (the LLVM "virtual method anchor"
// convention) -- confirm against the class declaration in the header.
53 void Lexer::anchor() { }
55 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
57 BufferStart = BufStart;
61 assert(BufEnd[0] == 0 &&
62 "We assume that the input buffer has a null character at the end"
63 " to simplify lexing!");
68 if (BufferStart == BufferPtr) {
70 StringRef Buf(BufferStart, BufferEnd - BufferStart);
71 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
72 .StartsWith(
"\xEF\xBB\xBF", 3)
76 BufferPtr += BOMLength;
79 Is_PragmaLexer =
false;
80 CurrentConflictMarkerState =
CMK_None;
83 IsAtStartOfLine =
true;
84 IsAtPhysicalStartOfLine =
true;
86 HasLeadingSpace =
false;
87 HasLeadingEmptyMacro =
false;
102 ExtendedTokenMode = 0;
111 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
112 LangOpts(PP.getLangOpts()) {
114 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
115 InputFile->getBufferEnd());
121 assert(
PP &&
"Cannot reset token mode without a preprocessor");
122 if (LangOpts.TraditionalCPP)
132 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
133 : FileLoc(fileloc), LangOpts(langOpts) {
135 InitLexer(BufStart, BufPtr, BufEnd);
144 Lexer::Lexer(
FileID FID,
const llvm::MemoryBuffer *FromFile,
146 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
147 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
172 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
173 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
180 L->BufferPtr = StrData;
181 L->BufferEnd = StrData+TokLen;
182 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
188 ExpansionLocEnd, TokLen);
195 L->Is_PragmaLexer =
true;
204 char Quote = Charify ?
'\'' :
'"';
205 for (
unsigned i = 0, e = Result.size(); i != e; ++i) {
206 if (Result[i] ==
'\\' || Result[i] == Quote) {
207 Result.insert(Result.begin()+i,
'\\');
217 for (
unsigned i = 0, e = Str.size(); i != e; ++i) {
218 if (Str[i] ==
'\\' || Str[i] ==
'"') {
219 Str.insert(Str.begin()+i,
'\\');
233 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
236 const char *BufEnd = BufPtr + Tok.
getLength();
240 while (BufPtr < BufEnd) {
245 if (Spelling[Length - 1] ==
'"')
253 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
256 const char *RawEnd = BufEnd;
257 do --RawEnd;
while (*RawEnd !=
'"');
258 size_t RawLength = RawEnd - BufPtr + 1;
261 memcpy(Spelling + Length, BufPtr, RawLength);
269 while (BufPtr < BufEnd) {
276 "NeedsCleaning flag set on token that didn't need cleaning!");
294 bool invalidTemp =
false;
295 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
297 if (invalid) *invalid =
true;
301 const char *tokenBegin = file.data() + locInfo.second;
305 file.begin(), tokenBegin, file.end());
307 lexer.LexFromRawLexer(token);
312 if (!token.needsCleaning())
313 return StringRef(tokenBegin, length);
316 buffer.resize(length);
317 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
318 return StringRef(buffer.data(), buffer.size());
328 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
330 bool CharDataInvalid =
false;
334 *Invalid = CharDataInvalid;
336 return std::string();
340 return std::string(TokStart, TokStart + Tok.
getLength());
344 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
361 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
363 const char *TokStart =
nullptr;
365 if (Tok.
is(tok::raw_identifier))
370 Buffer = II->getNameStart();
371 return II->getLength();
381 bool CharDataInvalid =
false;
384 *Invalid = CharDataInvalid;
385 if (CharDataInvalid) {
398 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
420 bool IgnoreWhiteSpace) {
431 bool Invalid =
false;
436 const char *StrData = Buffer.data()+LocInfo.second;
443 Buffer.begin(), StrData, Buffer.end());
445 TheLexer.LexFromRawLexer(Result);
454 if (LocInfo.first.isInvalid())
457 bool Invalid =
false;
464 const char *BufStart = Buffer.data();
465 if (LocInfo.second >= Buffer.size())
468 const char *StrData = BufStart+LocInfo.second;
469 if (StrData[0] ==
'\n' || StrData[0] ==
'\r')
472 const char *LexStart = StrData;
473 while (LexStart != BufStart) {
474 if (LexStart[0] ==
'\n' || LexStart[0] ==
'\r') {
484 Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
490 TheLexer.LexFromRawLexer(TheTok);
492 if (TheLexer.getBufferLocation() > StrData) {
496 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
521 std::pair<FileID, unsigned> BeginFileLocInfo
523 assert(FileLocInfo.first == BeginFileLocInfo.first &&
524 FileLocInfo.second >= BeginFileLocInfo.second);
543 const unsigned StartOffset = 1;
545 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
552 bool InPreprocessorDirective =
false;
555 unsigned IfCount = 0;
558 unsigned MaxLineOffset = 0;
560 const char *CurPtr = Buffer.begin();
561 unsigned CurLine = 0;
562 while (CurPtr != Buffer.end()) {
566 if (CurLine == MaxLines)
570 if (CurPtr != Buffer.end())
571 MaxLineOffset = CurPtr - Buffer.begin();
575 TheLexer.LexFromRawLexer(TheTok);
577 if (InPreprocessorDirective) {
590 InPreprocessorDirective =
false;
599 if (MaxLineOffset && TokOffset >= MaxLineOffset)
604 if (TheTok.
getKind() == tok::comment) {
612 Token HashTok = TheTok;
613 InPreprocessorDirective =
true;
619 TheLexer.LexFromRawLexer(TheTok);
623 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
624 .Case(
"include", PDK_Skipped)
625 .Case(
"__include_macros", PDK_Skipped)
626 .Case(
"define", PDK_Skipped)
627 .Case(
"undef", PDK_Skipped)
628 .Case(
"line", PDK_Skipped)
629 .Case(
"error", PDK_Skipped)
630 .Case(
"pragma", PDK_Skipped)
631 .Case(
"import", PDK_Skipped)
632 .Case(
"include_next", PDK_Skipped)
633 .Case(
"warning", PDK_Skipped)
634 .Case(
"ident", PDK_Skipped)
635 .Case(
"sccs", PDK_Skipped)
636 .Case(
"assert", PDK_Skipped)
637 .Case(
"unassert", PDK_Skipped)
638 .Case(
"if", PDK_StartIf)
639 .Case(
"ifdef", PDK_StartIf)
640 .Case(
"ifndef", PDK_StartIf)
641 .Case(
"elif", PDK_Skipped)
642 .Case(
"else", PDK_Skipped)
643 .Case(
"endif", PDK_EndIf)
644 .Default(PDK_Unknown);
652 IfStartTok = HashTok;
674 InPreprocessorDirective =
false;
687 else if (ActiveCommentLoc.
isValid())
688 End = ActiveCommentLoc;
707 bool Invalid =
false;
711 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
714 unsigned PhysOffset = 0;
719 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
722 ++TokPtr, --CharNo, ++PhysOffset;
727 for (; CharNo; --CharNo) {
738 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
739 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
791 if (expansionLoc.isFileID()) {
794 *MacroBegin = expansionLoc;
822 *MacroEnd = expansionLoc;
896 bool Invalid =
false;
926 if (Invalid) *Invalid =
true;
932 if (beginInfo.first.isInvalid()) {
933 if (Invalid) *Invalid =
true;
939 beginInfo.second > EndOffs) {
940 if (Invalid) *Invalid =
true;
945 bool invalidTemp =
false;
946 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
948 if (Invalid) *Invalid =
true;
952 if (Invalid) *Invalid =
false;
953 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
959 assert(Loc.
isMacroID() &&
"Only reasonble to call this on macros");
999 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1000 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1020 unsigned CharNo,
unsigned TokLen) {
1021 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1035 std::pair<SourceLocation,SourceLocation> II =
1036 SM.getImmediateExpansionRange(FileLoc);
1038 return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);
1044 unsigned TokLen)
const {
1045 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1046 "Location out of range for this buffer!");
1050 unsigned CharNo = Loc-BufferStart;
1056 assert(
PP &&
"This doesn't work on raw lexers");
1075 case '=':
return '#';
1076 case ')':
return ']';
1077 case '(':
return '[';
1078 case '!':
return '|';
1079 case '\'':
return '^';
1080 case '>':
return '}';
1081 case '/':
return '\\';
1082 case '<':
return '{';
1083 case '-':
return '~';
1093 if (!Res || !L)
return Res;
1097 L->
Diag(CP-2, diag::trigraph_ignored);
1102 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1109 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1114 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1118 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1119 Ptr[Size-1] != Ptr[Size])
1132 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1134 const char *AfterEscape;
1137 }
else if (*P ==
'?') {
1139 if (P[1] !=
'?' || P[2] !=
'/')
1146 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1147 if (NewLineSize == 0)
return P;
1148 P = AfterEscape+NewLineSize;
1160 bool SkipTrailingWhitespaceAndNewLine) {
1171 bool InvalidTemp =
false;
1172 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1176 const char *TokenBegin = File.data() + LocInfo.second;
1180 TokenBegin, File.end());
1183 lexer.LexFromRawLexer(Tok);
1184 if (Tok.isNot(TKind))
1189 unsigned NumWhitespaceChars = 0;
1190 if (SkipTrailingWhitespaceAndNewLine) {
1193 unsigned char C = *TokenEnd;
1196 NumWhitespaceChars++;
1200 if (C ==
'\n' || C ==
'\r') {
1203 NumWhitespaceChars++;
1204 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1205 NumWhitespaceChars++;
1228 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1231 if (Ptr[0] ==
'\\') {
1240 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1246 Diag(Ptr, diag::backslash_newline_space);
1249 Size += EscapedNewLineSize;
1250 Ptr += EscapedNewLineSize;
1255 if (*Ptr ==
'\n' || *Ptr ==
'\r' || *Ptr ==
'\0')
1259 return getCharAndSizeSlow(Ptr, Size, Tok);
1267 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1276 if (
C ==
'\\')
goto Slash;
1293 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1296 if (Ptr[0] ==
'\\') {
1304 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1306 Size += EscapedNewLineSize;
1307 Ptr += EscapedNewLineSize;
1312 if (*Ptr ==
'\n' || *Ptr ==
'\r' || *Ptr ==
'\0')
1316 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1324 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1330 if (
C ==
'\\')
goto Slash;
1345 void Lexer::SkipBytes(
unsigned Bytes,
bool StartOfLine) {
1347 if (BufferPtr > BufferEnd)
1348 BufferPtr = BufferEnd;
1352 IsAtStartOfLine = StartOfLine;
1353 IsAtPhysicalStartOfLine = StartOfLine;
1357 if (LangOpts.AsmPreprocessor) {
1359 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1360 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1362 return C11AllowedIDChars.contains(C);
1363 }
else if (LangOpts.CPlusPlus) {
1364 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1366 return CXX03AllowedIDChars.contains(C);
1368 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1370 return C99AllowedIDChars.contains(C);
1376 if (LangOpts.AsmPreprocessor) {
1378 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1379 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1381 return !C11DisallowedInitialIDChars.contains(C);
1382 }
else if (LangOpts.CPlusPlus) {
1385 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1387 return !C99DisallowedInitialIDChars.contains(C);
1402 CannotAppearInIdentifier = 0,
1403 CannotStartIdentifier
1406 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1408 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1410 if (!C99AllowedIDChars.contains(C)) {
1413 << CannotAppearInIdentifier;
1414 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1417 << CannotStartIdentifier;
1423 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1425 if (!CXX03AllowedIDChars.contains(C)) {
1426 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
1432 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1434 const char *UCNPtr = CurPtr + Size;
1435 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1445 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1446 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1449 while (CurPtr != UCNPtr)
1450 (void)getAndAdvanceChar(CurPtr, Result);
1454 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1455 const char *UnicodePtr = CurPtr;
1457 ConversionResult Result =
1458 llvm::convertUTF8Sequence((
const UTF8 **)&UnicodePtr,
1459 (
const UTF8 *)BufferEnd,
1462 if (Result != conversionOK ||
1471 CurPtr = UnicodePtr;
1475 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1478 unsigned char C = *CurPtr++;
1489 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1490 (C !=
'$' || !LangOpts.DollarIdents)) {
1492 const char *IdStart = BufferPtr;
1493 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1515 C = getCharAndSize(CurPtr, Size);
1519 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1523 Diag(CurPtr, diag::ext_dollar_in_identifier);
1524 CurPtr = ConsumeChar(CurPtr, Size, Result);
1525 C = getCharAndSize(CurPtr, Size);
1528 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1529 C = getCharAndSize(CurPtr, Size);
1531 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1532 C = getCharAndSize(CurPtr, Size);
1535 goto FinishIdentifier;
1539 CurPtr = ConsumeChar(CurPtr, Size, Result);
1541 C = getCharAndSize(CurPtr, Size);
1543 CurPtr = ConsumeChar(CurPtr, Size, Result);
1544 C = getCharAndSize(CurPtr, Size);
1551 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1557 return (C2 ==
'x' || C2 ==
'X');
1563 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1565 char C = getCharAndSize(CurPtr, Size);
1568 CurPtr = ConsumeChar(CurPtr, Size, Result);
1570 C = getCharAndSize(CurPtr, Size);
1574 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1577 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1578 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1582 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1586 bool IsHexFloat =
true;
1587 if (!LangOpts.C99) {
1588 if (!isHexaLiteral(BufferPtr, LangOpts))
1590 else if (std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1594 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1603 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1604 CurPtr = ConsumeChar(CurPtr, Size, Result);
1605 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1606 return LexNumericConstant(Result, CurPtr);
1611 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1612 return LexNumericConstant(Result, CurPtr);
1613 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1614 return LexNumericConstant(Result, CurPtr);
1617 const char *TokStart = BufferPtr;
1618 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
1625 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1626 bool IsStringLiteral) {
1631 char C = getCharAndSize(CurPtr, Size);
1632 bool Consumed =
false;
1635 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1637 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1646 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1647 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1658 bool IsUDSuffix =
false;
1665 const unsigned MaxStandardSuffixLength = 3;
1666 char Buffer[MaxStandardSuffixLength] = { C };
1667 unsigned Consumed = Size;
1675 IsUDSuffix = (Chars == 1 && Buffer[0] ==
's') ||
1681 if (Chars == MaxStandardSuffixLength)
1685 Buffer[Chars++] =
Next;
1686 Consumed += NextSize;
1693 ? diag::ext_ms_reserved_user_defined_literal
1694 : diag::ext_reserved_user_defined_literal)
1699 CurPtr = ConsumeChar(CurPtr, Size, Result);
1704 C = getCharAndSize(CurPtr, Size);
1706 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1707 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
1716 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
1719 const char *NulCharacter =
nullptr;
1722 (Kind == tok::utf8_string_literal ||
1723 Kind == tok::utf16_string_literal ||
1724 Kind == tok::utf32_string_literal))
1726 ? diag::warn_cxx98_compat_unicode_literal
1727 : diag::warn_c99_compat_unicode_literal);
1729 char C = getAndAdvanceChar(CurPtr, Result);
1734 C = getAndAdvanceChar(CurPtr, Result);
1736 if (C ==
'\n' || C ==
'\r' ||
1737 (C == 0 && CurPtr-1 == BufferEnd)) {
1739 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
1740 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1745 if (isCodeCompletionPoint(CurPtr-1)) {
1747 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1752 NulCharacter = CurPtr-1;
1754 C = getAndAdvanceChar(CurPtr, Result);
1759 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1763 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1766 const char *TokStart = BufferPtr;
1767 FormTokenWithChars(Result, CurPtr, Kind);
1774 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1782 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1784 unsigned PrefixLen = 0;
1790 if (CurPtr[PrefixLen] !=
'(') {
1792 const char *PrefixEnd = &CurPtr[PrefixLen];
1793 if (PrefixLen == 16) {
1794 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1796 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1797 << StringRef(PrefixEnd, 1);
1809 if (C == 0 && CurPtr-1 == BufferEnd) {
1815 FormTokenWithChars(Result, CurPtr, tok::unknown);
1820 const char *Prefix = CurPtr;
1821 CurPtr += PrefixLen + 1;
1828 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
1829 CurPtr += PrefixLen + 1;
1832 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
1834 Diag(BufferPtr, diag::err_unterminated_raw_string)
1835 << StringRef(Prefix, PrefixLen);
1836 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1843 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1846 const char *TokStart = BufferPtr;
1847 FormTokenWithChars(Result, CurPtr, Kind);
1854 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
1856 const char *NulCharacter =
nullptr;
1857 const char *AfterLessPos = CurPtr;
1858 char C = getAndAdvanceChar(CurPtr, Result);
1861 if (C ==
'\\' && CurPtr < BufferEnd) {
1863 getAndAdvanceChar(CurPtr, Result);
1864 }
else if (C ==
'\n' || C ==
'\r' ||
1865 (C == 0 && (CurPtr-1 == BufferEnd ||
1866 isCodeCompletionPoint(CurPtr-1)))) {
1869 FormTokenWithChars(Result, AfterLessPos, tok::less);
1871 }
else if (C == 0) {
1872 NulCharacter = CurPtr-1;
1874 C = getAndAdvanceChar(CurPtr, Result);
1879 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1882 const char *TokStart = BufferPtr;
1883 FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
1891 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
1894 const char *NulCharacter =
nullptr;
1897 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
1899 ? diag::warn_cxx98_compat_unicode_literal
1900 : diag::warn_c99_compat_unicode_literal);
1901 else if (Kind == tok::utf8_char_constant)
1902 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
1905 char C = getAndAdvanceChar(CurPtr, Result);
1908 Diag(BufferPtr, diag::ext_empty_character);
1909 FormTokenWithChars(Result, CurPtr, tok::unknown);
1916 C = getAndAdvanceChar(CurPtr, Result);
1918 if (C ==
'\n' || C ==
'\r' ||
1919 (C == 0 && CurPtr-1 == BufferEnd)) {
1921 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
1922 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1927 if (isCodeCompletionPoint(CurPtr-1)) {
1929 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1934 NulCharacter = CurPtr-1;
1936 C = getAndAdvanceChar(CurPtr, Result);
1941 CurPtr = LexUDSuffix(Result, CurPtr,
false);
1945 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
1948 const char *TokStart = BufferPtr;
1949 FormTokenWithChars(Result, CurPtr, Kind);
1959 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
1960 bool &TokAtPhysicalStartOfLine) {
1964 unsigned char Char = *CurPtr;
1989 FormTokenWithChars(Result, CurPtr, tok::unknown);
1991 IsAtStartOfLine =
true;
1992 IsAtPhysicalStartOfLine =
true;
1999 char PrevChar = CurPtr[-1];
2005 TokAtPhysicalStartOfLine =
true;
2018 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2019 bool &TokAtPhysicalStartOfLine) {
2023 Diag(BufferPtr, diag::ext_line_comment);
2027 LangOpts.LineComment =
true;
2038 C !=
'\n' && C !=
'\r')
2041 const char *NextLine = CurPtr;
2044 const char *EscapePtr = CurPtr-1;
2045 bool HasSpace =
false;
2051 if (*EscapePtr ==
'\\')
2053 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2054 EscapePtr[-2] ==
'?')
2055 CurPtr = EscapePtr-2;
2061 Diag(EscapePtr, diag::backslash_newline_space);
2068 const char *OldPtr = CurPtr;
2071 C = getAndAdvanceChar(CurPtr, Result);
2076 if (C != 0 && CurPtr == OldPtr+1) {
2084 if (CurPtr != OldPtr+1 && C !=
'/' && CurPtr[0] !=
'/') {
2085 for (; OldPtr != CurPtr; ++OldPtr)
2086 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2090 const char *ForwardPtr = CurPtr;
2093 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2098 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2103 if (CurPtr == BufferEnd+1) {
2108 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2114 }
while (C !=
'\n' && C !=
'\r');
2127 return SaveLineComment(Result, CurPtr);
2145 TokAtPhysicalStartOfLine =
true;
2154 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2157 FormTokenWithChars(Result, CurPtr, tok::comment);
2169 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2184 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2190 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2192 if (CurPtr[0] == CurPtr[1])
2200 bool HasSpace =
false;
2207 if (*CurPtr ==
'\\') {
2208 if (CurPtr[-1] !=
'*')
return false;
2211 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2222 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2226 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2231 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2235 L->
Diag(CurPtr, diag::backslash_newline_space);
2256 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2257 bool &TokAtPhysicalStartOfLine) {
2267 unsigned char C = getCharAndSize(CurPtr, CharSize);
2269 if (C == 0 && CurPtr == BufferEnd+1) {
2271 Diag(BufferPtr, diag::err_unterminated_block_comment);
2277 FormTokenWithChars(Result, CurPtr, tok::unknown);
2293 if (CurPtr + 24 < BufferEnd &&
2298 while (C !=
'/' && ((intptr_t)CurPtr & 0x0F) != 0)
2301 if (C ==
'/')
goto FoundSlash;
2305 while (CurPtr+16 <= BufferEnd) {
2312 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2318 __vector
unsigned char Slashes = {
2319 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2320 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2322 while (CurPtr+16 <= BufferEnd &&
2323 !
vec_any_eq(*(
const vector
unsigned char*)CurPtr, Slashes))
2327 while (CurPtr[0] !=
'/' &&
2331 CurPtr+4 < BufferEnd) {
2341 while (C !=
'/' && C !=
'\0')
2346 if (CurPtr[-2] ==
'*')
2349 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2356 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2361 Diag(CurPtr-1, diag::warn_nested_block_comment);
2363 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2365 Diag(BufferPtr, diag::err_unterminated_block_comment);
2374 FormTokenWithChars(Result, CurPtr, tok::unknown);
2380 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2399 FormTokenWithChars(Result, CurPtr, tok::comment);
2408 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2426 "Must be in a preprocessing directive!");
2430 const char *CurPtr = BufferPtr;
2432 char Char = getAndAdvanceChar(CurPtr, Tmp);
2436 Result->push_back(Char);
2440 if (CurPtr-1 != BufferEnd) {
2441 if (isCodeCompletionPoint(CurPtr-1)) {
2449 Result->push_back(Char);
2456 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2457 BufferPtr = CurPtr-1;
2461 if (Tmp.
is(tok::code_completion)) {
2466 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
2478 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
2486 FormTokenWithChars(Result, CurPtr, tok::eod);
2498 BufferPtr = BufferEnd;
2499 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2509 diag::err_pp_unterminated_conditional);
2515 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
2520 if (LangOpts.CPlusPlus11) {
2524 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2525 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2527 DiagID = diag::warn_no_newline_eof;
2530 DiagID = diag::ext_no_newline_eof;
2533 Diag(BufferEnd, DiagID)
2547 unsigned Lexer::isNextPPTokenLParen() {
2548 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
2556 const char *TmpBufferPtr = BufferPtr;
2558 bool atStartOfLine = IsAtStartOfLine;
2559 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2560 bool leadingSpace = HasLeadingSpace;
2566 BufferPtr = TmpBufferPtr;
2568 HasLeadingSpace = leadingSpace;
2569 IsAtStartOfLine = atStartOfLine;
2570 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
2577 return Tok.
is(tok::l_paren);
2583 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2585 StringRef RestOfBuffer(CurPtr+TermLen, BufferEnd-CurPtr-TermLen);
2586 size_t Pos = RestOfBuffer.find(Terminator);
2587 while (Pos != StringRef::npos) {
2590 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2591 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2592 Pos = RestOfBuffer.find(Terminator);
2595 return RestOfBuffer.data()+Pos;
2604 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
2606 if (CurPtr != BufferStart &&
2607 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2611 if ((BufferEnd-CurPtr < 8 || StringRef(CurPtr, 7) !=
"<<<<<<<") &&
2612 (BufferEnd-CurPtr < 6 || StringRef(CurPtr, 5) !=
">>>> "))
2627 Diag(CurPtr, diag::err_conflict_marker);
2628 CurrentConflictMarkerState =
Kind;
2632 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2633 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
2649 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
2651 if (CurPtr != BufferStart &&
2652 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2661 for (
unsigned i = 1; i != 4; ++i)
2662 if (CurPtr[i] != CurPtr[0])
2669 CurrentConflictMarkerState)) {
2673 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
2679 CurrentConflictMarkerState =
CMK_None;
2686 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
2695 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
2698 char Kind = getCharAndSize(StartPtr, CharSize);
2700 unsigned NumHexDigits;
2703 else if (Kind ==
'U')
2708 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2710 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
2714 const char *CurPtr = StartPtr + CharSize;
2715 const char *KindLoc = &CurPtr[-1];
2717 uint32_t CodePoint = 0;
2718 for (
unsigned i = 0; i < NumHexDigits; ++i) {
2719 char C = getCharAndSize(CurPtr, CharSize);
2721 unsigned Value = llvm::hexDigitValue(C);
2725 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2726 << StringRef(KindLoc, 1);
2728 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
2731 if (i == 4 && NumHexDigits == 8) {
2733 Diag(KindLoc, diag::note_ucn_four_not_eight)
2750 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
2753 while (StartPtr != CurPtr)
2754 (void)getAndAdvanceChar(StartPtr, *Result);
2760 if (LangOpts.AsmPreprocessor)
2774 if (CodePoint < 0xA0) {
2775 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
2781 if (CodePoint < 0x20 || CodePoint >= 0x7F)
2782 Diag(BufferPtr, diag::err_ucn_control_character);
2784 char C =
static_cast<char>(CodePoint);
2785 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
2791 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
2796 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
2797 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
2799 Diag(BufferPtr, diag::err_ucn_escape_invalid);
2807 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t C,
2808 const char *CurPtr) {
2809 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
2812 UnicodeWhitespaceChars.contains(C)) {
2813 Diag(BufferPtr, diag::ext_unicode_whitespace)
2822 bool Lexer::LexUnicode(
Token &Result, uint32_t C,
const char *CurPtr) {
2832 return LexIdentifier(Result, CurPtr);
2847 Diag(BufferPtr, diag::err_non_ascii)
2857 FormTokenWithChars(Result, CurPtr, tok::unknown);
2861 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
2868 bool Lexer::Lex(
Token &Result) {
2873 if (IsAtStartOfLine) {
2875 IsAtStartOfLine =
false;
2878 if (HasLeadingSpace) {
2880 HasLeadingSpace =
false;
2883 if (HasLeadingEmptyMacro) {
2885 HasLeadingEmptyMacro =
false;
2888 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2889 IsAtPhysicalStartOfLine =
false;
2892 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
2894 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
2895 return returnedToken;
2903 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
2910 const char *CurPtr = BufferPtr;
2913 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
2915 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
2922 FormTokenWithChars(Result, CurPtr, tok::unknown);
2931 unsigned SizeTmp, SizeTmp2;
2934 char Char = getAndAdvanceChar(CurPtr, Result);
2940 if (CurPtr-1 == BufferEnd)
2941 return LexEndOfFile(Result, CurPtr-1);
2944 if (isCodeCompletionPoint(CurPtr-1)) {
2947 FormTokenWithChars(Result, CurPtr, tok::code_completion);
2952 Diag(CurPtr-1, diag::null_in_file);
2954 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
2963 if (LangOpts.MicrosoftExt) {
2965 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
2966 return LexEndOfFile(Result, CurPtr-1);
2970 Kind = tok::unknown;
2986 IsAtStartOfLine =
true;
2987 IsAtPhysicalStartOfLine =
true;
2996 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3006 SkipHorizontalWhitespace:
3008 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3017 LangOpts.LineComment &&
3018 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3019 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3021 goto SkipIgnoredUnits;
3023 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3025 goto SkipIgnoredUnits;
3027 goto SkipHorizontalWhitespace;
3035 case '0':
case '1':
case '2':
case '3':
case '4':
3036 case '5':
case '6':
case '7':
case '8':
case '9':
3039 return LexNumericConstant(Result, CurPtr);
3045 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3046 Char = getCharAndSize(CurPtr, SizeTmp);
3050 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3051 tok::utf16_string_literal);
3055 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3056 tok::utf16_char_constant);
3059 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3060 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3061 return LexRawStringLiteral(Result,
3062 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3064 tok::utf16_string_literal);
3067 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3071 return LexStringLiteral(Result,
3072 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3074 tok::utf8_string_literal);
3075 if (Char2 ==
'\'' && LangOpts.CPlusPlus1z)
3076 return LexCharConstant(
3077 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3079 tok::utf8_char_constant);
3081 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3083 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3086 return LexRawStringLiteral(Result,
3087 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3090 tok::utf8_string_literal);
3097 return LexIdentifier(Result, CurPtr);
3103 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3104 Char = getCharAndSize(CurPtr, SizeTmp);
3108 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3109 tok::utf32_string_literal);
3113 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3114 tok::utf32_char_constant);
3117 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3118 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3119 return LexRawStringLiteral(Result,
3120 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3122 tok::utf32_string_literal);
3126 return LexIdentifier(Result, CurPtr);
3132 if (LangOpts.CPlusPlus11) {
3133 Char = getCharAndSize(CurPtr, SizeTmp);
3136 return LexRawStringLiteral(Result,
3137 ConsumeChar(CurPtr, SizeTmp, Result),
3138 tok::string_literal);
3142 return LexIdentifier(Result, CurPtr);
3147 Char = getCharAndSize(CurPtr, SizeTmp);
3151 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3152 tok::wide_string_literal);
3155 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3156 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3157 return LexRawStringLiteral(Result,
3158 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3160 tok::wide_string_literal);
3164 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3165 tok::wide_char_constant);
3169 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3170 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3171 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3172 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3173 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3174 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3175 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3176 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3180 return LexIdentifier(Result, CurPtr);
3183 if (LangOpts.DollarIdents) {
3185 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3188 return LexIdentifier(Result, CurPtr);
3191 Kind = tok::unknown;
3198 return LexCharConstant(Result, CurPtr, tok::char_constant);
3204 return LexStringLiteral(Result, CurPtr, tok::string_literal);
3208 Kind = tok::question;
3211 Kind = tok::l_square;
3214 Kind = tok::r_square;
3217 Kind = tok::l_paren;
3220 Kind = tok::r_paren;
3223 Kind = tok::l_brace;
3226 Kind = tok::r_brace;
3229 Char = getCharAndSize(CurPtr, SizeTmp);
3230 if (Char >=
'0' && Char <=
'9') {
3234 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3235 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3236 Kind = tok::periodstar;
3238 }
else if (Char ==
'.' &&
3239 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3240 Kind = tok::ellipsis;
3241 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3248 Char = getCharAndSize(CurPtr, SizeTmp);
3251 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3252 }
else if (Char ==
'=') {
3253 Kind = tok::ampequal;
3254 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3260 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3261 Kind = tok::starequal;
3262 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3268 Char = getCharAndSize(CurPtr, SizeTmp);
3270 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3271 Kind = tok::plusplus;
3272 }
else if (Char ==
'=') {
3273 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3274 Kind = tok::plusequal;
3280 Char = getCharAndSize(CurPtr, SizeTmp);
3282 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3283 Kind = tok::minusminus;
3284 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3285 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3286 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3288 Kind = tok::arrowstar;
3289 }
else if (Char ==
'>') {
3290 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3292 }
else if (Char ==
'=') {
3293 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3294 Kind = tok::minusequal;
3303 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3304 Kind = tok::exclaimequal;
3305 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3307 Kind = tok::exclaim;
3312 Char = getCharAndSize(CurPtr, SizeTmp);
3322 bool TreatAsComment = LangOpts.LineComment &&
3323 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3324 if (!TreatAsComment)
3326 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3328 if (TreatAsComment) {
3329 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3330 TokAtPhysicalStartOfLine))
3336 goto SkipIgnoredUnits;
3341 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3342 TokAtPhysicalStartOfLine))
3351 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3352 Kind = tok::slashequal;
3358 Char = getCharAndSize(CurPtr, SizeTmp);
3360 Kind = tok::percentequal;
3361 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3362 }
else if (LangOpts.Digraphs && Char ==
'>') {
3363 Kind = tok::r_brace;
3364 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3365 }
else if (LangOpts.Digraphs && Char ==
':') {
3366 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3367 Char = getCharAndSize(CurPtr, SizeTmp);
3368 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3369 Kind = tok::hashhash;
3370 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3372 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3373 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3375 Diag(BufferPtr, diag::ext_charize_microsoft);
3382 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3383 goto HandleDirective;
3388 Kind = tok::percent;
3392 Char = getCharAndSize(CurPtr, SizeTmp);
3394 return LexAngledStringLiteral(Result, CurPtr);
3395 }
else if (Char ==
'<') {
3396 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3398 Kind = tok::lesslessequal;
3399 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3401 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3405 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3409 }
else if (LangOpts.CUDA && After ==
'<') {
3410 Kind = tok::lesslessless;
3411 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3414 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3415 Kind = tok::lessless;
3417 }
else if (Char ==
'=') {
3418 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3419 Kind = tok::lessequal;
3420 }
else if (LangOpts.Digraphs && Char ==
':') {
3421 if (LangOpts.CPlusPlus11 &&
3422 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3429 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3430 if (After !=
':' && After !=
'>') {
3433 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3438 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3439 Kind = tok::l_square;
3440 }
else if (LangOpts.Digraphs && Char ==
'%') {
3441 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3442 Kind = tok::l_brace;
3448 Char = getCharAndSize(CurPtr, SizeTmp);
3450 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3451 Kind = tok::greaterequal;
3452 }
else if (Char ==
'>') {
3453 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3455 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3457 Kind = tok::greatergreaterequal;
3458 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3462 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3465 }
else if (LangOpts.CUDA && After ==
'>') {
3466 Kind = tok::greatergreatergreater;
3467 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3470 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3471 Kind = tok::greatergreater;
3475 Kind = tok::greater;
3479 Char = getCharAndSize(CurPtr, SizeTmp);
3481 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3482 Kind = tok::caretequal;
3488 Char = getCharAndSize(CurPtr, SizeTmp);
3490 Kind = tok::pipeequal;
3491 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3492 }
else if (Char ==
'|') {
3494 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3496 Kind = tok::pipepipe;
3497 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3503 Char = getCharAndSize(CurPtr, SizeTmp);
3504 if (LangOpts.Digraphs && Char ==
'>') {
3505 Kind = tok::r_square;
3506 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3507 }
else if (LangOpts.CPlusPlus && Char ==
':') {
3508 Kind = tok::coloncolon;
3509 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3518 Char = getCharAndSize(CurPtr, SizeTmp);
3521 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3524 Kind = tok::equalequal;
3525 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3534 Char = getCharAndSize(CurPtr, SizeTmp);
3536 Kind = tok::hashhash;
3537 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3538 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3541 Diag(BufferPtr, diag::ext_charize_microsoft);
3542 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3548 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3549 goto HandleDirective;
3557 if (CurPtr[-1] ==
'@' && LangOpts.ObjC1)
3560 Kind = tok::unknown;
3565 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3566 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3567 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3575 return LexUnicode(Result, CodePoint, CurPtr);
3578 Kind = tok::unknown;
3583 Kind = tok::unknown;
3592 ConversionResult Status =
3593 llvm::convertUTF8Sequence((
const UTF8 **)&CurPtr,
3594 (
const UTF8 *)BufferEnd,
3597 if (Status == conversionOK) {
3598 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3599 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3606 return LexUnicode(Result, CodePoint, CurPtr);
3612 Kind = tok::unknown;
3619 Diag(CurPtr, diag::err_invalid_utf8);
3621 BufferPtr = CurPtr+1;
3633 FormTokenWithChars(Result, CurPtr, Kind);
3639 FormTokenWithChars(Result, CurPtr, tok::hash);
3644 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
SourceManager & getSourceManager() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
This is a discriminated union of FileInfo and ExpansionInfo.
SourceLocation getBegin() const
static std::pair< unsigned, bool > ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
static LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
void setBegin(SourceLocation b)
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
Defines the SourceManager interface.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
const ExpansionInfo & getExpansion() const
std::unique_ptr< llvm::MemoryBuffer > Buffer
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool hasUCN() const
Returns true if this token contains a universal character name.
void setFlag(TokenFlags Flag)
Set the specified flag.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
static LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
static LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.
One of these records is kept for each identifier that is lexed.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
void setRawIdentifierData(const char *Ptr)
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
static LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
bool getCommentRetentionState() const
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
bool hadModuleLoaderFatalFailure() const
static LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
tok::TokenKind getKind() const
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Character, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isBlockCommentEndOfEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
bool ParsingFilename
True after #include; turns <xx> into a tok::angle_string_literal token.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
static int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
SourceLocation getEnd() const
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
Defines the clang::Preprocessor interface.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
The result type of a method or function.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
static CharSourceRange getCharRange(SourceRange R)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
void setIdentifierInfo(IdentifierInfo *II)
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location any where in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
DiagnosticsEngine & getDiagnostics() const
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
std::pair< SourceLocation, SourceLocation > getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
detail::InMemoryDirectory::const_iterator E
SourceLocation getExpansionLocStart() const
void setLiteralData(const char *Ptr)
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
bool isMacroArgExpansion() const
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for the this identifier.
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
__PTRDIFF_TYPE__ ptrdiff_t
static LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
bool HandleComment(Token &Token, SourceRange Comment)
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
Not within a conflict marker.
static LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionMode - Change the comment retention mode of the lexer to the specified mode...
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
unsigned getLength() const
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
static LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
void startToken()
Reset all flags to cleared.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by escaping '\' and " characters. This does not add surrounding ""'s to the string.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getSpellingLoc() const
IdentifierInfo * getIdentifierInfo() const