clang  3.8.0
Token.h
Go to the documentation of this file.
1 //===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the Token interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_TOKEN_H
15 #define LLVM_CLANG_LEX_TOKEN_H
16 
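17 #include "clang/Basic/OperatorKinds.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/TemplateKinds.h"
20 #include "clang/Basic/TokenKinds.h"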
21 #include "llvm/ADT/StringRef.h"
22 #include <cstdlib>
23 
24 namespace clang {
25 
26 class IdentifierInfo;
27 
28 /// Token - This structure provides full information about a lexed token.
29 /// It is not intended to be space efficient, it is intended to return as much
30 /// information as possible about each returned token. This is expected to be
31 /// compressed into a smaller form if memory footprint is important.
32 ///
33 /// The parser can create a special "annotation token" representing a stream of
34 /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
35 /// can be represented by a single typename annotation token that carries
36 /// information about the SourceRange of the tokens and the type object.
37 class Token {
38  /// The location of the token. This is actually a SourceLocation.
39  unsigned Loc;
40 
41  // Conceptually these next two fields could be in a union. However, this
42  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
43  // routine. Keeping as separate members with casts until a more beautiful fix
44  // presents itself.
45 
46  /// UintData - This holds either the length of the token text, when
47  /// a normal token, or the end of the SourceRange when an annotation
48  /// token.
49  unsigned UintData;
50 
51  /// PtrData - This is a union of four different pointer types, which depends
52  /// on what type of token this is:
53  /// Identifiers, keywords, etc:
54  /// This is an IdentifierInfo*, which contains the uniqued identifier
55  /// spelling.
56  /// Literals: isLiteral() returns true.
57  /// This is a pointer to the start of the token in a text buffer, which
58  /// may be dirty (have trigraphs / escaped newlines).
59  /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
60  /// This is a pointer to sema-specific data for the annotation token.
61  /// Eof:
62  /// This is a pointer to a Decl.
63  /// Other:
64  /// This is null.
65  void *PtrData;
66 
67  /// Kind - The actual flavor of token this is.
68  tok::TokenKind Kind;
69 
70  /// Flags - Bits we track about this token, members of the TokenFlags enum.
71  unsigned short Flags;
72 public:
73 
74  // Various flags set per token:
75  enum TokenFlags {
76  StartOfLine = 0x01, // At start of line or only after whitespace
77  // (considering the line after macro expansion).
78  LeadingSpace = 0x02, // Whitespace exists before this token (considering
79  // whitespace after macro expansion).
80  DisableExpand = 0x04, // This identifier may never be macro expanded.
81  NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
82  LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
83  HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
84  HasUCN = 0x40, // This identifier contains a UCN.
85  IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
86  StringifiedInMacro = 0x100, // This string or character literal is formed by
87  // macro stringizing or charizing operator.
88  };
89 
90  tok::TokenKind getKind() const { return Kind; }
91  void setKind(tok::TokenKind K) { Kind = K; }
92 
93  /// is/isNot - Predicates to check if this token is a specific kind, as in
94  /// "if (Tok.is(tok::l_brace)) {...}".
95  bool is(tok::TokenKind K) const { return Kind == K; }
96  bool isNot(tok::TokenKind K) const { return Kind != K; }
97  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
98  return is(K1) || is(K2);
99  }
100  template <typename... Ts>
101  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
102  return is(K1) || isOneOf(K2, Ks...);
103  }
104 
105  /// \brief Return true if this is a raw identifier (when lexing
106  /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
107  bool isAnyIdentifier() const {
108  return tok::isAnyIdentifier(getKind());
109  }
110 
111  /// \brief Return true if this is a "literal", like a numeric
112  /// constant, string, etc.
113  bool isLiteral() const {
114  return tok::isLiteral(getKind());
115  }
116 
117  /// \brief Return true if this is any of tok::annot_* kind tokens.
118  bool isAnnotation() const {
119  return tok::isAnnotation(getKind());
120  }
121 
122  /// \brief Return a source location identifier for the specified
123  /// offset in the current file.
124  SourceLocation getLocation() const {
125  return SourceLocation::getFromRawEncoding(Loc);
126  }
127  unsigned getLength() const {
128  assert(!isAnnotation() && "Annotation tokens have no length field");
129  return UintData;
130  }
131 
132  void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
133  void setLength(unsigned Len) {
134  assert(!isAnnotation() && "Annotation tokens have no length field");
135  UintData = Len;
136  }
137 
138  SourceLocation getAnnotationEndLoc() const {
139  assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
140  return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
141  }
142  void setAnnotationEndLoc(SourceLocation L) {
143  assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
144  UintData = L.getRawEncoding();
145  }
146 
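147  SourceLocation getLastLoc() const {
148  return isAnnotation() ? getAnnotationEndLoc() : getLocation();
149  }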
150 
151  SourceLocation getEndLoc() const {
152  return isAnnotation() ? getAnnotationEndLoc()
153  : getLocation().getLocWithOffset(getLength());
154  }
155 
156  /// \brief SourceRange of the group of tokens that this annotation token
157  /// represents.
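158  SourceRange getAnnotationRange() const {
159  return SourceRange(getLocation(), getAnnotationEndLoc());
160  }
161  void setAnnotationRange(SourceRange R) {
162  setLocation(R.getBegin());
163  setAnnotationEndLoc(R.getEnd());
164  }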
165 
166  const char *getName() const { return tok::getTokenName(Kind); }
167 
168  /// \brief Reset all flags to cleared.
169  void startToken() {
170  Kind = tok::unknown;
171  Flags = 0;
172  PtrData = nullptr;
173  UintData = 0;
174  Loc = SourceLocation().getRawEncoding();
175  }
176 
177  IdentifierInfo *getIdentifierInfo() const {
178  assert(isNot(tok::raw_identifier) &&
179  "getIdentifierInfo() on a tok::raw_identifier token!");
180  assert(!isAnnotation() &&
181  "getIdentifierInfo() on an annotation token!");
182  if (isLiteral()) return nullptr;
183  if (is(tok::eof)) return nullptr;
184  return (IdentifierInfo*) PtrData;
185  }
186  void setIdentifierInfo(IdentifierInfo *II) {
187  PtrData = (void*) II;
188  }
189 
190  const void *getEofData() const {
191  assert(is(tok::eof));
192  return reinterpret_cast<const void *>(PtrData);
193  }
194  void setEofData(const void *D) {
195  assert(is(tok::eof));
196  assert(!PtrData);
197  PtrData = const_cast<void *>(D);
198  }
199 
200  /// getRawIdentifier - For a raw identifier token (i.e., an identifier
201  /// lexed in raw mode), returns a reference to the text substring in the
202  /// buffer if known.
203  StringRef getRawIdentifier() const {
204  assert(is(tok::raw_identifier));
205  return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
206  }
207  void setRawIdentifierData(const char *Ptr) {
208  assert(is(tok::raw_identifier));
209  PtrData = const_cast<char*>(Ptr);
210  }
211 
212  /// getLiteralData - For a literal token (numeric constant, string, etc), this
213  /// returns a pointer to the start of it in the text buffer if known, null
214  /// otherwise.
215  const char *getLiteralData() const {
216  assert(isLiteral() && "Cannot get literal data of non-literal");
217  return reinterpret_cast<const char*>(PtrData);
218  }
219  void setLiteralData(const char *Ptr) {
220  assert(isLiteral() && "Cannot set literal data of non-literal");
221  PtrData = const_cast<char*>(Ptr);
222  }
223 
224  void *getAnnotationValue() const {
225  assert(isAnnotation() && "Used AnnotVal on non-annotation token");
226  return PtrData;
227  }
228  void setAnnotationValue(void *val) {
229  assert(isAnnotation() && "Used AnnotVal on non-annotation token");
230  PtrData = val;
231  }
232 
233  /// \brief Set the specified flag.
234  void setFlag(TokenFlags Flag) {
235  Flags |= Flag;
236  }
237 
238  /// \brief Unset the specified flag.
239  void clearFlag(TokenFlags Flag) {
240  Flags &= ~Flag;
241  }
242 
243  /// \brief Return the internal representation of the flags.
244  ///
245  /// This is only intended for low-level operations such as writing tokens to
246  /// disk.
247  unsigned getFlags() const {
248  return Flags;
249  }
250 
251  /// \brief Set a flag to either true or false.
252  void setFlagValue(TokenFlags Flag, bool Val) {
253  if (Val)
254  setFlag(Flag);
255  else
256  clearFlag(Flag);
257  }
258 
259  /// isAtStartOfLine - Return true if this token is at the start of a line.
260  ///
261  bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; }
262 
263  /// \brief Return true if this token has whitespace before it.
264  ///
265  bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; }
266 
267  /// \brief Return true if this identifier token should never
268  /// be expanded in the future, due to C99 6.10.3.4p2.
269  bool isExpandDisabled() const {
270  return (Flags & DisableExpand) ? true : false;
271  }
272 
273  /// \brief Return true if we have an ObjC keyword identifier.
274  bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
275 
276  /// \brief Return the ObjC keyword kind.
277  tok::ObjCKeywordKind getObjCKeywordID() const;
278 
279  /// \brief Return true if this token has trigraphs or escaped newlines in it.
280  bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; }
281 
282  /// \brief Return true if this token has an empty macro before it.
283  ///
284  bool hasLeadingEmptyMacro() const {
285  return (Flags & LeadingEmptyMacro) ? true : false;
286  }
287 
288  /// \brief Return true if this token is a string or character literal which
289  /// has a ud-suffix.
290  bool hasUDSuffix() const { return (Flags & HasUDSuffix) ? true : false; }
291 
292  /// Returns true if this token contains a universal character name.
293  bool hasUCN() const { return (Flags & HasUCN) ? true : false; }
294 
295  /// Returns true if this token was formed by the macro stringizing or
296  /// charizing operator.
297  bool stringifiedInMacro() const {
298  return (Flags & StringifiedInMacro) ? true : false;
299  }
300 };
301 
302 /// \brief Information about the conditional stack (\#if directives)
303 /// currently active.
304 struct PPConditionalInfo {
305  /// \brief Location where the conditional started.
306  SourceLocation IfLoc;
307 
308  /// \brief True if this was contained in a skipping directive, e.g.,
309  /// in a "\#if 0" block.
310  bool WasSkipping;
311 
312  /// \brief True if we have emitted tokens already, and now we're in
313  /// an \#else block or something. Only useful in Skipping blocks.
314  bool FoundNonSkip;
315 
316  /// \brief True if we've seen a \#else in this block. If so,
317  /// \#elif/\#else directives are not allowed.
318  bool FoundElse;
319 };
320 
321 } // end namespace clang
322 
323 namespace llvm {
324  template <>
325  struct isPodLike<clang::Token> { static const bool value = true; };
326 } // end namespace llvm
327 
328 #endif
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:261
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:43
SourceLocation getEnd() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:252
const char * getName() const
Definition: Token.h:166
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:265
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:293
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:234
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:280
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
One of these records is kept for each identifier that is lexed.
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
Definition: Token.h:297
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:207
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
Token - This structure provides full information about a lexed token.
Definition: Token.h:37
void setKind(tok::TokenKind K)
Definition: Token.h:91
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
tok::TokenKind getKind() const
Definition: Token.h:90
bool FoundNonSkip
True if we have emitted tokens already, and now we're in an #else block or something.
Definition: Token.h:314
Defines the clang::TemplateNameKind enum.
void * getAnnotationValue() const
Definition: Token.h:224
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Definition: Token.h:203
const void * getEofData() const
Definition: Token.h:190
void setAnnotationRange(SourceRange R)
Definition: Token.h:161
SourceRange getAnnotationRange() const
SourceRange of the group of tokens that this annotation token represents.
Definition: Token.h:158
void setAnnotationValue(void *val)
Definition: Token.h:228
void setEofData(const void *D)
Definition: Token.h:194
Defines an enumeration for C++ overloaded operators.
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix. ...
Definition: Token.h:290
FormatToken * Token
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts...Ks) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Information about the conditional stack (#if directives) currently active.
Definition: Token.h:304
SourceLocation getAnnotationEndLoc() const
Definition: Token.h:138
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:215
Kind
bool WasSkipping
True if this was contained in a skipping directive, e.g., in a "\#if 0" block.
Definition: Token.h:310
Encodes a location in the source.
void setLength(unsigned Len)
Definition: Token.h:133
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
Definition: TokenKinds.h:95
SourceLocation getEndLoc() const
Definition: Token.h:151
void setAnnotationEndLoc(SourceLocation L)
Definition: Token.h:142
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:36
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:186
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:87
SourceLocation getLastLoc() const
Definition: Token.h:147
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
SourceLocation getBegin() const
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
SourceLocation IfLoc
Location where the conditional started.
Definition: Token.h:306
unsigned getFlags() const
Return the internal representation of the flags.
Definition: Token.h:247
void setLiteralData(const char *Ptr)
Definition: Token.h:219
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:284
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:25
unsigned getLength() const
Definition: Token.h:127
void setLocation(SourceLocation L)
Definition: Token.h:132
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:239
bool FoundElse
True if we've seen a #else in this block.
Definition: Token.h:318
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:118
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6...
Definition: Token.h:269
bool isAnyIdentifier(TokenKind K)
Return true if this is a raw identifier or an identifier kind.
Definition: TokenKinds.h:73
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177