clang  3.8.0
TokenAnnotator.cpp
Go to the documentation of this file.
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "format-token-annotator"
22 
23 namespace clang {
24 namespace format {
25 
26 namespace {
27 
28 /// \brief A parser that gathers additional information about tokens.
29 ///
30 /// The \c TokenAnnotator tries to match parenthesis and square brakets and
31 /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
32 /// into template parameter lists.
33 class AnnotatingParser {
34 public:
35  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
36  const AdditionalKeywords &Keywords)
37  : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
38  Keywords(Keywords) {
39  Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
40  resetTokenMetadata(CurrentToken);
41  }
42 
43 private:
44  bool parseAngle() {
45  if (!CurrentToken)
46  return false;
47  FormatToken *Left = CurrentToken->Previous;
48  Left->ParentBracket = Contexts.back().ContextKind;
49  ScopedContextCreator ContextCreator(*this, tok::less, 10);
50 
51  // If this angle is in the context of an expression, we need to be more
52  // hesitant to detect it as opening template parameters.
53  bool InExprContext = Contexts.back().IsExpression;
54 
55  Contexts.back().IsExpression = false;
56  // If there's a template keyword before the opening angle bracket, this is a
57  // template parameter, not an argument.
58  Contexts.back().InTemplateArgument =
59  Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
60 
61  if (Style.Language == FormatStyle::LK_Java &&
62  CurrentToken->is(tok::question))
63  next();
64 
65  while (CurrentToken) {
66  if (CurrentToken->is(tok::greater)) {
67  Left->MatchingParen = CurrentToken;
68  CurrentToken->MatchingParen = Left;
69  CurrentToken->Type = TT_TemplateCloser;
70  next();
71  return true;
72  }
73  if (CurrentToken->is(tok::question) &&
74  Style.Language == FormatStyle::LK_Java) {
75  next();
76  continue;
77  }
78  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
79  (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
80  return false;
81  // If a && or || is found and interpreted as a binary operator, this set
82  // of angles is likely part of something like "a < b && c > d". If the
83  // angles are inside an expression, the ||/&& might also be a binary
84  // operator that was misinterpreted because we are parsing template
85  // parameters.
86  // FIXME: This is getting out of hand, write a decent parser.
87  if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
88  CurrentToken->Previous->is(TT_BinaryOperator) &&
89  Contexts[Contexts.size() - 2].IsExpression &&
90  !Line.startsWith(tok::kw_template))
91  return false;
92  updateParameterCount(Left, CurrentToken);
93  if (!consumeToken())
94  return false;
95  }
96  return false;
97  }
98 
99  bool parseParens(bool LookForDecls = false) {
100  if (!CurrentToken)
101  return false;
102  FormatToken *Left = CurrentToken->Previous;
103  Left->ParentBracket = Contexts.back().ContextKind;
104  ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
105 
106  // FIXME: This is a bit of a hack. Do better.
107  Contexts.back().ColonIsForRangeExpr =
108  Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
109 
110  bool StartsObjCMethodExpr = false;
111  if (CurrentToken->is(tok::caret)) {
112  // (^ can start a block type.
113  Left->Type = TT_ObjCBlockLParen;
114  } else if (FormatToken *MaybeSel = Left->Previous) {
115  // @selector( starts a selector.
116  if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
117  MaybeSel->Previous->is(tok::at)) {
118  StartsObjCMethodExpr = true;
119  }
120  }
121 
122  if (Left->is(TT_OverloadedOperatorLParen)) {
123  Contexts.back().IsExpression = false;
124  } else if (Left->Previous &&
125  (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
126  tok::kw_if, tok::kw_while, tok::l_paren,
127  tok::comma) ||
128  Left->Previous->is(TT_BinaryOperator))) {
129  // static_assert, if and while usually contain expressions.
130  Contexts.back().IsExpression = true;
131  } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
132  Left->Previous->MatchingParen &&
133  Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
134  // This is a parameter list of a lambda expression.
135  Contexts.back().IsExpression = false;
136  } else if (Line.InPPDirective &&
137  (!Left->Previous || !Left->Previous->is(tok::identifier))) {
138  Contexts.back().IsExpression = true;
139  } else if (Contexts[Contexts.size() - 2].CaretFound) {
140  // This is the parameter list of an ObjC block.
141  Contexts.back().IsExpression = false;
142  } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
143  Left->Type = TT_AttributeParen;
144  } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
145  // The first argument to a foreach macro is a declaration.
146  Contexts.back().IsForEachMacro = true;
147  Contexts.back().IsExpression = false;
148  } else if (Left->Previous && Left->Previous->MatchingParen &&
149  Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
150  Contexts.back().IsExpression = false;
151  } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
152  bool IsForOrCatch =
153  Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
154  Contexts.back().IsExpression = !IsForOrCatch;
155  }
156 
157  if (StartsObjCMethodExpr) {
158  Contexts.back().ColonIsObjCMethodExpr = true;
159  Left->Type = TT_ObjCMethodExpr;
160  }
161 
162  bool MightBeFunctionType = CurrentToken->isOneOf(tok::star, tok::amp) &&
163  !Contexts[Contexts.size() - 2].IsExpression;
164  bool HasMultipleLines = false;
165  bool HasMultipleParametersOnALine = false;
166  bool MightBeObjCForRangeLoop =
167  Left->Previous && Left->Previous->is(tok::kw_for);
168  while (CurrentToken) {
169  // LookForDecls is set when "if (" has been seen. Check for
170  // 'identifier' '*' 'identifier' followed by not '=' -- this
171  // '*' has to be a binary operator but determineStarAmpUsage() will
172  // categorize it as an unary operator, so set the right type here.
173  if (LookForDecls && CurrentToken->Next) {
174  FormatToken *Prev = CurrentToken->getPreviousNonComment();
175  if (Prev) {
176  FormatToken *PrevPrev = Prev->getPreviousNonComment();
177  FormatToken *Next = CurrentToken->Next;
178  if (PrevPrev && PrevPrev->is(tok::identifier) &&
179  Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
180  CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
181  Prev->Type = TT_BinaryOperator;
182  LookForDecls = false;
183  }
184  }
185  }
186 
187  if (CurrentToken->Previous->is(TT_PointerOrReference) &&
188  CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
189  tok::coloncolon))
190  MightBeFunctionType = true;
191  if (CurrentToken->Previous->is(TT_BinaryOperator))
192  Contexts.back().IsExpression = true;
193  if (CurrentToken->is(tok::r_paren)) {
194  if (MightBeFunctionType && CurrentToken->Next &&
195  (CurrentToken->Next->is(tok::l_paren) ||
196  (CurrentToken->Next->is(tok::l_square) &&
197  Line.MustBeDeclaration)))
198  Left->Type = TT_FunctionTypeLParen;
199  Left->MatchingParen = CurrentToken;
200  CurrentToken->MatchingParen = Left;
201 
202  if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
203  Left->Previous && Left->Previous->is(tok::l_paren)) {
204  // Detect the case where macros are used to generate lambdas or
205  // function bodies, e.g.:
206  // auto my_lambda = MARCO((Type *type, int i) { .. body .. });
207  for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
208  if (Tok->is(TT_BinaryOperator) &&
209  Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
210  Tok->Type = TT_PointerOrReference;
211  }
212  }
213 
214  if (StartsObjCMethodExpr) {
215  CurrentToken->Type = TT_ObjCMethodExpr;
216  if (Contexts.back().FirstObjCSelectorName) {
217  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
218  Contexts.back().LongestObjCSelectorName;
219  }
220  }
221 
222  if (Left->is(TT_AttributeParen))
223  CurrentToken->Type = TT_AttributeParen;
224  if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
225  CurrentToken->Type = TT_JavaAnnotation;
226  if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
227  CurrentToken->Type = TT_LeadingJavaAnnotation;
228 
229  if (!HasMultipleLines)
230  Left->PackingKind = PPK_Inconclusive;
231  else if (HasMultipleParametersOnALine)
232  Left->PackingKind = PPK_BinPacked;
233  else
234  Left->PackingKind = PPK_OnePerLine;
235 
236  next();
237  return true;
238  }
239  if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
240  return false;
241 
242  if (CurrentToken->is(tok::l_brace))
243  Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
244  if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
245  !CurrentToken->Next->HasUnescapedNewline &&
246  !CurrentToken->Next->isTrailingComment())
247  HasMultipleParametersOnALine = true;
248  if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
249  CurrentToken->isSimpleTypeSpecifier())
250  Contexts.back().IsExpression = false;
251  if (CurrentToken->isOneOf(tok::semi, tok::colon))
252  MightBeObjCForRangeLoop = false;
253  if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
254  CurrentToken->Type = TT_ObjCForIn;
255  // When we discover a 'new', we set CanBeExpression to 'false' in order to
256  // parse the type correctly. Reset that after a comma.
257  if (CurrentToken->is(tok::comma))
258  Contexts.back().CanBeExpression = true;
259 
260  FormatToken *Tok = CurrentToken;
261  if (!consumeToken())
262  return false;
263  updateParameterCount(Left, Tok);
264  if (CurrentToken && CurrentToken->HasUnescapedNewline)
265  HasMultipleLines = true;
266  }
267  return false;
268  }
269 
270  bool parseSquare() {
271  if (!CurrentToken)
272  return false;
273 
274  // A '[' could be an index subscript (after an identifier or after
275  // ')' or ']'), it could be the start of an Objective-C method
276  // expression, or it could the start of an Objective-C array literal.
277  FormatToken *Left = CurrentToken->Previous;
278  Left->ParentBracket = Contexts.back().ContextKind;
279  FormatToken *Parent = Left->getPreviousNonComment();
280  bool StartsObjCMethodExpr =
281  Style.Language == FormatStyle::LK_Cpp &&
282  Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
283  CurrentToken->isNot(tok::l_brace) &&
284  (!Parent ||
285  Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
286  tok::kw_return, tok::kw_throw) ||
287  Parent->isUnaryOperator() ||
288  Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
289  getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
290  bool ColonFound = false;
291 
292  unsigned BindingIncrease = 1;
293  if (Left->is(TT_Unknown)) {
294  if (StartsObjCMethodExpr) {
295  Left->Type = TT_ObjCMethodExpr;
296  } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
297  Contexts.back().ContextKind == tok::l_brace &&
298  Parent->isOneOf(tok::l_brace, tok::comma)) {
299  Left->Type = TT_JsComputedPropertyName;
300  } else if (Style.Language == FormatStyle::LK_Proto ||
301  (Parent &&
302  Parent->isOneOf(TT_BinaryOperator, tok::at, tok::comma,
303  tok::l_paren, tok::l_square, tok::question,
304  tok::colon, tok::kw_return,
305  // Should only be relevant to JavaScript:
306  tok::kw_default))) {
307  Left->Type = TT_ArrayInitializerLSquare;
308  } else {
309  BindingIncrease = 10;
310  Left->Type = TT_ArraySubscriptLSquare;
311  }
312  }
313 
314  ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
315  Contexts.back().IsExpression = true;
316  Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
317 
318  while (CurrentToken) {
319  if (CurrentToken->is(tok::r_square)) {
320  if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
321  Left->is(TT_ObjCMethodExpr)) {
322  // An ObjC method call is rarely followed by an open parenthesis.
323  // FIXME: Do we incorrectly label ":" with this?
324  StartsObjCMethodExpr = false;
325  Left->Type = TT_Unknown;
326  }
327  if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
328  CurrentToken->Type = TT_ObjCMethodExpr;
329  // determineStarAmpUsage() thinks that '*' '[' is allocating an
330  // array of pointers, but if '[' starts a selector then '*' is a
331  // binary operator.
332  if (Parent && Parent->is(TT_PointerOrReference))
333  Parent->Type = TT_BinaryOperator;
334  }
335  Left->MatchingParen = CurrentToken;
336  CurrentToken->MatchingParen = Left;
337  if (Contexts.back().FirstObjCSelectorName) {
338  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
339  Contexts.back().LongestObjCSelectorName;
340  if (Left->BlockParameterCount > 1)
341  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
342  }
343  next();
344  return true;
345  }
346  if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
347  return false;
348  if (CurrentToken->is(tok::colon)) {
349  if (Left->is(TT_ArraySubscriptLSquare)) {
350  Left->Type = TT_ObjCMethodExpr;
351  StartsObjCMethodExpr = true;
352  Contexts.back().ColonIsObjCMethodExpr = true;
353  if (Parent && Parent->is(tok::r_paren))
354  Parent->Type = TT_CastRParen;
355  }
356  ColonFound = true;
357  }
358  if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
359  !ColonFound)
360  Left->Type = TT_ArrayInitializerLSquare;
361  FormatToken *Tok = CurrentToken;
362  if (!consumeToken())
363  return false;
364  updateParameterCount(Left, Tok);
365  }
366  return false;
367  }
368 
369  bool parseBrace() {
370  if (CurrentToken) {
371  FormatToken *Left = CurrentToken->Previous;
372  Left->ParentBracket = Contexts.back().ContextKind;
373 
374  if (Contexts.back().CaretFound)
375  Left->Type = TT_ObjCBlockLBrace;
376  Contexts.back().CaretFound = false;
377 
378  ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
379  Contexts.back().ColonIsDictLiteral = true;
380  if (Left->BlockKind == BK_BracedInit)
381  Contexts.back().IsExpression = true;
382 
383  while (CurrentToken) {
384  if (CurrentToken->is(tok::r_brace)) {
385  Left->MatchingParen = CurrentToken;
386  CurrentToken->MatchingParen = Left;
387  next();
388  return true;
389  }
390  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
391  return false;
392  updateParameterCount(Left, CurrentToken);
393  if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
394  FormatToken *Previous = CurrentToken->getPreviousNonComment();
395  if (((CurrentToken->is(tok::colon) &&
396  (!Contexts.back().ColonIsDictLiteral ||
397  Style.Language != FormatStyle::LK_Cpp)) ||
398  Style.Language == FormatStyle::LK_Proto) &&
399  Previous->Tok.getIdentifierInfo())
400  Previous->Type = TT_SelectorName;
401  if (CurrentToken->is(tok::colon) ||
402  Style.Language == FormatStyle::LK_JavaScript)
403  Left->Type = TT_DictLiteral;
404  }
405  if (!consumeToken())
406  return false;
407  }
408  }
409  return true;
410  }
411 
412  void updateParameterCount(FormatToken *Left, FormatToken *Current) {
413  if (Current->is(tok::l_brace) && !Current->is(TT_DictLiteral))
414  ++Left->BlockParameterCount;
415  if (Current->is(tok::comma)) {
416  ++Left->ParameterCount;
417  if (!Left->Role)
418  Left->Role.reset(new CommaSeparatedList(Style));
419  Left->Role->CommaFound(Current);
420  } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
421  Left->ParameterCount = 1;
422  }
423  }
424 
425  bool parseConditional() {
426  while (CurrentToken) {
427  if (CurrentToken->is(tok::colon)) {
428  CurrentToken->Type = TT_ConditionalExpr;
429  next();
430  return true;
431  }
432  if (!consumeToken())
433  return false;
434  }
435  return false;
436  }
437 
438  bool parseTemplateDeclaration() {
439  if (CurrentToken && CurrentToken->is(tok::less)) {
440  CurrentToken->Type = TT_TemplateOpener;
441  next();
442  if (!parseAngle())
443  return false;
444  if (CurrentToken)
445  CurrentToken->Previous->ClosesTemplateDeclaration = true;
446  return true;
447  }
448  return false;
449  }
450 
451  bool consumeToken() {
452  FormatToken *Tok = CurrentToken;
453  next();
454  switch (Tok->Tok.getKind()) {
455  case tok::plus:
456  case tok::minus:
457  if (!Tok->Previous && Line.MustBeDeclaration)
458  Tok->Type = TT_ObjCMethodSpecifier;
459  break;
460  case tok::colon:
461  if (!Tok->Previous)
462  return false;
463  // Colons from ?: are handled in parseConditional().
464  if (Style.Language == FormatStyle::LK_JavaScript) {
465  if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
466  (Contexts.size() == 1 && // switch/case labels
467  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
468  Contexts.back().ContextKind == tok::l_paren || // function params
469  Contexts.back().ContextKind == tok::l_square || // array type
470  (Contexts.size() == 1 &&
471  Line.MustBeDeclaration)) { // method/property declaration
472  Tok->Type = TT_JsTypeColon;
473  break;
474  }
475  }
476  if (Contexts.back().ColonIsDictLiteral ||
477  Style.Language == FormatStyle::LK_Proto) {
478  Tok->Type = TT_DictLiteral;
479  } else if (Contexts.back().ColonIsObjCMethodExpr ||
480  Line.startsWith(TT_ObjCMethodSpecifier)) {
481  Tok->Type = TT_ObjCMethodExpr;
482  Tok->Previous->Type = TT_SelectorName;
483  if (Tok->Previous->ColumnWidth >
484  Contexts.back().LongestObjCSelectorName)
485  Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
486  if (!Contexts.back().FirstObjCSelectorName)
487  Contexts.back().FirstObjCSelectorName = Tok->Previous;
488  } else if (Contexts.back().ColonIsForRangeExpr) {
489  Tok->Type = TT_RangeBasedForLoopColon;
490  } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
491  Tok->Type = TT_BitFieldColon;
492  } else if (Contexts.size() == 1 &&
493  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
494  if (Tok->Previous->is(tok::r_paren))
495  Tok->Type = TT_CtorInitializerColon;
496  else
497  Tok->Type = TT_InheritanceColon;
498  } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
499  Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
500  // This handles a special macro in ObjC code where selectors including
501  // the colon are passed as macro arguments.
502  Tok->Type = TT_ObjCMethodExpr;
503  } else if (Contexts.back().ContextKind == tok::l_paren) {
504  Tok->Type = TT_InlineASMColon;
505  }
506  break;
507  case tok::kw_if:
508  case tok::kw_while:
509  if (CurrentToken && CurrentToken->is(tok::l_paren)) {
510  next();
511  if (!parseParens(/*LookForDecls=*/true))
512  return false;
513  }
514  break;
515  case tok::kw_for:
516  Contexts.back().ColonIsForRangeExpr = true;
517  next();
518  if (!parseParens())
519  return false;
520  break;
521  case tok::l_paren:
522  // When faced with 'operator()()', the kw_operator handler incorrectly
523  // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
524  // the first two parens OverloadedOperators and the second l_paren an
525  // OverloadedOperatorLParen.
526  if (Tok->Previous &&
527  Tok->Previous->is(tok::r_paren) &&
528  Tok->Previous->MatchingParen &&
529  Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
530  Tok->Previous->Type = TT_OverloadedOperator;
531  Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
532  Tok->Type = TT_OverloadedOperatorLParen;
533  }
534 
535  if (!parseParens())
536  return false;
537  if (Line.MustBeDeclaration && Contexts.size() == 1 &&
538  !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
539  (!Tok->Previous ||
540  !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
541  TT_LeadingJavaAnnotation)))
542  Line.MightBeFunctionDecl = true;
543  break;
544  case tok::l_square:
545  if (!parseSquare())
546  return false;
547  break;
548  case tok::l_brace:
549  if (!parseBrace())
550  return false;
551  break;
552  case tok::less:
553  if (!NonTemplateLess.count(Tok) &&
554  (!Tok->Previous ||
555  (!Tok->Previous->Tok.isLiteral() &&
556  !(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) &&
557  parseAngle()) {
558  Tok->Type = TT_TemplateOpener;
559  } else {
560  Tok->Type = TT_BinaryOperator;
561  NonTemplateLess.insert(Tok);
562  CurrentToken = Tok;
563  next();
564  }
565  break;
566  case tok::r_paren:
567  case tok::r_square:
568  return false;
569  case tok::r_brace:
570  // Lines can start with '}'.
571  if (Tok->Previous)
572  return false;
573  break;
574  case tok::greater:
575  Tok->Type = TT_BinaryOperator;
576  break;
577  case tok::kw_operator:
578  while (CurrentToken &&
579  !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
580  if (CurrentToken->isOneOf(tok::star, tok::amp))
581  CurrentToken->Type = TT_PointerOrReference;
582  consumeToken();
583  if (CurrentToken &&
584  CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))
585  CurrentToken->Previous->Type = TT_OverloadedOperator;
586  }
587  if (CurrentToken) {
588  CurrentToken->Type = TT_OverloadedOperatorLParen;
589  if (CurrentToken->Previous->is(TT_BinaryOperator))
590  CurrentToken->Previous->Type = TT_OverloadedOperator;
591  }
592  break;
593  case tok::question:
594  if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
595  Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
596  tok::r_brace)) {
597  // Question marks before semicolons, colons, etc. indicate optional
598  // types (fields, parameters), e.g.
599  // function(x?: string, y?) {...}
600  // class X { y?; }
601  Tok->Type = TT_JsTypeOptionalQuestion;
602  break;
603  }
604  // Declarations cannot be conditional expressions, this can only be part
605  // of a type declaration.
606  if (Line.MustBeDeclaration &&
607  Style.Language == FormatStyle::LK_JavaScript)
608  break;
609  parseConditional();
610  break;
611  case tok::kw_template:
612  parseTemplateDeclaration();
613  break;
614  case tok::comma:
615  if (Contexts.back().InCtorInitializer)
616  Tok->Type = TT_CtorInitializerComma;
617  else if (Contexts.back().FirstStartOfName &&
618  (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
619  Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
620  Line.IsMultiVariableDeclStmt = true;
621  }
622  if (Contexts.back().IsForEachMacro)
623  Contexts.back().IsExpression = true;
624  break;
625  default:
626  break;
627  }
628  return true;
629  }
630 
631  void parseIncludeDirective() {
632  if (CurrentToken && CurrentToken->is(tok::less)) {
633  next();
634  while (CurrentToken) {
635  if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
636  CurrentToken->Type = TT_ImplicitStringLiteral;
637  next();
638  }
639  }
640  }
641 
642  void parseWarningOrError() {
643  next();
644  // We still want to format the whitespace left of the first token of the
645  // warning or error.
646  next();
647  while (CurrentToken) {
648  CurrentToken->Type = TT_ImplicitStringLiteral;
649  next();
650  }
651  }
652 
653  void parsePragma() {
654  next(); // Consume "pragma".
655  if (CurrentToken &&
656  CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
657  bool IsMark = CurrentToken->is(Keywords.kw_mark);
658  next(); // Consume "mark".
659  next(); // Consume first token (so we fix leading whitespace).
660  while (CurrentToken) {
661  if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
662  CurrentToken->Type = TT_ImplicitStringLiteral;
663  next();
664  }
665  }
666  }
667 
668  LineType parsePreprocessorDirective() {
670  next();
671  if (!CurrentToken)
672  return Type;
673  if (CurrentToken->Tok.is(tok::numeric_constant)) {
674  CurrentToken->SpacesRequiredBefore = 1;
675  return Type;
676  }
677  // Hashes in the middle of a line can lead to any strange token
678  // sequence.
679  if (!CurrentToken->Tok.getIdentifierInfo())
680  return Type;
681  switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
682  case tok::pp_include:
683  case tok::pp_include_next:
684  case tok::pp_import:
685  next();
686  parseIncludeDirective();
687  Type = LT_ImportStatement;
688  break;
689  case tok::pp_error:
690  case tok::pp_warning:
691  parseWarningOrError();
692  break;
693  case tok::pp_pragma:
694  parsePragma();
695  break;
696  case tok::pp_if:
697  case tok::pp_elif:
698  Contexts.back().IsExpression = true;
699  parseLine();
700  break;
701  default:
702  break;
703  }
704  while (CurrentToken)
705  next();
706  return Type;
707  }
708 
709 public:
710  LineType parseLine() {
711  NonTemplateLess.clear();
712  if (CurrentToken->is(tok::hash))
713  return parsePreprocessorDirective();
714 
715  // Directly allow to 'import <string-literal>' to support protocol buffer
716  // definitions (code.google.com/p/protobuf) or missing "#" (either way we
717  // should not break the line).
718  IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
719  if ((Style.Language == FormatStyle::LK_Java &&
720  CurrentToken->is(Keywords.kw_package)) ||
721  (Info && Info->getPPKeywordID() == tok::pp_import &&
722  CurrentToken->Next &&
723  CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
724  tok::kw_static))) {
725  next();
726  parseIncludeDirective();
727  return LT_ImportStatement;
728  }
729 
730  // If this line starts and ends in '<' and '>', respectively, it is likely
731  // part of "#define <a/b.h>".
732  if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
733  parseIncludeDirective();
734  return LT_ImportStatement;
735  }
736 
737  // In .proto files, top-level options are very similar to import statements
738  // and should not be line-wrapped.
739  if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
740  CurrentToken->is(Keywords.kw_option)) {
741  next();
742  if (CurrentToken && CurrentToken->is(tok::identifier))
743  return LT_ImportStatement;
744  }
745 
746  bool KeywordVirtualFound = false;
747  bool ImportStatement = false;
748  while (CurrentToken) {
749  if (CurrentToken->is(tok::kw_virtual))
750  KeywordVirtualFound = true;
751  if (isImportStatement(*CurrentToken))
752  ImportStatement = true;
753  if (!consumeToken())
754  return LT_Invalid;
755  }
756  if (KeywordVirtualFound)
757  return LT_VirtualFunctionDecl;
758  if (ImportStatement)
759  return LT_ImportStatement;
760 
761  if (Line.startsWith(TT_ObjCMethodSpecifier)) {
762  if (Contexts.back().FirstObjCSelectorName)
763  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
764  Contexts.back().LongestObjCSelectorName;
765  return LT_ObjCMethodDecl;
766  }
767 
768  return LT_Other;
769  }
770 
771 private:
772  bool isImportStatement(const FormatToken &Tok) {
773  // FIXME: Closure-library specific stuff should not be hard-coded but be
774  // configurable.
775  return Style.Language == FormatStyle::LK_JavaScript &&
776  Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
777  Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
778  Tok.Next->Next->TokenText == "provide" ||
779  Tok.Next->Next->TokenText == "require" ||
780  Tok.Next->Next->TokenText == "setTestOnly") &&
781  Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
782  }
783 
784  void resetTokenMetadata(FormatToken *Token) {
785  if (!Token)
786  return;
787 
788  // Reset token type in case we have already looked at it and then
789  // recovered from an error (e.g. failure to find the matching >).
790  if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
791  TT_FunctionLBrace, TT_ImplicitStringLiteral,
792  TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
793  TT_RegexLiteral))
794  CurrentToken->Type = TT_Unknown;
795  CurrentToken->Role.reset();
796  CurrentToken->MatchingParen = nullptr;
797  CurrentToken->FakeLParens.clear();
798  CurrentToken->FakeRParens = 0;
799  }
800 
801  void next() {
802  if (CurrentToken) {
803  CurrentToken->NestingLevel = Contexts.size() - 1;
804  CurrentToken->BindingStrength = Contexts.back().BindingStrength;
805  modifyContext(*CurrentToken);
806  determineTokenType(*CurrentToken);
807  CurrentToken = CurrentToken->Next;
808  }
809 
810  resetTokenMetadata(CurrentToken);
811  }
812 
813  /// \brief A struct to hold information valid in a specific context, e.g.
814  /// a pair of parenthesis.
815  struct Context {
817  bool IsExpression)
818  : ContextKind(ContextKind), BindingStrength(BindingStrength),
819  IsExpression(IsExpression) {}
820 
822  unsigned BindingStrength;
825  bool ColonIsForRangeExpr = false;
826  bool ColonIsDictLiteral = false;
827  bool ColonIsObjCMethodExpr = false;
828  FormatToken *FirstObjCSelectorName = nullptr;
829  FormatToken *FirstStartOfName = nullptr;
830  bool CanBeExpression = true;
831  bool InTemplateArgument = false;
832  bool InCtorInitializer = false;
833  bool CaretFound = false;
834  bool IsForEachMacro = false;
835  };
836 
837  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
838  /// of each instance.
839  struct ScopedContextCreator {
840  AnnotatingParser &P;
841 
842  ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
843  unsigned Increase)
844  : P(P) {
845  P.Contexts.push_back(Context(ContextKind,
846  P.Contexts.back().BindingStrength + Increase,
847  P.Contexts.back().IsExpression));
848  }
849 
850  ~ScopedContextCreator() { P.Contexts.pop_back(); }
851  };
852 
853  void modifyContext(const FormatToken &Current) {
854  if (Current.getPrecedence() == prec::Assignment &&
855  !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
856  (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
857  Contexts.back().IsExpression = true;
858  if (!Line.startsWith(TT_UnaryOperator)) {
859  for (FormatToken *Previous = Current.Previous;
860  Previous && Previous->Previous &&
861  !Previous->Previous->isOneOf(tok::comma, tok::semi);
862  Previous = Previous->Previous) {
863  if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
864  Previous = Previous->MatchingParen;
865  if (!Previous)
866  break;
867  }
868  if (Previous->opensScope())
869  break;
870  if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
871  Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
872  Previous->Previous && Previous->Previous->isNot(tok::equal))
873  Previous->Type = TT_PointerOrReference;
874  }
875  }
876  } else if (Current.is(tok::lessless) &&
877  (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
878  Contexts.back().IsExpression = true;
879  } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
880  Contexts.back().IsExpression = true;
881  } else if (Current.is(TT_TrailingReturnArrow)) {
882  Contexts.back().IsExpression = false;
883  } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
884  Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
885  } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
886  for (FormatToken *Previous = Current.Previous;
887  Previous && Previous->isOneOf(tok::star, tok::amp);
888  Previous = Previous->Previous)
889  Previous->Type = TT_PointerOrReference;
890  if (Line.MustBeDeclaration)
891  Contexts.back().IsExpression = Contexts.front().InCtorInitializer;
892  } else if (Current.Previous &&
893  Current.Previous->is(TT_CtorInitializerColon)) {
894  Contexts.back().IsExpression = true;
895  Contexts.back().InCtorInitializer = true;
896  } else if (Current.is(tok::kw_new)) {
897  Contexts.back().CanBeExpression = false;
898  } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
899  // This should be the condition or increment in a for-loop.
900  Contexts.back().IsExpression = true;
901  }
902  }
903 
904  void determineTokenType(FormatToken &Current) {
905  if (!Current.is(TT_Unknown))
906  // The token type is already known.
907  return;
908 
909  // Line.MightBeFunctionDecl can only be true after the parentheses of a
910  // function declaration have been found. In this case, 'Current' is a
911  // trailing token of this declaration and thus cannot be a name.
912  if (Current.is(Keywords.kw_instanceof)) {
913  Current.Type = TT_BinaryOperator;
914  } else if (isStartOfName(Current) &&
915  (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
916  Contexts.back().FirstStartOfName = &Current;
917  Current.Type = TT_StartOfName;
918  } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
919  AutoFound = true;
920  } else if (Current.is(tok::arrow) &&
921  Style.Language == FormatStyle::LK_Java) {
922  Current.Type = TT_LambdaArrow;
923  } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
924  Current.NestingLevel == 0) {
925  Current.Type = TT_TrailingReturnArrow;
926  } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
927  Current.Type =
928  determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
929  Contexts.back().IsExpression,
930  Contexts.back().InTemplateArgument);
931  } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
932  Current.Type = determinePlusMinusCaretUsage(Current);
933  if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
934  Contexts.back().CaretFound = true;
935  } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
936  Current.Type = determineIncrementUsage(Current);
937  } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
938  Current.Type = TT_UnaryOperator;
939  } else if (Current.is(tok::question)) {
940  if (Style.Language == FormatStyle::LK_JavaScript &&
941  Line.MustBeDeclaration) {
942  // In JavaScript, `interface X { foo?(): bar; }` is an optional method
943  // on the interface, not a ternary expression.
944  Current.Type = TT_JsTypeOptionalQuestion;
945  } else {
946  Current.Type = TT_ConditionalExpr;
947  }
948  } else if (Current.isBinaryOperator() &&
949  (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
950  Current.Type = TT_BinaryOperator;
951  } else if (Current.is(tok::comment)) {
952  if (Current.TokenText.startswith("/*")) {
953  if (Current.TokenText.endswith("*/"))
954  Current.Type = TT_BlockComment;
955  else
956  // The lexer has for some reason determined a comment here. But we
957  // cannot really handle it, if it isn't properly terminated.
958  Current.Tok.setKind(tok::unknown);
959  } else {
960  Current.Type = TT_LineComment;
961  }
962  } else if (Current.is(tok::r_paren)) {
963  if (rParenEndsCast(Current))
964  Current.Type = TT_CastRParen;
965  if (Current.MatchingParen && Current.Next &&
966  !Current.Next->isBinaryOperator() &&
967  !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace))
968  if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
969  if (BeforeParen->is(tok::identifier) &&
970  BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
971  (!BeforeParen->Previous ||
972  BeforeParen->Previous->ClosesTemplateDeclaration))
973  Current.Type = TT_FunctionAnnotationRParen;
974  } else if (Current.is(tok::at) && Current.Next) {
975  if (Current.Next->isStringLiteral()) {
976  Current.Type = TT_ObjCStringLiteral;
977  } else {
978  switch (Current.Next->Tok.getObjCKeywordID()) {
979  case tok::objc_interface:
980  case tok::objc_implementation:
981  case tok::objc_protocol:
982  Current.Type = TT_ObjCDecl;
983  break;
984  case tok::objc_property:
985  Current.Type = TT_ObjCProperty;
986  break;
987  default:
988  break;
989  }
990  }
991  } else if (Current.is(tok::period)) {
992  FormatToken *PreviousNoComment = Current.getPreviousNonComment();
993  if (PreviousNoComment &&
994  PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
995  Current.Type = TT_DesignatedInitializerPeriod;
996  else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
997  Current.Previous->isOneOf(TT_JavaAnnotation,
998  TT_LeadingJavaAnnotation)) {
999  Current.Type = Current.Previous->Type;
1000  }
1001  } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1002  Current.Previous &&
1003  !Current.Previous->isOneOf(tok::equal, tok::at) &&
1004  Line.MightBeFunctionDecl && Contexts.size() == 1) {
1005  // Line.MightBeFunctionDecl can only be true after the parentheses of a
1006  // function declaration have been found.
1007  Current.Type = TT_TrailingAnnotation;
1008  } else if ((Style.Language == FormatStyle::LK_Java ||
1009  Style.Language == FormatStyle::LK_JavaScript) &&
1010  Current.Previous) {
1011  if (Current.Previous->is(tok::at) &&
1012  Current.isNot(Keywords.kw_interface)) {
1013  const FormatToken &AtToken = *Current.Previous;
1014  const FormatToken *Previous = AtToken.getPreviousNonComment();
1015  if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1016  Current.Type = TT_LeadingJavaAnnotation;
1017  else
1018  Current.Type = TT_JavaAnnotation;
1019  } else if (Current.Previous->is(tok::period) &&
1020  Current.Previous->isOneOf(TT_JavaAnnotation,
1021  TT_LeadingJavaAnnotation)) {
1022  Current.Type = Current.Previous->Type;
1023  }
1024  }
1025  }
1026 
1027  /// \brief Take a guess at whether \p Tok starts a name of a function or
1028  /// variable declaration.
1029  ///
1030  /// This is a heuristic based on whether \p Tok is an identifier following
1031  /// something that is likely a type.
1032  bool isStartOfName(const FormatToken &Tok) {
1033  if (Tok.isNot(tok::identifier) || !Tok.Previous)
1034  return false;
1035 
1036  if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof))
1037  return false;
1038 
1039  // Skip "const" as it does not have an influence on whether this is a name.
1040  FormatToken *PreviousNotConst = Tok.Previous;
1041  while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1042  PreviousNotConst = PreviousNotConst->Previous;
1043 
1044  if (!PreviousNotConst)
1045  return false;
1046 
1047  bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1048  PreviousNotConst->Previous &&
1049  PreviousNotConst->Previous->is(tok::hash);
1050 
1051  if (PreviousNotConst->is(TT_TemplateCloser))
1052  return PreviousNotConst && PreviousNotConst->MatchingParen &&
1053  PreviousNotConst->MatchingParen->Previous &&
1054  PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1055  PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1056 
1057  if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1058  PreviousNotConst->MatchingParen->Previous &&
1059  PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1060  return true;
1061 
1062  return (!IsPPKeyword &&
1063  PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1064  PreviousNotConst->is(TT_PointerOrReference) ||
1065  PreviousNotConst->isSimpleTypeSpecifier();
1066  }
1067 
1068  /// \brief Determine whether ')' is ending a cast.
1069  bool rParenEndsCast(const FormatToken &Tok) {
1070  // C-style casts are only used in C++ and Java.
1071  if (Style.Language != FormatStyle::LK_Cpp &&
1072  Style.Language != FormatStyle::LK_Java)
1073  return false;
1074 
1075  // Empty parens aren't casts and there are no casts at the end of the line.
1076  if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1077  return false;
1078 
1079  FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1080  if (LeftOfParens) {
1081  // If there is an opening parenthesis left of the current parentheses,
1082  // look past it as these might be chained casts.
1083  if (LeftOfParens->is(tok::r_paren)) {
1084  if (!LeftOfParens->MatchingParen ||
1085  !LeftOfParens->MatchingParen->Previous)
1086  return false;
1087  LeftOfParens = LeftOfParens->MatchingParen->Previous;
1088  }
1089 
1090  // If there is an identifier (or with a few exceptions a keyword) right
1091  // before the parentheses, this is unlikely to be a cast.
1092  if (LeftOfParens->Tok.getIdentifierInfo() &&
1093  !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1094  tok::kw_delete))
1095  return false;
1096 
1097  // Certain other tokens right before the parentheses are also signals that
1098  // this cannot be a cast.
1099  if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1100  TT_TemplateCloser))
1101  return false;
1102  }
1103 
1104  if (Tok.Next->is(tok::question))
1105  return false;
1106 
1107  // As Java has no function types, a "(" after the ")" likely means that this
1108  // is a cast.
1109  if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1110  return true;
1111 
1112  // If a (non-string) literal follows, this is likely a cast.
1113  if (Tok.Next->isNot(tok::string_literal) &&
1114  (Tok.Next->Tok.isLiteral() ||
1115  Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1116  return true;
1117 
1118  // Heuristically try to determine whether the parentheses contain a type.
1119  bool ParensAreType =
1120  !Tok.Previous ||
1121  Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1122  Tok.Previous->isSimpleTypeSpecifier();
1123  bool ParensCouldEndDecl =
1124  Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1125  if (ParensAreType && !ParensCouldEndDecl)
1126  return true;
1127 
1128  // At this point, we heuristically assume that there are no casts at the
1129  // start of the line. We assume that we have found most cases where there
1130  // are by the logic above, e.g. "(void)x;".
1131  if (!LeftOfParens)
1132  return false;
1133 
1134  // If the following token is an identifier, this is a cast. All cases where
1135  // this can be something else are handled above.
1136  if (Tok.Next->is(tok::identifier))
1137  return true;
1138 
1139  if (!Tok.Next->Next)
1140  return false;
1141 
1142  // If the next token after the parenthesis is a unary operator, assume
1143  // that this is cast, unless there are unexpected tokens inside the
1144  // parenthesis.
1145  bool NextIsUnary =
1146  Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1147  if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1148  !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1149  return false;
1150  // Search for unexpected tokens.
1151  for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1152  Prev = Prev->Previous) {
1153  if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1154  return false;
1155  }
1156  return true;
1157  }
1158 
1159  /// \brief Return the type of the given token assuming it is * or &.
1160  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1161  bool InTemplateArgument) {
1162  if (Style.Language == FormatStyle::LK_JavaScript)
1163  return TT_BinaryOperator;
1164 
1165  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1166  if (!PrevToken)
1167  return TT_UnaryOperator;
1168 
1169  const FormatToken *NextToken = Tok.getNextNonComment();
1170  if (!NextToken ||
1171  NextToken->isOneOf(tok::arrow, Keywords.kw_final,
1172  Keywords.kw_override) ||
1173  (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1174  return TT_PointerOrReference;
1175 
1176  if (PrevToken->is(tok::coloncolon))
1177  return TT_PointerOrReference;
1178 
1179  if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1180  tok::comma, tok::semi, tok::kw_return, tok::colon,
1181  tok::equal, tok::kw_delete, tok::kw_sizeof) ||
1182  PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1183  TT_UnaryOperator, TT_CastRParen))
1184  return TT_UnaryOperator;
1185 
1186  if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1187  return TT_PointerOrReference;
1188  if (NextToken->is(tok::kw_operator) && !IsExpression)
1189  return TT_PointerOrReference;
1190  if (NextToken->isOneOf(tok::comma, tok::semi))
1191  return TT_PointerOrReference;
1192 
1193  if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
1194  PrevToken->MatchingParen->Previous &&
1195  PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
1196  tok::kw_decltype))
1197  return TT_PointerOrReference;
1198 
1199  if (PrevToken->Tok.isLiteral() ||
1200  PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1201  tok::kw_false, tok::r_brace) ||
1202  NextToken->Tok.isLiteral() ||
1203  NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1204  NextToken->isUnaryOperator() ||
1205  // If we know we're in a template argument, there are no named
1206  // declarations. Thus, having an identifier on the right-hand side
1207  // indicates a binary operator.
1208  (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1209  return TT_BinaryOperator;
1210 
1211  // "&&(" is quite unlikely to be two successive unary "&".
1212  if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1213  return TT_BinaryOperator;
1214 
1215  // This catches some cases where evaluation order is used as control flow:
1216  // aaa && aaa->f();
1217  const FormatToken *NextNextToken = NextToken->getNextNonComment();
1218  if (NextNextToken && NextNextToken->is(tok::arrow))
1219  return TT_BinaryOperator;
1220 
1221  // It is very unlikely that we are going to find a pointer or reference type
1222  // definition on the RHS of an assignment.
1223  if (IsExpression && !Contexts.back().CaretFound)
1224  return TT_BinaryOperator;
1225 
1226  return TT_PointerOrReference;
1227  }
1228 
1229  TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1230  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1231  if (!PrevToken || PrevToken->is(TT_CastRParen))
1232  return TT_UnaryOperator;
1233 
1234  // Use heuristics to recognize unary operators.
1235  if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1236  tok::question, tok::colon, tok::kw_return,
1237  tok::kw_case, tok::at, tok::l_brace))
1238  return TT_UnaryOperator;
1239 
1240  // There can't be two consecutive binary operators.
1241  if (PrevToken->is(TT_BinaryOperator))
1242  return TT_UnaryOperator;
1243 
1244  // Fall back to marking the token as binary operator.
1245  return TT_BinaryOperator;
1246  }
1247 
1248  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1249  TokenType determineIncrementUsage(const FormatToken &Tok) {
1250  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1251  if (!PrevToken || PrevToken->is(TT_CastRParen))
1252  return TT_UnaryOperator;
1253  if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1254  return TT_TrailingUnaryOperator;
1255 
1256  return TT_UnaryOperator;
1257  }
1258 
1259  SmallVector<Context, 8> Contexts;
1260 
1261  const FormatStyle &Style;
1262  AnnotatedLine &Line;
1263  FormatToken *CurrentToken;
1265  const AdditionalKeywords &Keywords;
1266 
1267  // Set of "<" tokens that do not open a template parameter list. If parseAngle
1268  // determines that a specific token can't be a template opener, it will make
1269  // same decision irrespective of the decisions for tokens leading up to it.
1270  // Store this information to prevent this from causing exponential runtime.
1271  llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1272 };
1273 
1274 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1275 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1276 
1277 /// \brief Parses binary expressions by inserting fake parenthesis based on
1278 /// operator precedence.
1279 class ExpressionParser {
1280 public:
1281  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1282  AnnotatedLine &Line)
1283  : Style(Style), Keywords(Keywords), Current(Line.First) {}
1284 
1285  /// \brief Parse expressions with the given operatore precedence.
1286  void parse(int Precedence = 0) {
1287  // Skip 'return' and ObjC selector colons as they are not part of a binary
1288  // expression.
1289  while (Current && (Current->is(tok::kw_return) ||
1290  (Current->is(tok::colon) &&
1291  Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
1292  next();
1293 
1294  if (!Current || Precedence > PrecedenceArrowAndPeriod)
1295  return;
1296 
1297  // Conditional expressions need to be parsed separately for proper nesting.
1298  if (Precedence == prec::Conditional) {
1299  parseConditionalExpr();
1300  return;
1301  }
1302 
1303  // Parse unary operators, which all have a higher precedence than binary
1304  // operators.
1305  if (Precedence == PrecedenceUnaryOperator) {
1306  parseUnaryOperator();
1307  return;
1308  }
1309 
1310  FormatToken *Start = Current;
1311  FormatToken *LatestOperator = nullptr;
1312  unsigned OperatorIndex = 0;
1313 
1314  while (Current) {
1315  // Consume operators with higher precedence.
1316  parse(Precedence + 1);
1317 
1318  int CurrentPrecedence = getCurrentPrecedence();
1319 
1320  if (Current && Current->is(TT_SelectorName) &&
1321  Precedence == CurrentPrecedence) {
1322  if (LatestOperator)
1323  addFakeParenthesis(Start, prec::Level(Precedence));
1324  Start = Current;
1325  }
1326 
1327  // At the end of the line or when an operator with higher precedence is
1328  // found, insert fake parenthesis and return.
1329  if (!Current || (Current->closesScope() && Current->MatchingParen) ||
1330  (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1331  (CurrentPrecedence == prec::Conditional &&
1332  Precedence == prec::Assignment && Current->is(tok::colon))) {
1333  break;
1334  }
1335 
1336  // Consume scopes: (), [], <> and {}
1337  if (Current->opensScope()) {
1338  while (Current && !Current->closesScope()) {
1339  next();
1340  parse();
1341  }
1342  next();
1343  } else {
1344  // Operator found.
1345  if (CurrentPrecedence == Precedence) {
1346  if (LatestOperator)
1347  LatestOperator->NextOperator = Current;
1348  LatestOperator = Current;
1349  Current->OperatorIndex = OperatorIndex;
1350  ++OperatorIndex;
1351  }
1352  next(/*SkipPastLeadingComments=*/Precedence > 0);
1353  }
1354  }
1355 
1356  if (LatestOperator && (Current || Precedence > 0)) {
1357  // LatestOperator->LastOperator = true;
1358  if (Precedence == PrecedenceArrowAndPeriod) {
1359  // Call expressions don't have a binary operator precedence.
1360  addFakeParenthesis(Start, prec::Unknown);
1361  } else {
1362  addFakeParenthesis(Start, prec::Level(Precedence));
1363  }
1364  }
1365  }
1366 
1367 private:
1368  /// \brief Gets the precedence (+1) of the given token for binary operators
1369  /// and other tokens that we treat like binary operators.
1370  int getCurrentPrecedence() {
1371  if (Current) {
1372  const FormatToken *NextNonComment = Current->getNextNonComment();
1373  if (Current->is(TT_ConditionalExpr))
1374  return prec::Conditional;
1375  if (NextNonComment && NextNonComment->is(tok::colon) &&
1376  NextNonComment->is(TT_DictLiteral))
1377  return prec::Comma;
1378  if (Current->is(TT_LambdaArrow))
1379  return prec::Comma;
1380  if (Current->is(TT_JsFatArrow))
1381  return prec::Assignment;
1382  if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName,
1383  TT_JsComputedPropertyName) ||
1384  (Current->is(tok::comment) && NextNonComment &&
1385  NextNonComment->is(TT_SelectorName)))
1386  return 0;
1387  if (Current->is(TT_RangeBasedForLoopColon))
1388  return prec::Comma;
1389  if ((Style.Language == FormatStyle::LK_Java ||
1390  Style.Language == FormatStyle::LK_JavaScript) &&
1391  Current->is(Keywords.kw_instanceof))
1392  return prec::Relational;
1393  if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1394  return Current->getPrecedence();
1395  if (Current->isOneOf(tok::period, tok::arrow))
1396  return PrecedenceArrowAndPeriod;
1397  if (Style.Language == FormatStyle::LK_Java &&
1398  Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1399  Keywords.kw_throws))
1400  return 0;
1401  }
1402  return -1;
1403  }
1404 
1405  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1406  Start->FakeLParens.push_back(Precedence);
1407  if (Precedence > prec::Unknown)
1408  Start->StartsBinaryExpression = true;
1409  if (Current) {
1410  FormatToken *Previous = Current->Previous;
1411  while (Previous->is(tok::comment) && Previous->Previous)
1412  Previous = Previous->Previous;
1413  ++Previous->FakeRParens;
1414  if (Precedence > prec::Unknown)
1415  Previous->EndsBinaryExpression = true;
1416  }
1417  }
1418 
1419  /// \brief Parse unary operator expressions and surround them with fake
1420  /// parentheses if appropriate.
1421  void parseUnaryOperator() {
1422  if (!Current || Current->isNot(TT_UnaryOperator)) {
1423  parse(PrecedenceArrowAndPeriod);
1424  return;
1425  }
1426 
1427  FormatToken *Start = Current;
1428  next();
1429  parseUnaryOperator();
1430 
1431  // The actual precedence doesn't matter.
1432  addFakeParenthesis(Start, prec::Unknown);
1433  }
1434 
1435  void parseConditionalExpr() {
1436  while (Current && Current->isTrailingComment()) {
1437  next();
1438  }
1439  FormatToken *Start = Current;
1440  parse(prec::LogicalOr);
1441  if (!Current || !Current->is(tok::question))
1442  return;
1443  next();
1444  parse(prec::Assignment);
1445  if (!Current || Current->isNot(TT_ConditionalExpr))
1446  return;
1447  next();
1448  parse(prec::Assignment);
1449  addFakeParenthesis(Start, prec::Conditional);
1450  }
1451 
1452  void next(bool SkipPastLeadingComments = true) {
1453  if (Current)
1454  Current = Current->Next;
1455  while (Current &&
1456  (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1457  Current->isTrailingComment())
1458  Current = Current->Next;
1459  }
1460 
1461  const FormatStyle &Style;
1462  const AdditionalKeywords &Keywords;
1463  FormatToken *Current;
1464 };
1465 
1466 } // end anonymous namespace
1467 
1470  const AnnotatedLine *NextNonCommentLine = nullptr;
1472  E = Lines.rend();
1473  I != E; ++I) {
1474  if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1475  (*I)->First->Next == nullptr)
1476  (*I)->Level = NextNonCommentLine->Level;
1477  else
1478  NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1479 
1480  setCommentLineLevels((*I)->Children);
1481  }
1482 }
1483 
1486  E = Line.Children.end();
1487  I != E; ++I) {
1488  annotate(**I);
1489  }
1490  AnnotatingParser Parser(Style, Line, Keywords);
1491  Line.Type = Parser.parseLine();
1492  if (Line.Type == LT_Invalid)
1493  return;
1494 
1495  ExpressionParser ExprParser(Style, Keywords, Line);
1496  ExprParser.parse();
1497 
1498  if (Line.startsWith(TT_ObjCMethodSpecifier))
1499  Line.Type = LT_ObjCMethodDecl;
1500  else if (Line.startsWith(TT_ObjCDecl))
1501  Line.Type = LT_ObjCDecl;
1502  else if (Line.startsWith(TT_ObjCProperty))
1503  Line.Type = LT_ObjCProperty;
1504 
1505  Line.First->SpacesRequiredBefore = 1;
1506  Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1507 }
1508 
1509 // This function heuristically determines whether 'Current' starts the name of a
1510 // function declaration.
1511 static bool isFunctionDeclarationName(const FormatToken &Current) {
1512  auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
1513  for (; Next; Next = Next->Next) {
1514  if (Next->is(TT_OverloadedOperatorLParen))
1515  return Next;
1516  if (Next->is(TT_OverloadedOperator))
1517  continue;
1518  if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1519  // For 'new[]' and 'delete[]'.
1520  if (Next->Next && Next->Next->is(tok::l_square) &&
1521  Next->Next->Next && Next->Next->Next->is(tok::r_square))
1522  Next = Next->Next->Next;
1523  continue;
1524  }
1525 
1526  break;
1527  }
1528  return nullptr;
1529  };
1530 
1531  const FormatToken *Next = Current.Next;
1532  if (Current.is(tok::kw_operator)) {
1533  if (Current.Previous && Current.Previous->is(tok::coloncolon))
1534  return false;
1535  Next = skipOperatorName(Next);
1536  } else {
1537  if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
1538  return false;
1539  for (; Next; Next = Next->Next) {
1540  if (Next->is(TT_TemplateOpener)) {
1541  Next = Next->MatchingParen;
1542  } else if (Next->is(tok::coloncolon)) {
1543  Next = Next->Next;
1544  if (!Next)
1545  return false;
1546  if (Next->is(tok::kw_operator)) {
1547  Next = skipOperatorName(Next->Next);
1548  break;
1549  }
1550  if (!Next->is(tok::identifier))
1551  return false;
1552  } else if (Next->is(tok::l_paren)) {
1553  break;
1554  } else {
1555  return false;
1556  }
1557  }
1558  }
1559 
1560  if (!Next || !Next->is(tok::l_paren))
1561  return false;
1562  if (Next->Next == Next->MatchingParen)
1563  return true;
1564  for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
1565  Tok = Tok->Next) {
1566  if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1567  Tok->isOneOf(TT_PointerOrReference, TT_StartOfName))
1568  return true;
1569  if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
1570  Tok->Tok.isLiteral())
1571  return false;
1572  }
1573  return false;
1574 }
1575 
1576 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
1577  assert(Line.MightBeFunctionDecl);
1578 
1579  if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
1580  Style.AlwaysBreakAfterReturnType ==
1582  Line.Level > 0)
1583  return false;
1584 
1585  switch (Style.AlwaysBreakAfterReturnType) {
1587  return false;
1588  case FormatStyle::RTBS_All:
1590  return true;
1593  return Line.mightBeFunctionDefinition();
1594  }
1595 
1596  return false;
1597 }
1598 
1601  E = Line.Children.end();
1602  I != E; ++I) {
1604  }
1605 
1606  Line.First->TotalLength =
1607  Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1608  if (!Line.First->Next)
1609  return;
1610  FormatToken *Current = Line.First->Next;
1611  bool InFunctionDecl = Line.MightBeFunctionDecl;
1612  while (Current) {
1613  if (isFunctionDeclarationName(*Current))
1614  Current->Type = TT_FunctionDeclarationName;
1615  if (Current->is(TT_LineComment)) {
1616  if (Current->Previous->BlockKind == BK_BracedInit &&
1617  Current->Previous->opensScope())
1618  Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1619  else
1620  Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1621 
1622  // If we find a trailing comment, iterate backwards to determine whether
1623  // it seems to relate to a specific parameter. If so, break before that
1624  // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1625  // to the previous line in:
1626  // SomeFunction(a,
1627  // b, // comment
1628  // c);
1629  if (!Current->HasUnescapedNewline) {
1630  for (FormatToken *Parameter = Current->Previous; Parameter;
1631  Parameter = Parameter->Previous) {
1632  if (Parameter->isOneOf(tok::comment, tok::r_brace))
1633  break;
1634  if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1635  if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
1636  Parameter->HasUnescapedNewline)
1637  Parameter->MustBreakBefore = true;
1638  break;
1639  }
1640  }
1641  }
1642  } else if (Current->SpacesRequiredBefore == 0 &&
1643  spaceRequiredBefore(Line, *Current)) {
1644  Current->SpacesRequiredBefore = 1;
1645  }
1646 
1647  Current->MustBreakBefore =
1648  Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1649 
1650  if (!Current->MustBreakBefore && InFunctionDecl &&
1651  Current->is(TT_FunctionDeclarationName))
1652  Current->MustBreakBefore = mustBreakForReturnType(Line);
1653 
1654  Current->CanBreakBefore =
1655  Current->MustBreakBefore || canBreakBefore(Line, *Current);
1656  unsigned ChildSize = 0;
1657  if (Current->Previous->Children.size() == 1) {
1658  FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1659  ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1660  : LastOfChild.TotalLength + 1;
1661  }
1662  const FormatToken *Prev = Current->Previous;
1663  if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
1664  (Prev->Children.size() == 1 &&
1665  Prev->Children[0]->First->MustBreakBefore) ||
1666  Current->IsMultiline)
1667  Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
1668  else
1669  Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
1670  ChildSize + Current->SpacesRequiredBefore;
1671 
1672  if (Current->is(TT_CtorInitializerColon))
1673  InFunctionDecl = false;
1674 
1675  // FIXME: Only calculate this if CanBreakBefore is true once static
1676  // initializers etc. are sorted out.
1677  // FIXME: Move magic numbers to a better place.
1678  Current->SplitPenalty = 20 * Current->BindingStrength +
1679  splitPenalty(Line, *Current, InFunctionDecl);
1680 
1681  Current = Current->Next;
1682  }
1683 
1684  calculateUnbreakableTailLengths(Line);
1685  for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1686  if (Current->Role)
1687  Current->Role->precomputeFormattingInfos(Current);
1688  }
1689 
1690  DEBUG({ printDebugInfo(Line); });
1691 }
1692 
1693 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1694  unsigned UnbreakableTailLength = 0;
1695  FormatToken *Current = Line.Last;
1696  while (Current) {
1697  Current->UnbreakableTailLength = UnbreakableTailLength;
1698  if (Current->CanBreakBefore ||
1699  Current->isOneOf(tok::comment, tok::string_literal)) {
1700  UnbreakableTailLength = 0;
1701  } else {
1702  UnbreakableTailLength +=
1703  Current->ColumnWidth + Current->SpacesRequiredBefore;
1704  }
1705  Current = Current->Previous;
1706  }
1707 }
1708 
1709 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1710  const FormatToken &Tok,
1711  bool InFunctionDecl) {
1712  const FormatToken &Left = *Tok.Previous;
1713  const FormatToken &Right = Tok;
1714 
1715  if (Left.is(tok::semi))
1716  return 0;
1717 
1718  if (Style.Language == FormatStyle::LK_Java) {
1719  if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
1720  return 1;
1721  if (Right.is(Keywords.kw_implements))
1722  return 2;
1723  if (Left.is(tok::comma) && Left.NestingLevel == 0)
1724  return 3;
1725  } else if (Style.Language == FormatStyle::LK_JavaScript) {
1726  if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
1727  return 100;
1728  if (Left.is(TT_JsTypeColon))
1729  return 35;
1730  }
1731 
1732  if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
1733  Right.Next->is(TT_DictLiteral)))
1734  return 1;
1735  if (Right.is(tok::l_square)) {
1736  if (Style.Language == FormatStyle::LK_Proto)
1737  return 1;
1738  if (Left.is(tok::r_square))
1739  return 25;
1740  // Slightly prefer formatting local lambda definitions like functions.
1741  if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
1742  return 35;
1743  if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
1744  TT_ArrayInitializerLSquare))
1745  return 500;
1746  }
1747 
1748  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
1749  Right.is(tok::kw_operator)) {
1750  if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1751  return 3;
1752  if (Left.is(TT_StartOfName))
1753  return 110;
1754  if (InFunctionDecl && Right.NestingLevel == 0)
1755  return Style.PenaltyReturnTypeOnItsOwnLine;
1756  return 200;
1757  }
1758  if (Right.is(TT_PointerOrReference))
1759  return 190;
1760  if (Right.is(TT_LambdaArrow))
1761  return 110;
1762  if (Left.is(tok::equal) && Right.is(tok::l_brace))
1763  return 150;
1764  if (Left.is(TT_CastRParen))
1765  return 100;
1766  if (Left.is(tok::coloncolon) ||
1767  (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1768  return 500;
1769  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1770  return 5000;
1771 
1772  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon))
1773  return 2;
1774 
1775  if (Right.isMemberAccess()) {
1776  // Breaking before the "./->" of a chained call/member access is reasonably
1777  // cheap, as formatting those with one call per line is generally
1778  // desirable. In particular, it should be cheaper to break before the call
1779  // than it is to break inside a call's parameters, which could lead to weird
1780  // "hanging" indents. The exception is the very last "./->" to support this
1781  // frequent pattern:
1782  //
1783  // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
1784  // dddddddd);
1785  //
1786  // which might otherwise be blown up onto many lines. Here, clang-format
1787  // won't produce "hanging" indents anyway as there is no other trailing
1788  // call.
1789  //
1790  // Also apply higher penalty is not a call as that might lead to a wrapping
1791  // like:
1792  //
1793  // aaaaaaa
1794  // .aaaaaaaaa.bbbbbbbb(cccccccc);
1795  return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
1796  ? 150
1797  : 35;
1798  }
1799 
1800  if (Right.is(TT_TrailingAnnotation) &&
1801  (!Right.Next || Right.Next->isNot(tok::l_paren))) {
1802  // Moving trailing annotations to the next line is fine for ObjC method
1803  // declarations.
1804  if (Line.startsWith(TT_ObjCMethodSpecifier))
1805  return 10;
1806  // Generally, breaking before a trailing annotation is bad unless it is
1807  // function-like. It seems to be especially preferable to keep standard
1808  // annotations (i.e. "const", "final" and "override") on the same line.
1809  // Use a slightly higher penalty after ")" so that annotations like
1810  // "const override" are kept together.
1811  bool is_short_annotation = Right.TokenText.size() < 10;
1812  return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
1813  }
1814 
1815  // In for-loops, prefer breaking at ',' and ';'.
1816  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
1817  return 4;
1818 
1819  // In Objective-C method expressions, prefer breaking before "param:" over
1820  // breaking after it.
1821  if (Right.is(TT_SelectorName))
1822  return 0;
1823  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
1824  return Line.MightBeFunctionDecl ? 50 : 500;
1825 
1826  if (Left.is(tok::l_paren) && InFunctionDecl &&
1827  Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
1828  return 100;
1829  if (Left.is(tok::l_paren) && Left.Previous &&
1830  Left.Previous->isOneOf(tok::kw_if, tok::kw_for))
1831  return 1000;
1832  if (Left.is(tok::equal) && InFunctionDecl)
1833  return 110;
1834  if (Right.is(tok::r_brace))
1835  return 1;
1836  if (Left.is(TT_TemplateOpener))
1837  return 100;
1838  if (Left.opensScope()) {
1839  if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
1840  return 0;
1841  return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1842  : 19;
1843  }
1844  if (Left.is(TT_JavaAnnotation))
1845  return 50;
1846 
1847  if (Right.is(tok::lessless)) {
1848  if (Left.is(tok::string_literal) &&
1849  (Right.NextOperator || Right.OperatorIndex != 1)) {
1850  StringRef Content = Left.TokenText;
1851  if (Content.startswith("\""))
1852  Content = Content.drop_front(1);
1853  if (Content.endswith("\""))
1854  Content = Content.drop_back(1);
1855  Content = Content.trim();
1856  if (Content.size() > 1 &&
1857  (Content.back() == ':' || Content.back() == '='))
1858  return 25;
1859  }
1860  return 1; // Breaking at a << is really cheap.
1861  }
1862  if (Left.is(TT_ConditionalExpr))
1863  return prec::Conditional;
1864  prec::Level Level = Left.getPrecedence();
1865  if (Level != prec::Unknown)
1866  return Level;
1867  Level = Right.getPrecedence();
1868  if (Level != prec::Unknown)
1869  return Level;
1870 
1871  return 3;
1872 }
1873 
1874 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1875  const FormatToken &Left,
1876  const FormatToken &Right) {
1877  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
1878  return true;
1879  if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
1880  Left.Tok.getObjCKeywordID() == tok::objc_property)
1881  return true;
1882  if (Right.is(tok::hashhash))
1883  return Left.is(tok::hash);
1884  if (Left.isOneOf(tok::hashhash, tok::hash))
1885  return Right.is(tok::hash);
1886  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1887  return Style.SpaceInEmptyParentheses;
1888  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1889  return (Right.is(TT_CastRParen) ||
1890  (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
1891  ? Style.SpacesInCStyleCastParentheses
1892  : Style.SpacesInParentheses;
1893  if (Right.isOneOf(tok::semi, tok::comma))
1894  return false;
1895  if (Right.is(tok::less) &&
1896  (Left.is(tok::kw_template) ||
1897  (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1898  return true;
1899  if (Left.isOneOf(tok::exclaim, tok::tilde))
1900  return false;
1901  if (Left.is(tok::at) &&
1902  Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1903  tok::numeric_constant, tok::l_paren, tok::l_brace,
1904  tok::kw_true, tok::kw_false))
1905  return false;
1906  if (Left.is(tok::colon))
1907  return !Left.is(TT_ObjCMethodExpr);
1908  if (Left.is(tok::coloncolon))
1909  return false;
1910  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1911  return false;
1912  if (Right.is(tok::ellipsis))
1913  return Left.Tok.isLiteral();
1914  if (Left.is(tok::l_square) && Right.is(tok::amp))
1915  return false;
1916  if (Right.is(TT_PointerOrReference))
1917  return (Left.is(tok::r_paren) && Left.MatchingParen &&
1918  (Left.MatchingParen->is(TT_OverloadedOperatorLParen) ||
1919  (Left.MatchingParen->Previous &&
1920  Left.MatchingParen->Previous->is(TT_FunctionDeclarationName)))) ||
1921  (Left.Tok.isLiteral() ||
1922  (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
1923  (Style.PointerAlignment != FormatStyle::PAS_Left ||
1924  Line.IsMultiVariableDeclStmt)));
1925  if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
1926  (!Left.is(TT_PointerOrReference) ||
1927  (Style.PointerAlignment != FormatStyle::PAS_Right &&
1928  !Line.IsMultiVariableDeclStmt)))
1929  return true;
1930  if (Left.is(TT_PointerOrReference))
1931  return Right.Tok.isLiteral() ||
1932  Right.isOneOf(TT_BlockComment, Keywords.kw_final,
1933  Keywords.kw_override) ||
1934  (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
1935  (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
1936  tok::l_paren) &&
1937  (Style.PointerAlignment != FormatStyle::PAS_Right &&
1938  !Line.IsMultiVariableDeclStmt) &&
1939  Left.Previous &&
1940  !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1941  if (Right.is(tok::star) && Left.is(tok::l_paren))
1942  return false;
1943  if (Left.is(tok::l_square))
1944  return (Left.is(TT_ArrayInitializerLSquare) &&
1945  Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
1946  (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&
1947  Right.isNot(tok::r_square));
1948  if (Right.is(tok::r_square))
1949  return Right.MatchingParen &&
1950  ((Style.SpacesInContainerLiterals &&
1951  Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
1952  (Style.SpacesInSquareBrackets &&
1953  Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
1954  if (Right.is(tok::l_square) &&
1955  !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) &&
1956  !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
1957  return false;
1958  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1959  return !Left.Children.empty(); // No spaces in "{}".
1960  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
1961  (Right.is(tok::r_brace) && Right.MatchingParen &&
1962  Right.MatchingParen->BlockKind != BK_Block))
1963  return !Style.Cpp11BracedListStyle;
1964  if (Left.is(TT_BlockComment))
1965  return !Left.TokenText.endswith("=*/");
1966  if (Right.is(tok::l_paren)) {
1967  if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen))
1968  return true;
1969  return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
1970  (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
1971  (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
1972  tok::kw_switch, tok::kw_case, TT_ForEachMacro,
1973  TT_ObjCForIn) ||
1974  (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
1975  tok::kw_new, tok::kw_delete) &&
1976  (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
1977  (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
1978  (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
1979  Left.is(tok::r_paren)) &&
1980  Line.Type != LT_PreprocessorDirective);
1981  }
1982  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1983  return false;
1984  if (Right.is(TT_UnaryOperator))
1985  return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1986  (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
1987  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
1988  tok::r_paren) ||
1989  Left.isSimpleTypeSpecifier()) &&
1990  Right.is(tok::l_brace) && Right.getNextNonComment() &&
1991  Right.BlockKind != BK_Block)
1992  return false;
1993  if (Left.is(tok::period) || Right.is(tok::period))
1994  return false;
1995  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1996  return false;
1997  if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
1998  Left.MatchingParen->Previous &&
1999  Left.MatchingParen->Previous->is(tok::period))
2000  // A.<B>DoSomething();
2001  return false;
2002  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
2003  return false;
2004  return true;
2005 }
2006 
2007 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2008  const FormatToken &Right) {
2009  const FormatToken &Left = *Right.Previous;
2010  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2011  return true; // Never ever merge two identifiers.
2012  if (Style.Language == FormatStyle::LK_Cpp) {
2013  if (Left.is(tok::kw_operator))
2014  return Right.is(tok::coloncolon);
2015  } else if (Style.Language == FormatStyle::LK_Proto) {
2016  if (Right.is(tok::period) &&
2017  Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2018  Keywords.kw_repeated, Keywords.kw_extend))
2019  return true;
2020  if (Right.is(tok::l_paren) &&
2021  Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2022  return true;
2023  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2024  if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, TT_JsFatArrow,
2025  Keywords.kw_in))
2026  return true;
2027  if (Left.is(tok::kw_default) && Left.Previous &&
2028  Left.Previous->is(tok::kw_export))
2029  return true;
2030  if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2031  return true;
2032  if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2033  return false;
2034  if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2035  Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2036  return false;
2037  if (Left.is(tok::ellipsis))
2038  return false;
2039  if (Left.is(TT_TemplateCloser) &&
2040  !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2041  Keywords.kw_implements, Keywords.kw_extends))
2042  // Type assertions ('<type>expr') are not followed by whitespace. Other
2043  // locations that should have whitespace following are identified by the
2044  // above set of follower tokens.
2045  return false;
2046  } else if (Style.Language == FormatStyle::LK_Java) {
2047  if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2048  return true;
2049  if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2050  return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
2051  if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2052  tok::kw_protected) ||
2053  Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2054  Keywords.kw_native)) &&
2055  Right.is(TT_TemplateOpener))
2056  return true;
2057  }
2058  if (Left.is(TT_ImplicitStringLiteral))
2059  return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2060  if (Line.Type == LT_ObjCMethodDecl) {
2061  if (Left.is(TT_ObjCMethodSpecifier))
2062  return true;
2063  if (Left.is(tok::r_paren) && Right.is(tok::identifier))
2064  // Don't space between ')' and <id>
2065  return false;
2066  }
2067  if (Line.Type == LT_ObjCProperty &&
2068  (Right.is(tok::equal) || Left.is(tok::equal)))
2069  return false;
2070 
2071  if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2072  Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2073  return true;
2074  if (Right.is(TT_OverloadedOperatorLParen))
2075  return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2076  if (Left.is(tok::comma))
2077  return true;
2078  if (Right.is(tok::comma))
2079  return false;
2080  if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
2081  return true;
2082  if (Right.is(tok::colon)) {
2083  if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2084  !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2085  return false;
2086  if (Right.is(TT_ObjCMethodExpr))
2087  return false;
2088  if (Left.is(tok::question))
2089  return false;
2090  if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2091  return false;
2092  if (Right.is(TT_DictLiteral))
2093  return Style.SpacesInContainerLiterals;
2094  return true;
2095  }
2096  if (Left.is(TT_UnaryOperator))
2097  return Right.is(TT_BinaryOperator);
2098 
2099  // If the next token is a binary operator or a selector name, we have
2100  // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2101  if (Left.is(TT_CastRParen))
2102  return Style.SpaceAfterCStyleCast ||
2103  Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2104 
2105  if (Left.is(tok::greater) && Right.is(tok::greater))
2106  return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2107  (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
2108  if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2109  Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar))
2110  return false;
2111  if (!Style.SpaceBeforeAssignmentOperators &&
2112  Right.getPrecedence() == prec::Assignment)
2113  return false;
2114  if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
2115  return (Left.is(TT_TemplateOpener) &&
2116  Style.Standard == FormatStyle::LS_Cpp03) ||
2117  !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren) ||
2118  Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener));
2119  if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2120  return Style.SpacesInAngles;
2121  if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2122  (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2123  !Right.is(tok::r_paren)))
2124  return true;
2125  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2126  Right.isNot(TT_FunctionTypeLParen))
2127  return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2128  if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2129  Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2130  return false;
2131  if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2132  Line.startsWith(tok::hash))
2133  return true;
2134  if (Right.is(TT_TrailingUnaryOperator))
2135  return false;
2136  if (Left.is(TT_RegexLiteral))
2137  return false;
2138  return spaceRequiredBetween(Line, Left, Right);
2139 }
2140 
2141 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2142 static bool isAllmanBrace(const FormatToken &Tok) {
2143  return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2144  !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
2145 }
2146 
2147 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2148  const FormatToken &Right) {
2149  const FormatToken &Left = *Right.Previous;
2150  if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
2151  return true;
2152 
2153  if (Style.Language == FormatStyle::LK_JavaScript) {
2154  // FIXME: This might apply to other languages and token kinds.
2155  if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
2156  Left.Previous->is(tok::char_constant))
2157  return true;
2158  if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2159  Left.Previous && Left.Previous->is(tok::equal) &&
2160  Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2161  tok::kw_const) &&
2162  // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2163  // above.
2164  !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2165  // Object literals on the top level of a file are treated as "enum-style".
2166  // Each key/value pair is put on a separate line, instead of bin-packing.
2167  return true;
2168  if (Left.is(tok::l_brace) && Line.Level == 0 &&
2169  (Line.startsWith(tok::kw_enum) ||
2170  Line.startsWith(tok::kw_export, tok::kw_enum)))
2171  // JavaScript top-level enum key/value pairs are put on separate lines
2172  // instead of bin-packing.
2173  return true;
2174  if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2175  !Left.Children.empty())
2176  // Support AllowShortFunctionsOnASingleLine for JavaScript.
2177  return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
2178  Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
2179  (Left.NestingLevel == 0 && Line.Level == 0 &&
2180  Style.AllowShortFunctionsOnASingleLine ==
2182  } else if (Style.Language == FormatStyle::LK_Java) {
2183  if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2184  Right.Next->is(tok::string_literal))
2185  return true;
2186  }
2187 
2188  // If the last token before a '}' is a comma or a trailing comment, the
2189  // intention is to insert a line break after it in order to make shuffling
2190  // around entries easier.
2191  const FormatToken *BeforeClosingBrace = nullptr;
2192  if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
2193  Left.BlockKind != BK_Block && Left.MatchingParen)
2194  BeforeClosingBrace = Left.MatchingParen->Previous;
2195  else if (Right.MatchingParen &&
2196  Right.MatchingParen->isOneOf(tok::l_brace,
2197  TT_ArrayInitializerLSquare))
2198  BeforeClosingBrace = &Left;
2199  if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2200  BeforeClosingBrace->isTrailingComment()))
2201  return true;
2202 
2203  if (Right.is(tok::comment))
2204  return Left.BlockKind != BK_BracedInit &&
2205  Left.isNot(TT_CtorInitializerColon) &&
2206  (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
2207  if (Left.isTrailingComment())
2208  return true;
2209  if (Left.isStringLiteral() &&
2210  (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
2211  return true;
2212  if (Right.Previous->IsUnterminatedLiteral)
2213  return true;
2214  if (Right.is(tok::lessless) && Right.Next &&
2215  Right.Previous->is(tok::string_literal) &&
2216  Right.Next->is(tok::string_literal))
2217  return true;
2218  if (Right.Previous->ClosesTemplateDeclaration &&
2219  Right.Previous->MatchingParen &&
2220  Right.Previous->MatchingParen->NestingLevel == 0 &&
2221  Style.AlwaysBreakTemplateDeclarations)
2222  return true;
2223  if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) &&
2224  Style.BreakConstructorInitializersBeforeComma &&
2225  !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2226  return true;
2227  if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2228  // Raw string literals are special wrt. line breaks. The author has made a
2229  // deliberate choice and might have aligned the contents of the string
2230  // literal accordingly. Thus, we try keep existing line breaks.
2231  return Right.NewlinesBefore > 0;
2232  if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
2233  Style.Language == FormatStyle::LK_Proto)
2234  // Don't put enums onto single lines in protocol buffers.
2235  return true;
2236  if (Right.is(TT_InlineASMBrace))
2237  return Right.HasUnescapedNewline;
2238  if (isAllmanBrace(Left) || isAllmanBrace(Right))
2239  return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2240  (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2241  (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
2242  if (Style.Language == FormatStyle::LK_Proto && Left.isNot(tok::l_brace) &&
2243  Right.is(TT_SelectorName))
2244  return true;
2245  if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
2246  return true;
2247 
2248  if ((Style.Language == FormatStyle::LK_Java ||
2249  Style.Language == FormatStyle::LK_JavaScript) &&
2250  Left.is(TT_LeadingJavaAnnotation) &&
2251  Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2252  (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
2253  return true;
2254 
2255  return false;
2256 }
2257 
2258 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
2259  const FormatToken &Right) {
2260  const FormatToken &Left = *Right.Previous;
2261 
2262  // Language-specific stuff.
2263  if (Style.Language == FormatStyle::LK_Java) {
2264  if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2265  Keywords.kw_implements))
2266  return false;
2267  if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2268  Keywords.kw_implements))
2269  return true;
2270  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2271  if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
2272  return false;
2273  if (Left.is(TT_JsTypeColon))
2274  return true;
2275  if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is))
2276  return false;
2277  }
2278 
2279  if (Left.is(tok::at))
2280  return false;
2281  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
2282  return false;
2283  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
2284  return !Right.is(tok::l_paren);
2285  if (Right.is(TT_PointerOrReference))
2286  return Line.IsMultiVariableDeclStmt ||
2287  (Style.PointerAlignment == FormatStyle::PAS_Right &&
2288  (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
2289  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2290  Right.is(tok::kw_operator))
2291  return true;
2292  if (Left.is(TT_PointerOrReference))
2293  return false;
2294  if (Right.isTrailingComment())
2295  // We rely on MustBreakBefore being set correctly here as we should not
2296  // change the "binding" behavior of a comment.
2297  // The first comment in a braced lists is always interpreted as belonging to
2298  // the first list element. Otherwise, it should be placed outside of the
2299  // list.
2300  return Left.BlockKind == BK_BracedInit;
2301  if (Left.is(tok::question) && Right.is(tok::colon))
2302  return false;
2303  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
2304  return Style.BreakBeforeTernaryOperators;
2305  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
2306  return !Style.BreakBeforeTernaryOperators;
2307  if (Right.is(TT_InheritanceColon))
2308  return true;
2309  if (Right.is(tok::colon) &&
2310  !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
2311  return false;
2312  if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))
2313  return true;
2314  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
2315  Right.Next->is(TT_ObjCMethodExpr)))
2316  return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
2317  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
2318  return true;
2319  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
2320  return true;
2321  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
2322  TT_OverloadedOperator))
2323  return false;
2324  if (Left.is(TT_RangeBasedForLoopColon))
2325  return true;
2326  if (Right.is(TT_RangeBasedForLoopColon))
2327  return false;
2328  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
2329  Left.is(tok::kw_operator))
2330  return false;
2331  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
2332  Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
2333  return false;
2334  if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
2335  return false;
2336  if (Left.is(tok::l_paren) && Left.Previous &&
2337  (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
2338  return false;
2339  if (Right.is(TT_ImplicitStringLiteral))
2340  return false;
2341 
2342  if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
2343  return false;
2344  if (Right.is(tok::r_square) && Right.MatchingParen &&
2345  Right.MatchingParen->is(TT_LambdaLSquare))
2346  return false;
2347 
2348  // We only break before r_brace if there was a corresponding break before
2349  // the l_brace, which is tracked by BreakBeforeClosingBrace.
2350  if (Right.is(tok::r_brace))
2351  return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
2352 
2353  // Allow breaking after a trailing annotation, e.g. after a method
2354  // declaration.
2355  if (Left.is(TT_TrailingAnnotation))
2356  return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
2357  tok::less, tok::coloncolon);
2358 
2359  if (Right.is(tok::kw___attribute))
2360  return true;
2361 
2362  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
2363  return true;
2364 
2365  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2366  return true;
2367 
2368  if (Left.is(TT_CtorInitializerComma) &&
2369  Style.BreakConstructorInitializersBeforeComma)
2370  return false;
2371  if (Right.is(TT_CtorInitializerComma) &&
2372  Style.BreakConstructorInitializersBeforeComma)
2373  return true;
2374  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
2375  (Left.is(tok::less) && Right.is(tok::less)))
2376  return false;
2377  if (Right.is(TT_BinaryOperator) &&
2378  Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
2379  (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
2380  Right.getPrecedence() != prec::Assignment))
2381  return true;
2382  if (Left.is(TT_ArrayInitializerLSquare))
2383  return true;
2384  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
2385  return true;
2386  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
2387  !Left.isOneOf(tok::arrowstar, tok::lessless) &&
2388  Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
2389  (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
2390  Left.getPrecedence() == prec::Assignment))
2391  return true;
2392  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
2393  tok::kw_class, tok::kw_struct) ||
2394  Right.isMemberAccess() ||
2395  Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
2396  tok::colon, tok::l_square, tok::at) ||
2397  (Left.is(tok::r_paren) &&
2398  Right.isOneOf(tok::identifier, tok::kw_const)) ||
2399  (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
2400 }
2401 
2402 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
2403  llvm::errs() << "AnnotatedTokens:\n";
2404  const FormatToken *Tok = Line.First;
2405  while (Tok) {
2406  llvm::errs() << " M=" << Tok->MustBreakBefore
2407  << " C=" << Tok->CanBreakBefore
2408  << " T=" << getTokenTypeName(Tok->Type)
2409  << " S=" << Tok->SpacesRequiredBefore
2410  << " B=" << Tok->BlockParameterCount
2411  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
2412  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
2413  << " FakeLParens=";
2414  for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
2415  llvm::errs() << Tok->FakeLParens[i] << "/";
2416  llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
2417  if (!Tok->Next)
2418  assert(Tok == Line.Last);
2419  Tok = Tok->Next;
2420  }
2421  llvm::errs() << "----\n";
2422 }
2423 
2424 } // namespace format
2425 } // namespace clang
Always break after the return type of top-level definitions.
Definition: Format.h:176
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:220
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:430
bool AutoFound
Defines the SourceManager interface.
std::unique_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting...
Definition: FormatToken.h:196
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:290
Parser - This implements a parser for the C family of languages.
Definition: Parse/Parser.h:56
Align pointer to the left.
Definition: Format.h:493
FormatToken * CurrentToken
Should be used for C, C++, ObjectiveC, ObjectiveC++.
Definition: Format.h:423
bool IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:145
bool isNot(T Kind) const
Definition: FormatToken.h:297
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines)
Adapts the indent levels of comment lines to the indent of the subsequent line.
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:203
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:267
Break after operators.
Definition: Format.h:209
bool CanBeExpression
tok::TokenKind ContextKind
unsigned UnbreakableTailLength
The length of following tokens until the next natural split point, or the next token that can be brok...
Definition: FormatToken.h:211
Always break after the return type.
Definition: Format.h:170
unsigned SplitPenalty
Penalty for inserting a line break before this token.
Definition: FormatToken.h:223
bool ColonIsForRangeExpr
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:174
Should be used for Java.
Definition: Format.h:425
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:264
Always break after the return type of function definitions.
Definition: Format.h:174
This file implements a token annotator, i.e.
unsigned SpacesRequiredBefore
The number of spaces that should be inserted before this token.
Definition: FormatToken.h:171
bool closesScope() const
Returns whether Tok is )]} or a template closing >.
Definition: FormatToken.h:326
llvm::SmallPtrSet< FormatToken *, 16 > NonTemplateLess
detail::InMemoryDirectory::const_iterator I
Always put a space before opening parentheses, except when it's prohibited by the syntax rules (in fu...
Definition: Format.h:526
Never merge functions into a single line.
Definition: Format.h:131
AnnotatingParser & P
ASTContext * Context
Break after return type automatically.
Definition: Format.h:168
Only merge empty functions.
Definition: Format.h:133
Should be used for JavaScript.
Definition: Format.h:427
StateNode * Previous
SmallVector< AnnotatedLine *, 0 > Children
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
FormatToken * Token
FormatToken * FirstStartOfName
void annotate(AnnotatedLine &Line)
bool startsWith(Ts...Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:59
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:26
const FormatStyle & Style
bool isTrailingComment() const
Definition: FormatToken.h:359
SmallVectorImpl< AnnotatedLine * >::const_iterator Next
static bool isAllmanBrace(const FormatToken &Tok)
AnnotatedLine & Line
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
Never put a space before opening parentheses.
Definition: Format.h:518
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
bool ColonIsObjCMethodExpr
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:138
const AdditionalKeywords & Keywords
Use C++03-compatible syntax.
Definition: Format.h:562
Always break after the return types of top-level functions.
Definition: Format.h:172
bool IsExpression
Use features of C++11 (e.g.
Definition: Format.h:565
detail::InMemoryDirectory::const_iterator E
SmallVector< Context, 8 > Contexts
bool opensScope() const
Returns whether Tok is ([{ or a template opening <.
Definition: FormatToken.h:321
void calculateFormattingInformation(AnnotatedLine &Line)
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:281
Only merge functions defined inside a class. Implies "empty".
Definition: Format.h:135
FormatToken * NextOperator
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, points to the next operator.
Definition: FormatToken.h:253
unsigned BindingStrength
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:261
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
Definition: FormatToken.h:271
FormatToken * Current
FormatToken * FirstObjCSelectorName
bool InTemplateArgument
bool ColonIsDictLiteral
The parameter type of a method or function.
Break before operators.
Definition: Format.h:213
unsigned LongestObjCSelectorName
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:154
prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, bool CPlusPlus11)
Return the precedence of the specified binary operator token.
Align pointer to the right.
Definition: Format.h:495
unsigned BindingStrength
The binding strength of a token.
Definition: FormatToken.h:216
bool CaretFound
bool InCtorInitializer
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:126
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:166
bool IsForEachMacro
static bool isFunctionDeclarationName(const FormatToken &Current)