clang  3.7.0
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
// NOTE(review): the line that opens this class (its `class ... {` header) is
// not present in this listing. The members below make up an abstract token
// source interface: every operation except the destructor is pure virtual.
27 public:
28  virtual ~FormatTokenSource() {}
// Hands out the next token of the stream.
29  virtual FormatToken *getNextToken() = 0;
30 
// getPosition()/setPosition() let a caller remember a place in the stream and
// jump back to it; setPosition() also yields a token for the new position.
31  virtual unsigned getPosition() = 0;
32  virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40  bool MustBeDeclaration)
41  : Line(Line), Stack(Stack) {
42  Line.MustBeDeclaration = MustBeDeclaration;
43  Stack.push_back(MustBeDeclaration);
44  }
45  ~ScopedDeclarationState() {
46  Stack.pop_back();
47  if (!Stack.empty())
48  Line.MustBeDeclaration = Stack.back();
49  else
50  Line.MustBeDeclaration = true;
51  }
52 
53 private:
54  UnwrappedLine &Line;
55  std::vector<bool> &Stack;
56 };
57 
// Token source that is installed for the lifetime of the object (it replaces
// the referenced TokenSource pointer in the constructor and puts the previous
// one back in the destructor). While active, tokens are forwarded from the
// previous source, and a fake eof token is returned once the forwarded token
// has HasUnescapedNewline set.
// NOTE(review): the declaration of the PreviousLineLevel member (listing line
// 111) is not present in this listing.
58 class ScopedMacroState : public FormatTokenSource {
59 public:
// Saves the line level and token source, then switches the line to level 0 in
// "inside preprocessor directive" mode and redirects TokenSource to this.
60  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61  FormatToken *&ResetToken)
62  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64  Token(nullptr) {
65  TokenSource = this;
66  Line.Level = 0;
67  Line.InPPDirective = true;
68  }
69 
// Undoes the constructor's changes and writes the last forwarded token to
// ResetToken.
70  ~ScopedMacroState() override {
71  TokenSource = PreviousTokenSource;
72  ResetToken = Token;
73  Line.InPPDirective = false;
74  Line.Level = PreviousLineLevel;
75  }
76 
// Fetches the next token from the previous source; returns the shared fake eof
// token instead when that token satisfies eof().
77  FormatToken *getNextToken() override {
78  // The \c UnwrappedLineParser guards against this by never calling
79  // \c getNextToken() after it has encountered the first eof token.
80  assert(!eof());
81  Token = PreviousTokenSource->getNextToken();
82  if (eof())
83  return getFakeEOF();
84  return Token;
85  }
86 
// Position queries are delegated to the previous source unchanged.
87  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89  FormatToken *setPosition(unsigned Position) override {
90  Token = PreviousTokenSource->setPosition(Position);
91  return Token;
92  }
93 
94 private:
// True once a token has been read and it carries HasUnescapedNewline.
95  bool eof() { return Token && Token->HasUnescapedNewline; }
96 
// Returns a function-local static token whose kind is tok::eof; it is set up
// on first use and shared by all callers.
97  FormatToken *getFakeEOF() {
98  static bool EOFInitialized = false;
99  static FormatToken FormatTok;
100  if (!EOFInitialized) {
101  FormatTok.Tok.startToken();
102  FormatTok.Tok.setKind(tok::eof);
103  EOFInitialized = true;
104  }
105  return &FormatTok;
106  }
107 
108  UnwrappedLine &Line;
109  FormatTokenSource *&TokenSource;
110  FormatToken *&ResetToken;
112  FormatTokenSource *PreviousTokenSource;
113 
// Last token obtained from PreviousTokenSource (nullptr before the first read).
114  FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
// NOTE(review): several lines of this class are not present in this listing
// (the class header, the first line of the constructor signature, the
// destructor header and the declaration of the Parser member). The comments
// below describe only the statements that are visible.
120 public:
122  bool SwitchToPreprocessorLines = false)
123  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
// Choose where lines produced from now on are collected: the preprocessor
// directive list, or the children of the last token of the current line.
124  if (SwitchToPreprocessorLines)
125  Parser.CurrentLines = &Parser.PreprocessorDirectives;
126  else if (!Parser.Line->Tokens.empty())
127  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
// Park the line being built and continue with a fresh one that inherits its
// level and preprocessor-directive flag.
128  PreBlockLine = std::move(Parser.Line);
129  Parser.Line = llvm::make_unique<UnwrappedLine>();
130  Parser.Line->Level = PreBlockLine->Level;
131  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132  }
133 
// Visible remainder of the tear-down code: flush a pending line, restore the
// parked line and the original line container.
135  if (!Parser.Line->Tokens.empty()) {
136  Parser.addUnwrappedLine();
137  }
138  assert(Parser.Line->Tokens.empty());
139  Parser.Line = std::move(PreBlockLine);
140  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141  Parser.MustBreakBeforeNextToken = true;
142  Parser.CurrentLines = OriginalLines;
143  }
144 
145 private:
147 
// Line that was under construction when this object was created.
148  std::unique_ptr<UnwrappedLine> PreBlockLine;
// Value of Parser.CurrentLines at construction time.
149  SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
// NOTE(review): the class header, the first line of the constructor signature
// and the first `if` of the constructor body are not present in this listing.
// Visible behavior: the constructor remembers the incoming line level,
// may emit an unwrapped line (and, for the GNU brace style, also increases the
// level); the destructor puts the remembered level back.
153 public:
155  const FormatStyle &Style, unsigned &LineLevel)
156  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
158  Parser->addUnwrappedLine();
159  } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
160  Parser->addUnwrappedLine();
161  ++LineLevel;
162  }
163  }
164  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
165 
166 private:
// Reference to the level being adjusted and the value to restore on exit.
167  unsigned &LineLevel;
168  unsigned OldLineLevel;
169 };
170 
171 namespace {
172 
173 class IndexedTokenSource : public FormatTokenSource {
174 public:
175  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
176  : Tokens(Tokens), Position(-1) {}
177 
178  FormatToken *getNextToken() override {
179  ++Position;
180  return Tokens[Position];
181  }
182 
183  unsigned getPosition() override {
184  assert(Position >= 0);
185  return Position;
186  }
187 
188  FormatToken *setPosition(unsigned P) override {
189  Position = P;
190  return Tokens[Position];
191  }
192 
193  void reset() { Position = -1; }
194 
195 private:
196  ArrayRef<FormatToken *> Tokens;
197  int Position;
198 };
199 
200 } // end anonymous namespace
201 
// NOTE(review): the constructor's signature lines are not present in this
// listing; only the member-initializer list is visible. It starts with a fresh
// empty line, collects lines into Lines, has no active token source yet
// (Tokens is nullptr) and starts outside any conditional (PPBranchLevel -1).
206  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
207  CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
208  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
209 
210 void UnwrappedLineParser::reset() {
211  PPBranchLevel = -1;
212  Line.reset(new UnwrappedLine);
213  CommentsBeforeNextToken.clear();
214  FormatTok = nullptr;
215  MustBreakBeforeNextToken = false;
216  PreprocessorDirectives.clear();
217  CurrentLines = &Lines;
218  DeclarationScopeStack.clear();
219  PPStack.clear();
220 }
221 
// NOTE(review): the function's signature line is not present in this listing.
// The body repeatedly parses the complete token array: each iteration resets
// the parser, parses the file, hands all produced lines to the callback and
// then advances the preprocessor branch bookkeeping. The loop ends when
// PPLevelBranchIndex is empty.
223  IndexedTokenSource TokenSource(AllTokens);
224  do {
225  DEBUG(llvm::dbgs() << "----\n");
226  reset();
227  Tokens = &TokenSource;
228  TokenSource.reset();
229 
230  readToken();
231  parseFile();
232  // Create line with eof token.
233  pushToken(FormatTok);
234  addUnwrappedLine();
235 
// Deliver the lines of this pass, then drop them before the next pass.
236  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
237  E = Lines.end();
238  I != E; ++I) {
239  Callback.consumeUnwrappedLine(*I);
240  }
241  Callback.finishRun();
242  Lines.clear();
// Pop every trailing level whose branch index has reached its branch count,
// then step the innermost remaining level to its next branch.
243  while (!PPLevelBranchIndex.empty() &&
244  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
245  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
246  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
247  }
248  if (!PPLevelBranchIndex.empty()) {
249  ++PPLevelBranchIndex.back();
250  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
251  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
252  }
253  } while (!PPLevelBranchIndex.empty());
254 }
255 
256 void UnwrappedLineParser::parseFile() {
257  // The top-level context in a file always has declarations, except for pre-
258  // processor directives and JavaScript files.
259  bool MustBeDeclaration =
260  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
261  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
262  MustBeDeclaration);
263  parseLevel(/*HasOpeningBrace=*/false);
264  // Make sure to format the remaining tokens.
265  flushComments(true);
266  addUnwrappedLine();
267 }
268 
269 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
270  bool SwitchLabelEncountered = false;
271  do {
272  tok::TokenKind kind = FormatTok->Tok.getKind();
273  if (FormatTok->Type == TT_MacroBlockBegin) {
274  kind = tok::l_brace;
275  } else if (FormatTok->Type == TT_MacroBlockEnd) {
276  kind = tok::r_brace;
277  }
278 
279  switch (kind) {
280  case tok::comment:
281  nextToken();
282  addUnwrappedLine();
283  break;
284  case tok::l_brace:
285  // FIXME: Add parameter whether this can happen - if this happens, we must
286  // be in a non-declaration context.
287  parseBlock(/*MustBeDeclaration=*/false);
288  addUnwrappedLine();
289  break;
290  case tok::r_brace:
291  if (HasOpeningBrace)
292  return;
293  nextToken();
294  addUnwrappedLine();
295  break;
296  case tok::kw_default:
297  case tok::kw_case:
298  if (!SwitchLabelEncountered &&
299  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300  ++Line->Level;
301  SwitchLabelEncountered = true;
302  parseStructuralElement();
303  break;
304  default:
305  parseStructuralElement();
306  break;
307  }
308  } while (!eof());
309 }
310 
311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312  // We'll parse forward through the tokens until we hit
313  // a closing brace or eof - note that getNextToken() will
314  // parse macros, so this will magically work inside macro
315  // definitions, too.
316  unsigned StoredPosition = Tokens->getPosition();
317  FormatToken *Tok = FormatTok;
318  // Keep a stack of positions of lbrace tokens. We will
319  // update information about whether an lbrace starts a
320  // braced init list or a different block during the loop.
321  SmallVector<FormatToken *, 8> LBraceStack;
322  assert(Tok->Tok.is(tok::l_brace));
323  do {
324  // Get next none-comment token.
325  FormatToken *NextTok;
326  unsigned ReadTokens = 0;
327  do {
328  NextTok = Tokens->getNextToken();
329  ++ReadTokens;
330  } while (NextTok->is(tok::comment));
331 
332  switch (Tok->Tok.getKind()) {
333  case tok::l_brace:
334  Tok->BlockKind = BK_Unknown;
335  LBraceStack.push_back(Tok);
336  break;
337  case tok::r_brace:
338  if (!LBraceStack.empty()) {
339  if (LBraceStack.back()->BlockKind == BK_Unknown) {
340  bool ProbablyBracedList = false;
341  if (Style.Language == FormatStyle::LK_Proto) {
342  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
343  } else {
344  // Using OriginalColumn to distinguish between ObjC methods and
345  // binary operators is a bit hacky.
346  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
347  NextTok->OriginalColumn == 0;
348 
349  // If there is a comma, semicolon or right paren after the closing
350  // brace, we assume this is a braced initializer list. Note that
351  // regardless how we mark inner braces here, we will overwrite the
352  // BlockKind later if we parse a braced list (where all blocks
353  // inside are by default braced lists), or when we explicitly detect
354  // blocks (for example while parsing lambdas).
355  //
356  // We exclude + and - as they can be ObjC visibility modifiers.
357  ProbablyBracedList =
358  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
359  tok::r_paren, tok::r_square, tok::l_brace,
360  tok::l_paren, tok::ellipsis) ||
361  (NextTok->is(tok::semi) &&
362  (!ExpectClassBody || LBraceStack.size() != 1)) ||
363  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
364  }
365  if (ProbablyBracedList) {
366  Tok->BlockKind = BK_BracedInit;
367  LBraceStack.back()->BlockKind = BK_BracedInit;
368  } else {
369  Tok->BlockKind = BK_Block;
370  LBraceStack.back()->BlockKind = BK_Block;
371  }
372  }
373  LBraceStack.pop_back();
374  }
375  break;
376  case tok::at:
377  case tok::semi:
378  case tok::kw_if:
379  case tok::kw_while:
380  case tok::kw_for:
381  case tok::kw_switch:
382  case tok::kw_try:
383  case tok::kw___try:
384  if (!LBraceStack.empty())
385  LBraceStack.back()->BlockKind = BK_Block;
386  break;
387  default:
388  break;
389  }
390  Tok = NextTok;
391  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
392  // Assume other blocks for all unclosed opening braces.
393  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
394  if (LBraceStack[i]->BlockKind == BK_Unknown)
395  LBraceStack[i]->BlockKind = BK_Block;
396  }
397 
398  FormatTok = Tokens->setPosition(StoredPosition);
399 }
400 
401 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
402  bool MunchSemi) {
403  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
404  "'{' or macro block token expected");
405  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
406 
407  unsigned InitialLevel = Line->Level;
408  nextToken();
409 
410  if (MacroBlock && FormatTok->is(tok::l_paren))
411  parseParens();
412 
413  addUnwrappedLine();
414 
415  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
416  MustBeDeclaration);
417  if (AddLevel)
418  ++Line->Level;
419  parseLevel(/*HasOpeningBrace=*/true);
420 
421  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
422  : !FormatTok->is(tok::r_brace)) {
423  Line->Level = InitialLevel;
424  return;
425  }
426 
427  nextToken(); // Munch the closing brace.
428 
429  if (MacroBlock && FormatTok->is(tok::l_paren))
430  parseParens();
431 
432  if (MunchSemi && FormatTok->Tok.is(tok::semi))
433  nextToken();
434  Line->Level = InitialLevel;
435 }
436 
437 static bool isGoogScope(const UnwrappedLine &Line) {
438  // FIXME: Closure-library specific stuff should not be hard-coded but be
439  // configurable.
440  if (Line.Tokens.size() < 4)
441  return false;
442  auto I = Line.Tokens.begin();
443  if (I->Tok->TokenText != "goog")
444  return false;
445  ++I;
446  if (I->Tok->isNot(tok::period))
447  return false;
448  ++I;
449  if (I->Tok->TokenText != "scope")
450  return false;
451  ++I;
452  return I->Tok->is(tok::l_paren);
453 }
454 
// NOTE(review): the first line of the signature and some `case` labels of the
// switch are not present in this listing, so which brace style each of the
// first two `return` statements belongs to cannot be told from here.
// Visible behavior: the result depends on Style.BreakBeforeBraces; for some
// styles it depends on the keyword InitialToken is, for BS_GNU it is always
// true, and for all remaining styles it is false.
456  const FormatToken &InitialToken) {
457  switch (Style.BreakBeforeBraces) {
459  return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
461  return InitialToken.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union);
463  case FormatStyle::BS_GNU:
464  return true;
465  default:
466  return false;
467  }
468 }
469 
// Parses a brace-delimited block whose lines are collected separately from the
// current line (a ScopedLineState is active while the contents are parsed).
// The current token is marked as a block brace and consumed first; one more
// token is consumed after the contents have been parsed.
// NOTE(review): the initializer of GoogScope (listing line 475) is not present
// in this listing.
470 void UnwrappedLineParser::parseChildBlock() {
471  FormatTok->BlockKind = BK_Block;
472  nextToken();
473  {
474  bool GoogScope =
476  ScopedLineState LineState(*this);
477  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
478  /*MustBeDeclaration=*/false);
// The contents are parsed one level deeper unless GoogScope is set; the level
// change is undone again after parsing.
479  Line->Level += GoogScope ? 0 : 1;
480  parseLevel(/*HasOpeningBrace=*/true);
481  flushComments(isOnNewLine(*FormatTok));
482  Line->Level -= GoogScope ? 0 : 1;
483  }
484  nextToken();
485 }
486 
487 void UnwrappedLineParser::parsePPDirective() {
488  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
489  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
490  nextToken();
491 
492  if (!FormatTok->Tok.getIdentifierInfo()) {
493  parsePPUnknown();
494  return;
495  }
496 
497  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
498  case tok::pp_define:
499  parsePPDefine();
500  return;
501  case tok::pp_if:
502  parsePPIf(/*IfDef=*/false);
503  break;
504  case tok::pp_ifdef:
505  case tok::pp_ifndef:
506  parsePPIf(/*IfDef=*/true);
507  break;
508  case tok::pp_else:
509  parsePPElse();
510  break;
511  case tok::pp_elif:
512  parsePPElIf();
513  break;
514  case tok::pp_endif:
515  parsePPEndIf();
516  break;
517  default:
518  parsePPUnknown();
519  break;
520  }
521 }
522 
523 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
524  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
525  PPStack.push_back(PP_Unreachable);
526  else
527  PPStack.push_back(PP_Conditional);
528 }
529 
530 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
531  ++PPBranchLevel;
532  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
533  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
534  PPLevelBranchIndex.push_back(0);
535  PPLevelBranchCount.push_back(0);
536  }
537  PPChainBranchIndex.push(0);
538  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
539  conditionalCompilationCondition(Unreachable || Skip);
540 }
541 
542 void UnwrappedLineParser::conditionalCompilationAlternative() {
543  if (!PPStack.empty())
544  PPStack.pop_back();
545  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
546  if (!PPChainBranchIndex.empty())
547  ++PPChainBranchIndex.top();
548  conditionalCompilationCondition(
549  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
550  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
551 }
552 
553 void UnwrappedLineParser::conditionalCompilationEnd() {
554  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
555  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
556  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
557  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
558  }
559  }
560  // Guard against #endif's without #if.
561  if (PPBranchLevel > 0)
562  --PPBranchLevel;
563  if (!PPChainBranchIndex.empty())
564  PPChainBranchIndex.pop();
565  if (!PPStack.empty())
566  PPStack.pop_back();
567 }
568 
569 void UnwrappedLineParser::parsePPIf(bool IfDef) {
570  nextToken();
571  bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
572  FormatTok->Tok.getLiteralData() != nullptr &&
573  StringRef(FormatTok->Tok.getLiteralData(),
574  FormatTok->Tok.getLength()) == "0") ||
575  FormatTok->Tok.is(tok::kw_false);
576  conditionalCompilationStart(!IfDef && IsLiteralFalse);
577  parsePPUnknown();
578 }
579 
580 void UnwrappedLineParser::parsePPElse() {
581  conditionalCompilationAlternative();
582  parsePPUnknown();
583 }
584 
585 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
586 
587 void UnwrappedLineParser::parsePPEndIf() {
588  conditionalCompilationEnd();
589  parsePPUnknown();
590 }
591 
592 void UnwrappedLineParser::parsePPDefine() {
593  nextToken();
594 
595  if (FormatTok->Tok.getKind() != tok::identifier) {
596  parsePPUnknown();
597  return;
598  }
599  nextToken();
600  if (FormatTok->Tok.getKind() == tok::l_paren &&
601  FormatTok->WhitespaceRange.getBegin() ==
602  FormatTok->WhitespaceRange.getEnd()) {
603  parseParens();
604  }
605  addUnwrappedLine();
606  Line->Level = 1;
607 
608  // Errors during a preprocessor directive can only affect the layout of the
609  // preprocessor directive, and thus we ignore them. An alternative approach
610  // would be to use the same approach we use on the file level (no
611  // re-indentation if there was a structural error) within the macro
612  // definition.
613  parseFile();
614 }
615 
616 void UnwrappedLineParser::parsePPUnknown() {
617  do {
618  nextToken();
619  } while (!eof());
620  addUnwrappedLine();
621 }
622 
623 // Here we blacklist certain tokens that are not usually the first token in an
624 // unwrapped line. This is used in attempt to distinguish macro calls without
625 // trailing semicolons from other constructs split to several lines.
626 static bool tokenCanStartNewLine(const clang::Token &Tok) {
627  // Semicolon can be a null-statement, l_square can be a start of a macro or
628  // a C++11 attribute, but this doesn't seem to be common.
629  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
630  Tok.isNot(tok::l_square) &&
631  // Tokens that can only be used as binary operators and a part of
632  // overloaded operator names.
633  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
634  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
635  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
636  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
637  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
638  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
639  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
640  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
641  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
642  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
643  Tok.isNot(tok::lesslessequal) &&
644  // Colon is used in labels, base class lists, initializer lists,
645  // range-based for loops, ternary operator, but should never be the
646  // first token in an unwrapped line.
647  Tok.isNot(tok::colon) &&
648  // 'noexcept' is a trailing annotation.
649  Tok.isNot(tok::kw_noexcept);
650 }
651 
// Parses one structural element starting at the current token. The first
// switch dispatches on the leading token to dedicated parsers for constructs
// that are recognizable from it; everything else falls through to the loop
// below, which consumes tokens until the element ends or eof is reached.
// NOTE(review): listing lines 682-683, 729, 816, 835, 873 and 913 are not
// present in this listing, so a few conditions below appear truncated.
652 void UnwrappedLineParser::parseStructuralElement() {
653  assert(!FormatTok->Tok.is(tok::l_brace));
654  switch (FormatTok->Tok.getKind()) {
655  case tok::at:
656  nextToken();
657  if (FormatTok->Tok.is(tok::l_brace)) {
658  parseBracedList();
659  break;
660  }
661  switch (FormatTok->Tok.getObjCKeywordID()) {
662  case tok::objc_public:
663  case tok::objc_protected:
664  case tok::objc_package:
665  case tok::objc_private:
666  return parseAccessSpecifier();
667  case tok::objc_interface:
668  case tok::objc_implementation:
669  return parseObjCInterfaceOrImplementation();
670  case tok::objc_protocol:
671  return parseObjCProtocol();
672  case tok::objc_end:
673  return; // Handled by the caller.
674  case tok::objc_optional:
675  case tok::objc_required:
676  nextToken();
677  addUnwrappedLine();
678  return;
679  case tok::objc_autoreleasepool:
680  nextToken();
681  if (FormatTok->Tok.is(tok::l_brace)) {
684  addUnwrappedLine();
685  parseBlock(/*MustBeDeclaration=*/false);
686  }
687  addUnwrappedLine();
688  return;
689  case tok::objc_try:
690  // This branch isn't strictly necessary (the kw_try case below would
691  // do this too after the tok::at is parsed above). But be explicit.
692  parseTryCatch();
693  return;
694  default:
695  break;
696  }
697  break;
698  case tok::kw_asm:
699  nextToken();
// Inside a braced asm body every token up to the closing brace is marked
// Finalized; both braces get the TT_InlineASMBrace type.
700  if (FormatTok->is(tok::l_brace)) {
701  FormatTok->Type = TT_InlineASMBrace;
702  nextToken();
703  while (FormatTok && FormatTok->isNot(tok::eof)) {
704  if (FormatTok->is(tok::r_brace)) {
705  FormatTok->Type = TT_InlineASMBrace;
706  nextToken();
707  addUnwrappedLine();
708  break;
709  }
710  FormatTok->Finalized = true;
711  nextToken();
712  }
713  }
714  break;
715  case tok::kw_namespace:
716  parseNamespace();
717  return;
718  case tok::kw_inline:
719  nextToken();
720  if (FormatTok->Tok.is(tok::kw_namespace)) {
721  parseNamespace();
722  return;
723  }
724  break;
725  case tok::kw_public:
726  case tok::kw_protected:
727  case tok::kw_private:
728  if (Style.Language == FormatStyle::LK_Java ||
730  nextToken();
731  else
732  parseAccessSpecifier();
733  return;
734  case tok::kw_if:
735  parseIfThenElse();
736  return;
737  case tok::kw_for:
738  case tok::kw_while:
739  parseForOrWhileLoop();
740  return;
741  case tok::kw_do:
742  parseDoWhile();
743  return;
744  case tok::kw_switch:
745  parseSwitch();
746  return;
747  case tok::kw_default:
748  nextToken();
749  parseLabel();
750  return;
751  case tok::kw_case:
752  parseCaseLabel();
753  return;
754  case tok::kw_try:
755  case tok::kw___try:
756  parseTryCatch();
757  return;
758  case tok::kw_extern:
759  nextToken();
// extern followed by a string literal and '{' is parsed as a block without an
// extra indentation level.
760  if (FormatTok->Tok.is(tok::string_literal)) {
761  nextToken();
762  if (FormatTok->Tok.is(tok::l_brace)) {
763  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
764  addUnwrappedLine();
765  return;
766  }
767  }
768  break;
769  case tok::kw_export:
770  if (Style.Language == FormatStyle::LK_JavaScript) {
771  parseJavaScriptEs6ImportExport();
772  return;
773  }
774  break;
775  case tok::identifier:
776  if (FormatTok->is(TT_ForEachMacro)) {
777  parseForOrWhileLoop();
778  return;
779  }
780  if (FormatTok->is(TT_MacroBlockBegin)) {
781  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
782  /*MunchSemi=*/false);
783  return;
784  }
785  if (Style.Language == FormatStyle::LK_JavaScript &&
786  FormatTok->is(Keywords.kw_import)) {
787  parseJavaScriptEs6ImportExport();
788  return;
789  }
790  if (FormatTok->is(Keywords.kw_signals)) {
791  nextToken();
792  if (FormatTok->is(tok::colon)) {
793  nextToken();
794  addUnwrappedLine();
795  }
796  return;
797  }
798  // In all other cases, parse the declaration.
799  break;
800  default:
801  break;
802  }
// Generic part: consume the rest of the element token by token. Cases that
// `return` end the element; cases that `break` continue with the next token.
803  do {
804  switch (FormatTok->Tok.getKind()) {
805  case tok::at:
806  nextToken();
807  if (FormatTok->Tok.is(tok::l_brace))
808  parseBracedList();
809  break;
810  case tok::kw_enum:
811  // parseEnum falls through and does not yet add an unwrapped line as an
812  // enum definition can start a structural element.
813  parseEnum();
814  // This does not apply for Java and JavaScript.
815  if (Style.Language == FormatStyle::LK_Java ||
817  addUnwrappedLine();
818  return;
819  }
820  break;
821  case tok::kw_typedef:
822  nextToken();
823  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
824  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
825  parseEnum();
826  break;
827  case tok::kw_struct:
828  case tok::kw_union:
829  case tok::kw_class:
830  // parseRecord falls through and does not yet add an unwrapped line as a
831  // record declaration or definition can start a structural element.
832  parseRecord();
833  // This does not apply for Java and JavaScript.
834  if (Style.Language == FormatStyle::LK_Java ||
836  addUnwrappedLine();
837  return;
838  }
839  break;
840  case tok::period:
841  nextToken();
842  // In Java, classes have an implicit static member "class".
843  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
844  FormatTok->is(tok::kw_class))
845  nextToken();
846  break;
847  case tok::semi:
848  nextToken();
849  addUnwrappedLine();
850  return;
851  case tok::r_brace:
852  addUnwrappedLine();
853  return;
854  case tok::l_paren:
855  parseParens();
856  break;
857  case tok::caret:
858  nextToken();
859  if (FormatTok->Tok.isAnyIdentifier() ||
860  FormatTok->isSimpleTypeSpecifier())
861  nextToken();
862  if (FormatTok->is(tok::l_paren))
863  parseParens();
864  if (FormatTok->is(tok::l_brace))
865  parseChildBlock();
866  break;
867  case tok::l_brace:
868  if (!tryToParseBracedList()) {
869  // A block outside of parentheses must be the last part of a
870  // structural element.
871  // FIXME: Figure out cases where this is not true, and add projections
872  // for them (the one we know is missing are lambdas).
874  addUnwrappedLine();
875  FormatTok->Type = TT_FunctionLBrace;
876  parseBlock(/*MustBeDeclaration=*/false);
877  addUnwrappedLine();
878  return;
879  }
880  // Otherwise this was a braced init list, and the structural
881  // element continues.
882  break;
883  case tok::kw_try:
884  // We arrive here when parsing function-try blocks.
885  parseTryCatch();
886  return;
887  case tok::identifier: {
888  if (FormatTok->is(TT_MacroBlockEnd)) {
889  addUnwrappedLine();
890  return;
891  }
892 
893  // Parse function literal unless 'function' is the first token in a line
894  // in which case this should be treated as a free-standing function.
895  if (Style.Language == FormatStyle::LK_JavaScript &&
896  FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
897  tryToParseJSFunction();
898  break;
899  }
900  if ((Style.Language == FormatStyle::LK_JavaScript ||
901  Style.Language == FormatStyle::LK_Java) &&
902  FormatTok->is(Keywords.kw_interface)) {
903  parseRecord();
904  addUnwrappedLine();
905  return;
906  }
907 
908  StringRef Text = FormatTok->TokenText;
909  nextToken();
910  if (Line->Tokens.size() == 1 &&
911  // JS doesn't have macros, and within classes colons indicate fields,
912  // not labels.
914  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
915  parseLabel();
916  return;
917  }
918  // Recognize function-like macro usages without trailing semicolon as
919  // well as free-standing macros like Q_OBJECT.
920  bool FunctionLike = FormatTok->is(tok::l_paren);
921  if (FunctionLike)
922  parseParens();
923 
924  bool FollowedByNewline =
925  CommentsBeforeNextToken.empty()
926  ? FormatTok->NewlinesBefore > 0
927  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
928 
// The identifier is taken to be a macro only if it is all upper case, is
// followed by a line break, and is either at least five characters long or
// followed by parentheses.
929  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
930  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
931  addUnwrappedLine();
932  return;
933  }
934  }
935  break;
936  }
937  case tok::equal:
938  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
939  // TT_JsFatArrow. They always start an expression or a child block if
940  // followed by a curly.
941  if (FormatTok->is(TT_JsFatArrow)) {
942  nextToken();
943  if (FormatTok->is(tok::l_brace))
944  parseChildBlock();
945  break;
946  }
947 
948  nextToken();
949  if (FormatTok->Tok.is(tok::l_brace)) {
950  parseBracedList();
951  }
952  break;
953  case tok::l_square:
954  parseSquare();
955  break;
956  case tok::kw_new:
957  parseNew();
958  break;
959  default:
960  nextToken();
961  break;
962  }
963  } while (!eof());
964 }
965 
966 bool UnwrappedLineParser::tryToParseLambda() {
967  if (Style.Language != FormatStyle::LK_Cpp) {
968  nextToken();
969  return false;
970  }
971  // FIXME: This is a dirty way to access the previous token. Find a better
972  // solution.
973  if (!Line->Tokens.empty() &&
974  (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
975  tok::kw_new, tok::kw_delete) ||
976  Line->Tokens.back().Tok->closesScope() ||
977  Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
978  nextToken();
979  return false;
980  }
981  assert(FormatTok->is(tok::l_square));
982  FormatToken &LSquare = *FormatTok;
983  if (!tryToParseLambdaIntroducer())
984  return false;
985 
986  while (FormatTok->isNot(tok::l_brace)) {
987  if (FormatTok->isSimpleTypeSpecifier()) {
988  nextToken();
989  continue;
990  }
991  switch (FormatTok->Tok.getKind()) {
992  case tok::l_brace:
993  break;
994  case tok::l_paren:
995  parseParens();
996  break;
997  case tok::amp:
998  case tok::star:
999  case tok::kw_const:
1000  case tok::comma:
1001  case tok::less:
1002  case tok::greater:
1003  case tok::identifier:
1004  case tok::coloncolon:
1005  case tok::kw_mutable:
1006  nextToken();
1007  break;
1008  case tok::arrow:
1009  FormatTok->Type = TT_LambdaArrow;
1010  nextToken();
1011  break;
1012  default:
1013  return true;
1014  }
1015  }
1016  LSquare.Type = TT_LambdaLSquare;
1017  parseChildBlock();
1018  return true;
1019 }
1020 
1021 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1022  nextToken();
1023  if (FormatTok->is(tok::equal)) {
1024  nextToken();
1025  if (FormatTok->is(tok::r_square)) {
1026  nextToken();
1027  return true;
1028  }
1029  if (FormatTok->isNot(tok::comma))
1030  return false;
1031  nextToken();
1032  } else if (FormatTok->is(tok::amp)) {
1033  nextToken();
1034  if (FormatTok->is(tok::r_square)) {
1035  nextToken();
1036  return true;
1037  }
1038  if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1039  return false;
1040  }
1041  if (FormatTok->is(tok::comma))
1042  nextToken();
1043  } else if (FormatTok->is(tok::r_square)) {
1044  nextToken();
1045  return true;
1046  }
1047  do {
1048  if (FormatTok->is(tok::amp))
1049  nextToken();
1050  if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1051  return false;
1052  nextToken();
1053  if (FormatTok->is(tok::ellipsis))
1054  nextToken();
1055  if (FormatTok->is(tok::comma)) {
1056  nextToken();
1057  } else if (FormatTok->is(tok::r_square)) {
1058  nextToken();
1059  return true;
1060  } else {
1061  return false;
1062  }
1063  } while (!eof());
1064  return false;
1065 }
1066 
1067 void UnwrappedLineParser::tryToParseJSFunction() {
1068  nextToken();
1069 
1070  // Consume function name.
1071  if (FormatTok->is(tok::identifier))
1072  nextToken();
1073 
1074  if (FormatTok->isNot(tok::l_paren))
1075  return;
1076 
1077  // Parse formal parameter list.
1078  parseParens();
1079 
1080  if (FormatTok->is(tok::colon)) {
1081  // Parse a type definition.
1082  nextToken();
1083 
1084  // Eat the type declaration. For braced inline object types, balance braces,
1085  // otherwise just parse until finding an l_brace for the function body.
1086  if (FormatTok->is(tok::l_brace))
1087  tryToParseBracedList();
1088  else
1089  while (FormatTok->isNot(tok::l_brace) && !eof())
1090  nextToken();
1091  }
1092 
1093  parseChildBlock();
1094 }
1095 
1096 bool UnwrappedLineParser::tryToParseBracedList() {
1097  if (FormatTok->BlockKind == BK_Unknown)
1098  calculateBraceTypes();
1099  assert(FormatTok->BlockKind != BK_Unknown);
1100  if (FormatTok->BlockKind == BK_Block)
1101  return false;
1102  parseBracedList();
1103  return true;
1104 }
1105 
1106 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1107  bool HasError = false;
1108  nextToken();
1109 
1110  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1111  // replace this by using parseAssigmentExpression() inside.
1112  do {
1113  if (Style.Language == FormatStyle::LK_JavaScript) {
1114  if (FormatTok->is(Keywords.kw_function)) {
1115  tryToParseJSFunction();
1116  continue;
1117  }
1118  if (FormatTok->is(TT_JsFatArrow)) {
1119  nextToken();
1120  // Fat arrows can be followed by simple expressions or by child blocks
1121  // in curly braces.
1122  if (FormatTok->is(tok::l_brace)) {
1123  parseChildBlock();
1124  continue;
1125  }
1126  }
1127  }
1128  switch (FormatTok->Tok.getKind()) {
1129  case tok::caret:
1130  nextToken();
1131  if (FormatTok->is(tok::l_brace)) {
1132  parseChildBlock();
1133  }
1134  break;
1135  case tok::l_square:
1136  tryToParseLambda();
1137  break;
1138  case tok::l_brace:
1139  // Assume there are no blocks inside a braced init list apart
1140  // from the ones we explicitly parse out (like lambdas).
1141  FormatTok->BlockKind = BK_BracedInit;
1142  parseBracedList();
1143  break;
1144  case tok::l_paren:
1145  parseParens();
1146  // JavaScript can just have free standing methods and getters/setters in
1147  // object literals. Detect them by a "{" following ")".
1148  if (Style.Language == FormatStyle::LK_JavaScript) {
1149  if (FormatTok->is(tok::l_brace))
1150  parseChildBlock();
1151  break;
1152  }
1153  break;
1154  case tok::r_brace:
1155  nextToken();
1156  return !HasError;
1157  case tok::semi:
1158  HasError = true;
1159  if (!ContinueOnSemicolons)
1160  return !HasError;
1161  nextToken();
1162  break;
1163  case tok::comma:
1164  nextToken();
1165  break;
1166  default:
1167  nextToken();
1168  break;
1169  }
1170  } while (!eof());
1171  return false;
1172 }
1173 
1174 void UnwrappedLineParser::parseParens() {
1175  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1176  nextToken();
1177  do {
1178  switch (FormatTok->Tok.getKind()) {
1179  case tok::l_paren:
1180  parseParens();
1181  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1182  parseChildBlock();
1183  break;
1184  case tok::r_paren:
1185  nextToken();
1186  return;
1187  case tok::r_brace:
1188  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1189  return;
1190  case tok::l_square:
1191  tryToParseLambda();
1192  break;
1193  case tok::l_brace:
1194  if (!tryToParseBracedList())
1195  parseChildBlock();
1196  break;
1197  case tok::at:
1198  nextToken();
1199  if (FormatTok->Tok.is(tok::l_brace))
1200  parseBracedList();
1201  break;
1202  case tok::identifier:
1203  if (Style.Language == FormatStyle::LK_JavaScript &&
1204  FormatTok->is(Keywords.kw_function))
1205  tryToParseJSFunction();
1206  else
1207  nextToken();
1208  break;
1209  default:
1210  nextToken();
1211  break;
1212  }
1213  } while (!eof());
1214 }
1215 
1216 void UnwrappedLineParser::parseSquare() {
1217  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1218  if (tryToParseLambda())
1219  return;
1220  do {
1221  switch (FormatTok->Tok.getKind()) {
1222  case tok::l_paren:
1223  parseParens();
1224  break;
1225  case tok::r_square:
1226  nextToken();
1227  return;
1228  case tok::r_brace:
1229  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1230  return;
1231  case tok::l_square:
1232  parseSquare();
1233  break;
1234  case tok::l_brace: {
1235  if (!tryToParseBracedList())
1236  parseChildBlock();
1237  break;
1238  }
1239  case tok::at:
1240  nextToken();
1241  if (FormatTok->Tok.is(tok::l_brace))
1242  parseBracedList();
1243  break;
1244  default:
1245  nextToken();
1246  break;
1247  }
1248  } while (!eof());
1249 }
1250 
1251 void UnwrappedLineParser::parseIfThenElse() {
1252  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1253  nextToken();
1254  if (FormatTok->Tok.is(tok::l_paren))
1255  parseParens();
1256  bool NeedsUnwrappedLine = false;
1257  if (FormatTok->Tok.is(tok::l_brace)) {
1258  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1259  parseBlock(/*MustBeDeclaration=*/false);
1262  addUnwrappedLine();
1263  } else {
1264  NeedsUnwrappedLine = true;
1265  }
1266  } else {
1267  addUnwrappedLine();
1268  ++Line->Level;
1269  parseStructuralElement();
1270  --Line->Level;
1271  }
1272  if (FormatTok->Tok.is(tok::kw_else)) {
1274  addUnwrappedLine();
1275  nextToken();
1276  if (FormatTok->Tok.is(tok::l_brace)) {
1277  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1278  parseBlock(/*MustBeDeclaration=*/false);
1279  addUnwrappedLine();
1280  } else if (FormatTok->Tok.is(tok::kw_if)) {
1281  parseIfThenElse();
1282  } else {
1283  addUnwrappedLine();
1284  ++Line->Level;
1285  parseStructuralElement();
1286  --Line->Level;
1287  }
1288  } else if (NeedsUnwrappedLine) {
1289  addUnwrappedLine();
1290  }
1291 }
1292 
1293 void UnwrappedLineParser::parseTryCatch() {
1294  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1295  nextToken();
1296  bool NeedsUnwrappedLine = false;
1297  if (FormatTok->is(tok::colon)) {
1298  // We are in a function try block, what comes is an initializer list.
1299  nextToken();
1300  while (FormatTok->is(tok::identifier)) {
1301  nextToken();
1302  if (FormatTok->is(tok::l_paren))
1303  parseParens();
1304  if (FormatTok->is(tok::comma))
1305  nextToken();
1306  }
1307  }
1308  // Parse try with resource.
1309  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1310  parseParens();
1311  }
1312  if (FormatTok->is(tok::l_brace)) {
1313  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1314  parseBlock(/*MustBeDeclaration=*/false);
1318  addUnwrappedLine();
1319  } else {
1320  NeedsUnwrappedLine = true;
1321  }
1322  } else if (!FormatTok->is(tok::kw_catch)) {
1323  // The C++ standard requires a compound-statement after a try.
1324  // If there's none, we try to assume there's a structuralElement
1325  // and try to continue.
1326  addUnwrappedLine();
1327  ++Line->Level;
1328  parseStructuralElement();
1329  --Line->Level;
1330  }
1331  while (1) {
1332  if (FormatTok->is(tok::at))
1333  nextToken();
1334  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1335  tok::kw___finally) ||
1336  ((Style.Language == FormatStyle::LK_Java ||
1338  FormatTok->is(Keywords.kw_finally)) ||
1339  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1340  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1341  break;
1342  nextToken();
1343  while (FormatTok->isNot(tok::l_brace)) {
1344  if (FormatTok->is(tok::l_paren)) {
1345  parseParens();
1346  continue;
1347  }
1348  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1349  return;
1350  nextToken();
1351  }
1352  NeedsUnwrappedLine = false;
1353  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1354  parseBlock(/*MustBeDeclaration=*/false);
1358  addUnwrappedLine();
1359  } else {
1360  NeedsUnwrappedLine = true;
1361  }
1362  }
1363  if (NeedsUnwrappedLine) {
1364  addUnwrappedLine();
1365  }
1366 }
1367 
1368 void UnwrappedLineParser::parseNamespace() {
1369  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1370 
1371  const FormatToken &InitialToken = *FormatTok;
1372  nextToken();
1373  if (FormatTok->Tok.is(tok::identifier))
1374  nextToken();
1375  if (FormatTok->Tok.is(tok::l_brace)) {
1376  if (ShouldBreakBeforeBrace(Style, InitialToken))
1377  addUnwrappedLine();
1378 
1379  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1381  DeclarationScopeStack.size() > 1);
1382  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1383  // Munch the semicolon after a namespace. This is more common than one would
1384  // think. Puttin the semicolon into its own line is very ugly.
1385  if (FormatTok->Tok.is(tok::semi))
1386  nextToken();
1387  addUnwrappedLine();
1388  }
1389  // FIXME: Add error handling.
1390 }
1391 
1392 void UnwrappedLineParser::parseNew() {
1393  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1394  nextToken();
1395  if (Style.Language != FormatStyle::LK_Java)
1396  return;
1397 
1398  // In Java, we can parse everything up to the parens, which aren't optional.
1399  do {
1400  // There should not be a ;, { or } before the new's open paren.
1401  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1402  return;
1403 
1404  // Consume the parens.
1405  if (FormatTok->is(tok::l_paren)) {
1406  parseParens();
1407 
1408  // If there is a class body of an anonymous class, consume that as child.
1409  if (FormatTok->is(tok::l_brace))
1410  parseChildBlock();
1411  return;
1412  }
1413  nextToken();
1414  } while (!eof());
1415 }
1416 
1417 void UnwrappedLineParser::parseForOrWhileLoop() {
1418  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1419  "'for', 'while' or foreach macro expected");
1420  nextToken();
1421  if (FormatTok->Tok.is(tok::l_paren))
1422  parseParens();
1423  if (FormatTok->Tok.is(tok::l_brace)) {
1424  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1425  parseBlock(/*MustBeDeclaration=*/false);
1426  addUnwrappedLine();
1427  } else {
1428  addUnwrappedLine();
1429  ++Line->Level;
1430  parseStructuralElement();
1431  --Line->Level;
1432  }
1433 }
1434 
1435 void UnwrappedLineParser::parseDoWhile() {
1436  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1437  nextToken();
1438  if (FormatTok->Tok.is(tok::l_brace)) {
1439  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1440  parseBlock(/*MustBeDeclaration=*/false);
1442  addUnwrappedLine();
1443  } else {
1444  addUnwrappedLine();
1445  ++Line->Level;
1446  parseStructuralElement();
1447  --Line->Level;
1448  }
1449 
1450  // FIXME: Add error handling.
1451  if (!FormatTok->Tok.is(tok::kw_while)) {
1452  addUnwrappedLine();
1453  return;
1454  }
1455 
1456  nextToken();
1457  parseStructuralElement();
1458 }
1459 
1460 void UnwrappedLineParser::parseLabel() {
1461  nextToken();
1462  unsigned OldLineLevel = Line->Level;
1463  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1464  --Line->Level;
1465  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1466  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1467  parseBlock(/*MustBeDeclaration=*/false);
1468  if (FormatTok->Tok.is(tok::kw_break)) {
1469  // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1472  addUnwrappedLine();
1473  }
1474  parseStructuralElement();
1475  }
1476  addUnwrappedLine();
1477  } else {
1478  if (FormatTok->is(tok::semi))
1479  nextToken();
1480  addUnwrappedLine();
1481  }
1482  Line->Level = OldLineLevel;
1483 }
1484 
1485 void UnwrappedLineParser::parseCaseLabel() {
1486  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1487  // FIXME: fix handling of complex expressions here.
1488  do {
1489  nextToken();
1490  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1491  parseLabel();
1492 }
1493 
1494 void UnwrappedLineParser::parseSwitch() {
1495  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1496  nextToken();
1497  if (FormatTok->Tok.is(tok::l_paren))
1498  parseParens();
1499  if (FormatTok->Tok.is(tok::l_brace)) {
1500  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1501  parseBlock(/*MustBeDeclaration=*/false);
1502  addUnwrappedLine();
1503  } else {
1504  addUnwrappedLine();
1505  ++Line->Level;
1506  parseStructuralElement();
1507  --Line->Level;
1508  }
1509 }
1510 
1511 void UnwrappedLineParser::parseAccessSpecifier() {
1512  nextToken();
1513  // Understand Qt's slots.
1514  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1515  nextToken();
1516  // Otherwise, we don't know what it is, and we'd better keep the next token.
1517  if (FormatTok->Tok.is(tok::colon))
1518  nextToken();
1519  addUnwrappedLine();
1520 }
1521 
1522 void UnwrappedLineParser::parseEnum() {
1523  // Won't be 'enum' for NS_ENUMs.
1524  if (FormatTok->Tok.is(tok::kw_enum))
1525  nextToken();
1526 
1527  // Eat up enum class ...
1528  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1529  nextToken();
1530 
1531  while (FormatTok->Tok.getIdentifierInfo() ||
1532  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1533  tok::greater, tok::comma, tok::question)) {
1534  nextToken();
1535  // We can have macros or attributes in between 'enum' and the enum name.
1536  if (FormatTok->is(tok::l_paren))
1537  parseParens();
1538  if (FormatTok->is(tok::identifier)) {
1539  nextToken();
1540  // If there are two identifiers in a row, this is likely an elaborate
1541  // return type. In Java, this can be "implements", etc.
1542  if (Style.Language == FormatStyle::LK_Cpp &&
1543  FormatTok->is(tok::identifier))
1544  return;
1545  }
1546  }
1547 
1548  // Just a declaration or something is wrong.
1549  if (FormatTok->isNot(tok::l_brace))
1550  return;
1551  FormatTok->BlockKind = BK_Block;
1552 
1553  if (Style.Language == FormatStyle::LK_Java) {
1554  // Java enums are different.
1555  parseJavaEnumBody();
1556  return;
1557  }
1558 
1559  // Parse enum body.
1560  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1561  if (HasError) {
1562  if (FormatTok->is(tok::semi))
1563  nextToken();
1564  addUnwrappedLine();
1565  }
1566 
1567  // There is no addUnwrappedLine() here so that we fall through to parsing a
1568  // structural element afterwards. Thus, in "enum A {} n, m;",
1569  // "} n, m;" will end up in one unwrapped line.
1570 }
1571 
1572 void UnwrappedLineParser::parseJavaEnumBody() {
1573  // Determine whether the enum is simple, i.e. does not have a semicolon or
1574  // constants with class bodies. Simple enums can be formatted like braced
1575  // lists, contracted to a single line, etc.
1576  unsigned StoredPosition = Tokens->getPosition();
1577  bool IsSimple = true;
1578  FormatToken *Tok = Tokens->getNextToken();
1579  while (Tok) {
1580  if (Tok->is(tok::r_brace))
1581  break;
1582  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1583  IsSimple = false;
1584  break;
1585  }
1586  // FIXME: This will also mark enums with braces in the arguments to enum
1587  // constants as "not simple". This is probably fine in practice, though.
1588  Tok = Tokens->getNextToken();
1589  }
1590  FormatTok = Tokens->setPosition(StoredPosition);
1591 
1592  if (IsSimple) {
1593  parseBracedList();
1594  addUnwrappedLine();
1595  return;
1596  }
1597 
1598  // Parse the body of a more complex enum.
1599  // First add a line for everything up to the "{".
1600  nextToken();
1601  addUnwrappedLine();
1602  ++Line->Level;
1603 
1604  // Parse the enum constants.
1605  while (FormatTok) {
1606  if (FormatTok->is(tok::l_brace)) {
1607  // Parse the constant's class body.
1608  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1609  /*MunchSemi=*/false);
1610  } else if (FormatTok->is(tok::l_paren)) {
1611  parseParens();
1612  } else if (FormatTok->is(tok::comma)) {
1613  nextToken();
1614  addUnwrappedLine();
1615  } else if (FormatTok->is(tok::semi)) {
1616  nextToken();
1617  addUnwrappedLine();
1618  break;
1619  } else if (FormatTok->is(tok::r_brace)) {
1620  addUnwrappedLine();
1621  break;
1622  } else {
1623  nextToken();
1624  }
1625  }
1626 
1627  // Parse the class body after the enum's ";" if any.
1628  parseLevel(/*HasOpeningBrace=*/true);
1629  nextToken();
1630  --Line->Level;
1631  addUnwrappedLine();
1632 }
1633 
1634 void UnwrappedLineParser::parseRecord() {
1635  const FormatToken &InitialToken = *FormatTok;
1636  nextToken();
1637 
1638  // The actual identifier can be a nested name specifier, and in macros
1639  // it is often token-pasted.
1640  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1641  tok::kw___attribute, tok::kw___declspec,
1642  tok::kw_alignas) ||
1643  ((Style.Language == FormatStyle::LK_Java ||
1645  FormatTok->isOneOf(tok::period, tok::comma))) {
1646  bool IsNonMacroIdentifier =
1647  FormatTok->is(tok::identifier) &&
1648  FormatTok->TokenText != FormatTok->TokenText.upper();
1649  nextToken();
1650  // We can have macros or attributes in between 'class' and the class name.
1651  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1652  parseParens();
1653  }
1654 
1655  // Note that parsing away template declarations here leads to incorrectly
1656  // accepting function declarations as record declarations.
1657  // In general, we cannot solve this problem. Consider:
1658  // class A<int> B() {}
1659  // which can be a function definition or a class definition when B() is a
1660  // macro. If we find enough real-world cases where this is a problem, we
1661  // can parse for the 'template' keyword in the beginning of the statement,
1662  // and thus rule out the record production in case there is no template
1663  // (this would still leave us with an ambiguity between template function
1664  // and class declarations).
1665  if (FormatTok->isOneOf(tok::colon, tok::less)) {
1666  while (!eof()) {
1667  if (FormatTok->is(tok::l_brace)) {
1668  calculateBraceTypes(/*ExpectClassBody=*/true);
1669  if (!tryToParseBracedList())
1670  break;
1671  }
1672  if (FormatTok->Tok.is(tok::semi))
1673  return;
1674  nextToken();
1675  }
1676  }
1677  if (FormatTok->Tok.is(tok::l_brace)) {
1678  if (ShouldBreakBeforeBrace(Style, InitialToken))
1679  addUnwrappedLine();
1680 
1681  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1682  /*MunchSemi=*/false);
1683  }
1684  // There is no addUnwrappedLine() here so that we fall through to parsing a
1685  // structural element afterwards. Thus, in "class A {} n, m;",
1686  // "} n, m;" will end up in one unwrapped line.
1687 }
1688 
1689 void UnwrappedLineParser::parseObjCProtocolList() {
1690  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1691  do
1692  nextToken();
1693  while (!eof() && FormatTok->Tok.isNot(tok::greater));
1694  nextToken(); // Skip '>'.
1695 }
1696 
1697 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1698  do {
1699  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1700  nextToken();
1701  addUnwrappedLine();
1702  break;
1703  }
1704  if (FormatTok->is(tok::l_brace)) {
1705  parseBlock(/*MustBeDeclaration=*/false);
1706  // In ObjC interfaces, nothing should be following the "}".
1707  addUnwrappedLine();
1708  } else if (FormatTok->is(tok::r_brace)) {
1709  // Ignore stray "}". parseStructuralElement doesn't consume them.
1710  nextToken();
1711  addUnwrappedLine();
1712  } else {
1713  parseStructuralElement();
1714  }
1715  } while (!eof());
1716 }
1717 
1718 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1719  nextToken();
1720  nextToken(); // interface name
1721 
1722  // @interface can be followed by either a base class, or a category.
1723  if (FormatTok->Tok.is(tok::colon)) {
1724  nextToken();
1725  nextToken(); // base class name
1726  } else if (FormatTok->Tok.is(tok::l_paren))
1727  // Skip category, if present.
1728  parseParens();
1729 
1730  if (FormatTok->Tok.is(tok::less))
1731  parseObjCProtocolList();
1732 
1733  if (FormatTok->Tok.is(tok::l_brace)) {
1736  addUnwrappedLine();
1737  parseBlock(/*MustBeDeclaration=*/true);
1738  }
1739 
1740  // With instance variables, this puts '}' on its own line. Without instance
1741  // variables, this ends the @interface line.
1742  addUnwrappedLine();
1743 
1744  parseObjCUntilAtEnd();
1745 }
1746 
1747 void UnwrappedLineParser::parseObjCProtocol() {
1748  nextToken();
1749  nextToken(); // protocol name
1750 
1751  if (FormatTok->Tok.is(tok::less))
1752  parseObjCProtocolList();
1753 
1754  // Check for protocol declaration.
1755  if (FormatTok->Tok.is(tok::semi)) {
1756  nextToken();
1757  return addUnwrappedLine();
1758  }
1759 
1760  addUnwrappedLine();
1761  parseObjCUntilAtEnd();
1762 }
1763 
1764 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1765  assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1766  nextToken();
1767 
1768  // Consume the "default" in "export default class/function".
1769  if (FormatTok->is(tok::kw_default))
1770  nextToken();
1771 
1772  // Consume "function" and "default function", so that these get parsed as
1773  // free-standing JS functions, i.e. do not require a trailing semicolon.
1774  if (FormatTok->is(Keywords.kw_function)) {
1775  nextToken();
1776  return;
1777  }
1778 
1779  if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1780  Keywords.kw_var))
1781  return; // Fall through to parsing the corresponding structure.
1782 
1783  if (FormatTok->is(tok::l_brace)) {
1784  FormatTok->BlockKind = BK_Block;
1785  parseBracedList();
1786  }
1787 
1788  while (!eof() && FormatTok->isNot(tok::semi) &&
1789  FormatTok->isNot(tok::l_brace)) {
1790  nextToken();
1791  }
1792 }
1793 
1794 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1795  StringRef Prefix = "") {
1796  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1797  << (Line.InPPDirective ? " MACRO" : "") << ": ";
1798  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1799  E = Line.Tokens.end();
1800  I != E; ++I) {
1801  llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1802  }
1803  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1804  E = Line.Tokens.end();
1805  I != E; ++I) {
1806  const UnwrappedLineNode &Node = *I;
1808  I = Node.Children.begin(),
1809  E = Node.Children.end();
1810  I != E; ++I) {
1811  printDebugInfo(*I, "\nChild: ");
1812  }
1813  }
1814  llvm::dbgs() << "\n";
1815 }
1816 
1817 void UnwrappedLineParser::addUnwrappedLine() {
1818  if (Line->Tokens.empty())
1819  return;
1820  DEBUG({
1821  if (CurrentLines == &Lines)
1822  printDebugInfo(*Line);
1823  });
1824  CurrentLines->push_back(std::move(*Line));
1825  Line->Tokens.clear();
1826  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1827  CurrentLines->append(
1828  std::make_move_iterator(PreprocessorDirectives.begin()),
1829  std::make_move_iterator(PreprocessorDirectives.end()));
1830  PreprocessorDirectives.clear();
1831  }
1832 }
1833 
1834 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1835 
1836 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1837  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1838  FormatTok.NewlinesBefore > 0;
1839 }
1840 
1841 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1842  bool JustComments = Line->Tokens.empty();
1843  for (SmallVectorImpl<FormatToken *>::const_iterator
1844  I = CommentsBeforeNextToken.begin(),
1845  E = CommentsBeforeNextToken.end();
1846  I != E; ++I) {
1847  if (isOnNewLine(**I) && JustComments)
1848  addUnwrappedLine();
1849  pushToken(*I);
1850  }
1851  if (NewlineBeforeNext && JustComments)
1852  addUnwrappedLine();
1853  CommentsBeforeNextToken.clear();
1854 }
1855 
1856 void UnwrappedLineParser::nextToken() {
1857  if (eof())
1858  return;
1859  flushComments(isOnNewLine(*FormatTok));
1860  pushToken(FormatTok);
1861  readToken();
1862 }
1863 
1864 void UnwrappedLineParser::readToken() {
1865  bool CommentsInCurrentLine = true;
1866  do {
1867  FormatTok = Tokens->getNextToken();
1868  assert(FormatTok);
1869  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1870  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1871  // If there is an unfinished unwrapped line, we flush the preprocessor
1872  // directives only after that unwrapped line was finished later.
1873  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1874  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1875  // Comments stored before the preprocessor directive need to be output
1876  // before the preprocessor directive, at the same level as the
1877  // preprocessor directive, as we consider them to apply to the directive.
1878  flushComments(isOnNewLine(*FormatTok));
1879  parsePPDirective();
1880  }
1881  while (FormatTok->Type == TT_ConflictStart ||
1882  FormatTok->Type == TT_ConflictEnd ||
1883  FormatTok->Type == TT_ConflictAlternative) {
1884  if (FormatTok->Type == TT_ConflictStart) {
1885  conditionalCompilationStart(/*Unreachable=*/false);
1886  } else if (FormatTok->Type == TT_ConflictAlternative) {
1887  conditionalCompilationAlternative();
1888  } else if (FormatTok->Type == TT_ConflictEnd) {
1889  conditionalCompilationEnd();
1890  }
1891  FormatTok = Tokens->getNextToken();
1892  FormatTok->MustBreakBefore = true;
1893  }
1894 
1895  if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1896  !Line->InPPDirective) {
1897  continue;
1898  }
1899 
1900  if (!FormatTok->Tok.is(tok::comment))
1901  return;
1902  if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1903  CommentsInCurrentLine = false;
1904  }
1905  if (CommentsInCurrentLine) {
1906  pushToken(FormatTok);
1907  } else {
1908  CommentsBeforeNextToken.push_back(FormatTok);
1909  }
1910  } while (!eof());
1911 }
1912 
1913 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1914  Line->Tokens.push_back(UnwrappedLineNode(Tok));
1915  if (MustBreakBeforeNextToken) {
1916  Line->Tokens.back().Tok->MustBreakBefore = true;
1917  MustBreakBeforeNextToken = false;
1918  }
1919 }
1920 
1921 } // end namespace format
1922 } // end namespace clang
int Position
SmallVector< UnwrappedLine, 0 > Children
Indent in all namespaces.
Definition: Format.h:312
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:264
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:286
Should be used for C, C++, ObjectiveC, ObjectiveC++.
Definition: Format.h:283
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback)
FormatToken *& ResetToken
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned Level
The indent level of the UnwrappedLine.
FormatToken * FormatTok
Definition: Format.cpp:1199
Should be used for Java.
Definition: Format.h:285
static bool tokenCanStartNewLine(const clang::Token &Tok)
AdditionalKeywords Keywords
Definition: Format.cpp:1209
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:316
static bool isGoogScope(const UnwrappedLine &Line)
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
AnnotatingParser & P
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::vector< bool > & Stack
FormatTokenSource *& TokenSource
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:287
ContinuationIndenter * Indenter
MatchFinder::MatchCallback * Callback
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
FormatToken * Token
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:183
Always attach braces to surrounding context.
Definition: Format.h:165
#define false
Definition: stdbool.h:33
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:522
ArrayRef< FormatToken * > Tokens
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
virtual FormatToken * setPosition(unsigned Position)=0
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:42
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:310
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:294
Always break before braces.
Definition: Format.h:175
ast_type_traits::DynTypedNode Node
UnwrappedLine & Line
FormatStyle & Style
Definition: Format.cpp:1207
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:281
Like Attach, but break before function definitions, and 'else'.
Definition: Format.h:173
virtual unsigned getPosition()=0
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:43
unsigned PreviousLineLevel
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:154
FormatTokenSource * PreviousTokenSource