clang  3.7.0
UnwrappedLineFormatter.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "UnwrappedLineFormatter.h"
11 #include "WhitespaceManager.h"
12 #include "llvm/Support/Debug.h"
13 
14 #define DEBUG_TYPE "format-formatter"
15 
16 namespace clang {
17 namespace format {
18 
19 namespace {
20 
21 bool startsExternCBlock(const AnnotatedLine &Line) {
22  const FormatToken *Next = Line.First->getNextNonComment();
23  const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
24  return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
25  NextNext && NextNext->is(tok::l_brace);
26 }
27 
28 /// \brief Tracks the indent level of \c AnnotatedLines across levels.
29 ///
30 /// \c nextLine must be called for each \c AnnotatedLine, after which \c
31 /// getIndent() will return the indent for the last line \c nextLine was called
32 /// with.
33 /// If the line is not formatted (and thus the indent does not change), calling
34 /// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
35 /// subsequent lines on the same level to be indented at the same level as the
36 /// given line.
37 class LevelIndentTracker {
38 public:
39  LevelIndentTracker(const FormatStyle &Style,
40  const AdditionalKeywords &Keywords, unsigned StartLevel,
41  int AdditionalIndent)
42  : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
43  for (unsigned i = 0; i != StartLevel; ++i)
44  IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
45  }
46 
47  /// \brief Returns the indent for the current line.
48  unsigned getIndent() const { return Indent; }
49 
50  /// \brief Update the indent state given that \p Line is going to be formatted
51  /// next.
52  void nextLine(const AnnotatedLine &Line) {
53  Offset = getIndentOffset(*Line.First);
54  // Update the indent level cache size so that we can rely on it
55  // having the right size in adjustToUnmodifiedline.
56  while (IndentForLevel.size() <= Line.Level)
57  IndentForLevel.push_back(-1);
58  if (Line.InPPDirective) {
59  Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
60  } else {
61  IndentForLevel.resize(Line.Level + 1);
62  Indent = getIndent(IndentForLevel, Line.Level);
63  }
64  if (static_cast<int>(Indent) + Offset >= 0)
65  Indent += Offset;
66  }
67 
68  /// \brief Update the level indent to adapt to the given \p Line.
69  ///
70  /// When a line is not formatted, we move the subsequent lines on the same
71  /// level to the same indent.
72  /// Note that \c nextLine must have been called before this method.
73  void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
74  unsigned LevelIndent = Line.First->OriginalColumn;
75  if (static_cast<int>(LevelIndent) - Offset >= 0)
76  LevelIndent -= Offset;
77  if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
78  !Line.InPPDirective)
79  IndentForLevel[Line.Level] = LevelIndent;
80  }
81 
82 private:
83  /// \brief Get the offset of the line relatively to the level.
84  ///
85  /// For example, 'public:' labels in classes are offset by 1 or 2
86  /// characters to the left from their level.
87  int getIndentOffset(const FormatToken &RootToken) {
88  if (Style.Language == FormatStyle::LK_Java ||
89  Style.Language == FormatStyle::LK_JavaScript)
90  return 0;
91  if (RootToken.isAccessSpecifier(false) ||
92  RootToken.isObjCAccessSpecifier() ||
93  (RootToken.is(Keywords.kw_signals) && RootToken.Next &&
94  RootToken.Next->is(tok::colon)))
95  return Style.AccessModifierOffset;
96  return 0;
97  }
98 
99  /// \brief Get the indent of \p Level from \p IndentForLevel.
100  ///
101  /// \p IndentForLevel must contain the indent for the level \c l
102  /// at \p IndentForLevel[l], or a value < 0 if the indent for
103  /// that level is unknown.
104  unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
105  if (IndentForLevel[Level] != -1)
106  return IndentForLevel[Level];
107  if (Level == 0)
108  return 0;
109  return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
110  }
111 
112  const FormatStyle &Style;
113  const AdditionalKeywords &Keywords;
114  const unsigned AdditionalIndent;
115 
116  /// \brief The indent in characters for each level.
117  std::vector<int> IndentForLevel;
118 
119  /// \brief Offset of the current line relative to the indent level.
120  ///
121  /// For example, the 'public' keywords is often indented with a negative
122  /// offset.
123  int Offset = 0;
124 
125  /// \brief The current line's indent.
126  unsigned Indent = 0;
127 };
128 
129 class LineJoiner {
130 public:
131  LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
132  const SmallVectorImpl<AnnotatedLine *> &Lines)
133  : Style(Style), Keywords(Keywords), End(Lines.end()),
134  Next(Lines.begin()) {}
135 
136  /// \brief Returns the next line, merging multiple lines into one if possible.
137  const AnnotatedLine *getNextMergedLine(bool DryRun,
138  LevelIndentTracker &IndentTracker) {
139  if (Next == End)
140  return nullptr;
141  const AnnotatedLine *Current = *Next;
142  IndentTracker.nextLine(*Current);
143  unsigned MergedLines =
144  tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End);
145  if (MergedLines > 0 && Style.ColumnLimit == 0)
146  // Disallow line merging if there is a break at the start of one of the
147  // input lines.
148  for (unsigned i = 0; i < MergedLines; ++i)
149  if (Next[i + 1]->First->NewlinesBefore > 0)
150  MergedLines = 0;
151  if (!DryRun)
152  for (unsigned i = 0; i < MergedLines; ++i)
153  join(*Next[i], *Next[i + 1]);
154  Next = Next + MergedLines + 1;
155  return Current;
156  }
157 
158 private:
159  /// \brief Calculates how many lines can be merged into 1 starting at \p I.
160  unsigned
161  tryFitMultipleLinesInOne(unsigned Indent,
162  SmallVectorImpl<AnnotatedLine *>::const_iterator I,
163  SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
164  // Can't join the last line with anything.
165  if (I + 1 == E)
166  return 0;
167  // We can never merge stuff if there are trailing line comments.
168  const AnnotatedLine *TheLine = *I;
169  if (TheLine->Last->is(TT_LineComment))
170  return 0;
171  if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
172  return 0;
173  if (TheLine->InPPDirective &&
174  (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline))
175  return 0;
176 
177  if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
178  return 0;
179 
180  unsigned Limit =
181  Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
182  // If we already exceed the column limit, we set 'Limit' to 0. The different
183  // tryMerge..() functions can then decide whether to still do merging.
184  Limit = TheLine->Last->TotalLength > Limit
185  ? 0
186  : Limit - TheLine->Last->TotalLength;
187 
188  // FIXME: TheLine->Level != 0 might or might not be the right check to do.
189  // If necessary, change to something smarter.
190  bool MergeShortFunctions =
191  Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
192  (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
193  I[1]->First->is(tok::r_brace)) ||
194  (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
195  TheLine->Level != 0);
196 
197  if (TheLine->Last->is(TT_FunctionLBrace) &&
198  TheLine->First != TheLine->Last) {
199  return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
200  }
201  if (TheLine->Last->is(tok::l_brace)) {
202  return Style.BreakBeforeBraces == FormatStyle::BS_Attach
203  ? tryMergeSimpleBlock(I, E, Limit)
204  : 0;
205  }
206  if (I[1]->First->is(TT_FunctionLBrace) &&
207  Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
208  if (I[1]->Last->is(TT_LineComment))
209  return 0;
210 
211  // Check for Limit <= 2 to account for the " {".
212  if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
213  return 0;
214  Limit -= 2;
215 
216  unsigned MergedLines = 0;
217  if (MergeShortFunctions) {
218  MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
219  // If we managed to merge the block, count the function header, which is
220  // on a separate line.
221  if (MergedLines > 0)
222  ++MergedLines;
223  }
224  return MergedLines;
225  }
226  if (TheLine->First->is(tok::kw_if)) {
227  return Style.AllowShortIfStatementsOnASingleLine
228  ? tryMergeSimpleControlStatement(I, E, Limit)
229  : 0;
230  }
231  if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
232  return Style.AllowShortLoopsOnASingleLine
233  ? tryMergeSimpleControlStatement(I, E, Limit)
234  : 0;
235  }
236  if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
237  return Style.AllowShortCaseLabelsOnASingleLine
238  ? tryMergeShortCaseLabels(I, E, Limit)
239  : 0;
240  }
241  if (TheLine->InPPDirective &&
242  (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
243  return tryMergeSimplePPDirective(I, E, Limit);
244  }
245  return 0;
246  }
247 
248  unsigned
249  tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
250  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
251  unsigned Limit) {
252  if (Limit == 0)
253  return 0;
254  if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
255  return 0;
256  if (1 + I[1]->Last->TotalLength > Limit)
257  return 0;
258  return 1;
259  }
260 
261  unsigned tryMergeSimpleControlStatement(
262  SmallVectorImpl<AnnotatedLine *>::const_iterator I,
263  SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
264  if (Limit == 0)
265  return 0;
266  if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
267  Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
268  (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine))
269  return 0;
270  if (I[1]->InPPDirective != (*I)->InPPDirective ||
271  (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
272  return 0;
273  Limit = limitConsideringMacros(I + 1, E, Limit);
274  AnnotatedLine &Line = **I;
275  if (Line.Last->isNot(tok::r_paren))
276  return 0;
277  if (1 + I[1]->Last->TotalLength > Limit)
278  return 0;
279  if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
280  TT_LineComment))
281  return 0;
282  // Only inline simple if's (no nested if or else).
283  if (I + 2 != E && Line.startsWith(tok::kw_if) &&
284  I[2]->First->is(tok::kw_else))
285  return 0;
286  return 1;
287  }
288 
289  unsigned
290  tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
291  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
292  unsigned Limit) {
293  if (Limit == 0 || I + 1 == E ||
294  I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
295  return 0;
296  unsigned NumStmts = 0;
297  unsigned Length = 0;
298  bool InPPDirective = I[0]->InPPDirective;
299  for (; NumStmts < 3; ++NumStmts) {
300  if (I + 1 + NumStmts == E)
301  break;
302  const AnnotatedLine *Line = I[1 + NumStmts];
303  if (Line->InPPDirective != InPPDirective)
304  break;
305  if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
306  break;
307  if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
308  tok::kw_while, tok::comment))
309  return 0;
310  Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
311  }
312  if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
313  return 0;
314  return NumStmts;
315  }
316 
317  unsigned
318  tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
319  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
320  unsigned Limit) {
321  AnnotatedLine &Line = **I;
322 
323  // Don't merge ObjC @ keywords and methods.
324  // FIXME: If an option to allow short exception handling clauses on a single
325  // line is added, change this to not return for @try and friends.
326  if (Style.Language != FormatStyle::LK_Java &&
327  Line.First->isOneOf(tok::at, tok::minus, tok::plus))
328  return 0;
329 
330  // Check that the current line allows merging. This depends on whether we
331  // are in a control flow statements as well as several style flags.
332  if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
333  (Line.First->Next && Line.First->Next->is(tok::kw_else)))
334  return 0;
335  if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
336  tok::kw___try, tok::kw_catch, tok::kw___finally,
337  tok::kw_for, tok::r_brace, Keywords.kw___except)) {
338  if (!Style.AllowShortBlocksOnASingleLine)
339  return 0;
340  if (!Style.AllowShortIfStatementsOnASingleLine &&
341  Line.startsWith(tok::kw_if))
342  return 0;
343  if (!Style.AllowShortLoopsOnASingleLine &&
344  Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
345  return 0;
346  // FIXME: Consider an option to allow short exception handling clauses on
347  // a single line.
348  // FIXME: This isn't covered by tests.
349  // FIXME: For catch, __except, __finally the first token on the line
350  // is '}', so this isn't correct here.
351  if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
352  Keywords.kw___except, tok::kw___finally))
353  return 0;
354  }
355 
356  FormatToken *Tok = I[1]->First;
357  if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
358  (Tok->getNextNonComment() == nullptr ||
359  Tok->getNextNonComment()->is(tok::semi))) {
360  // We merge empty blocks even if the line exceeds the column limit.
361  Tok->SpacesRequiredBefore = 0;
362  Tok->CanBreakBefore = true;
363  return 1;
364  } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
365  !startsExternCBlock(Line)) {
366  // We don't merge short records.
367  if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
368  Keywords.kw_interface))
369  return 0;
370 
371  // Check that we still have three lines and they fit into the limit.
372  if (I + 2 == E || I[2]->Type == LT_Invalid)
373  return 0;
374  Limit = limitConsideringMacros(I + 2, E, Limit);
375 
376  if (!nextTwoLinesFitInto(I, Limit))
377  return 0;
378 
379  // Second, check that the next line does not contain any braces - if it
380  // does, readability declines when putting it into a single line.
381  if (I[1]->Last->is(TT_LineComment))
382  return 0;
383  do {
384  if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
385  return 0;
386  Tok = Tok->Next;
387  } while (Tok);
388 
389  // Last, check that the third line starts with a closing brace.
390  Tok = I[2]->First;
391  if (Tok->isNot(tok::r_brace))
392  return 0;
393 
394  // Don't merge "if (a) { .. } else {".
395  if (Tok->Next && Tok->Next->is(tok::kw_else))
396  return 0;
397 
398  return 2;
399  }
400  return 0;
401  }
402 
403  /// Returns the modified column limit for \p I if it is inside a macro and
404  /// needs a trailing '\'.
405  unsigned
406  limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
407  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
408  unsigned Limit) {
409  if (I[0]->InPPDirective && I + 1 != E &&
410  !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
411  return Limit < 2 ? 0 : Limit - 2;
412  }
413  return Limit;
414  }
415 
416  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
417  unsigned Limit) {
418  if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
419  return false;
420  return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
421  }
422 
423  bool containsMustBreak(const AnnotatedLine *Line) {
424  for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
425  if (Tok->MustBreakBefore)
426  return true;
427  }
428  return false;
429  }
430 
431  void join(AnnotatedLine &A, const AnnotatedLine &B) {
432  assert(!A.Last->Next);
433  assert(!B.First->Previous);
434  if (B.Affected)
435  A.Affected = true;
436  A.Last->Next = B.First;
437  B.First->Previous = A.Last;
438  B.First->CanBreakBefore = true;
439  unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
440  for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
441  Tok->TotalLength += LengthA;
442  A.Last = Tok;
443  }
444  }
445 
446  const FormatStyle &Style;
447  const AdditionalKeywords &Keywords;
448  const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
449 
450  SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
451 };
452 
453 static void markFinalized(FormatToken *Tok) {
454  for (; Tok; Tok = Tok->Next) {
455  Tok->Finalized = true;
456  for (AnnotatedLine *Child : Tok->Children)
457  markFinalized(Child->First);
458  }
459 }
460 
461 #ifndef NDEBUG
462 static void printLineState(const LineState &State) {
463  llvm::dbgs() << "State: ";
464  for (const ParenState &P : State.Stack) {
465  llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
466  << " ";
467  }
468  llvm::dbgs() << State.NextToken->TokenText << "\n";
469 }
470 #endif
471 
472 /// \brief Base class for classes that format one \c AnnotatedLine.
473 class LineFormatter {
474 public:
475  LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
476  const FormatStyle &Style,
477  UnwrappedLineFormatter *BlockFormatter)
478  : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
479  BlockFormatter(BlockFormatter) {}
480  virtual ~LineFormatter() {}
481 
482  /// \brief Formats an \c AnnotatedLine and returns the penalty.
483  ///
484  /// If \p DryRun is \c false, directly applies the changes.
485  virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
486  bool DryRun) = 0;
487 
488 protected:
489  /// \brief If the \p State's next token is an r_brace closing a nested block,
490  /// format the nested block before it.
491  ///
492  /// Returns \c true if all children could be placed successfully and adapts
493  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
494  /// creates changes using \c Whitespaces.
495  ///
496  /// The crucial idea here is that children always get formatted upon
497  /// encountering the closing brace right after the nested block. Now, if we
498  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
499  /// \c false), the entire block has to be kept on the same line (which is only
500  /// possible if it fits on the line, only contains a single statement, etc.
501  ///
502  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
503  /// break after the "{", format all lines with correct indentation and the put
504  /// the closing "}" on yet another new line.
505  ///
506  /// This enables us to keep the simple structure of the
507  /// \c UnwrappedLineFormatter, where we only have two options for each token:
508  /// break or don't break.
509  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
510  unsigned &Penalty) {
511  const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
512  FormatToken &Previous = *State.NextToken->Previous;
513  if (!LBrace || LBrace->isNot(tok::l_brace) ||
514  LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
515  // The previous token does not open a block. Nothing to do. We don't
516  // assert so that we can simply call this function for all tokens.
517  return true;
518 
519  if (NewLine) {
520  int AdditionalIndent = State.Stack.back().Indent -
521  Previous.Children[0]->Level * Style.IndentWidth;
522 
523  Penalty +=
524  BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
525  /*FixBadIndentation=*/true);
526  return true;
527  }
528 
529  if (Previous.Children[0]->First->MustBreakBefore)
530  return false;
531 
532  // Cannot merge multiple statements into a single line.
533  if (Previous.Children.size() > 1)
534  return false;
535 
536  // Cannot merge into one line if this line ends on a comment.
537  if (Previous.is(tok::comment))
538  return false;
539 
540  // We can't put the closing "}" on a line with a trailing comment.
541  if (Previous.Children[0]->Last->isTrailingComment())
542  return false;
543 
544  // If the child line exceeds the column limit, we wouldn't want to merge it.
545  // We add +2 for the trailing " }".
546  if (Style.ColumnLimit > 0 &&
547  Previous.Children[0]->Last->TotalLength + State.Column + 2 >
548  Style.ColumnLimit)
549  return false;
550 
551  if (!DryRun) {
552  Whitespaces->replaceWhitespace(
553  *Previous.Children[0]->First,
554  /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
555  /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
556  }
557  Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun);
558 
559  State.Column += 1 + Previous.Children[0]->Last->TotalLength;
560  return true;
561  }
562 
563  ContinuationIndenter *Indenter;
564 
565 private:
566  WhitespaceManager *Whitespaces;
567  const FormatStyle &Style;
568  UnwrappedLineFormatter *BlockFormatter;
569 };
570 
571 /// \brief Formatter that keeps the existing line breaks.
572 class NoColumnLimitLineFormatter : public LineFormatter {
573 public:
574  NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
575  WhitespaceManager *Whitespaces,
576  const FormatStyle &Style,
577  UnwrappedLineFormatter *BlockFormatter)
578  : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
579 
580  /// \brief Formats the line, simply keeping all of the input's line breaking
581  /// decisions.
582  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
583  bool DryRun) override {
584  assert(!DryRun);
585  LineState State =
586  Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
587  while (State.NextToken) {
588  bool Newline =
589  Indenter->mustBreak(State) ||
590  (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
591  unsigned Penalty = 0;
592  formatChildren(State, Newline, /*DryRun=*/false, Penalty);
593  Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
594  }
595  return 0;
596  }
597 };
598 
599 /// \brief Formatter that puts all tokens into a single line without breaks.
600 class NoLineBreakFormatter : public LineFormatter {
601 public:
602  NoLineBreakFormatter(ContinuationIndenter *Indenter,
603  WhitespaceManager *Whitespaces, const FormatStyle &Style,
604  UnwrappedLineFormatter *BlockFormatter)
605  : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
606 
607  /// \brief Puts all tokens into a single line.
608  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
609  bool DryRun) {
610  unsigned Penalty = 0;
611  LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
612  while (State.NextToken) {
613  formatChildren(State, /*Newline=*/false, DryRun, Penalty);
614  Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
615  }
616  return Penalty;
617  }
618 };
619 
620 /// \brief Finds the best way to break lines.
621 class OptimizingLineFormatter : public LineFormatter {
622 public:
623  OptimizingLineFormatter(ContinuationIndenter *Indenter,
624  WhitespaceManager *Whitespaces,
625  const FormatStyle &Style,
626  UnwrappedLineFormatter *BlockFormatter)
627  : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
628 
629  /// \brief Formats the line by finding the best line breaks with line lengths
630  /// below the column limit.
631  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
632  bool DryRun) {
633  LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
634 
635  // If the ObjC method declaration does not fit on a line, we should format
636  // it with one arg per line.
637  if (State.Line->Type == LT_ObjCMethodDecl)
638  State.Stack.back().BreakBeforeParameter = true;
639 
640  // Find best solution in solution space.
641  return analyzeSolutionSpace(State, DryRun);
642  }
643 
644 private:
645  struct CompareLineStatePointers {
646  bool operator()(LineState *obj1, LineState *obj2) const {
647  return *obj1 < *obj2;
648  }
649  };
650 
651  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
652  ///
653  /// In case of equal penalties, we want to prefer states that were inserted
654  /// first. During state generation we make sure that we insert states first
655  /// that break the line as late as possible.
656  typedef std::pair<unsigned, unsigned> OrderedPenalty;
657 
658  /// \brief An edge in the solution space from \c Previous->State to \c State,
659  /// inserting a newline dependent on the \c NewLine.
660  struct StateNode {
661  StateNode(const LineState &State, bool NewLine, StateNode *Previous)
662  : State(State), NewLine(NewLine), Previous(Previous) {}
663  LineState State;
664  bool NewLine;
665  StateNode *Previous;
666  };
667 
668  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
669  /// \c State has the given \c OrderedPenalty.
670  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
671 
672  /// \brief The BFS queue type.
673  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
674  std::greater<QueueItem>> QueueType;
675 
676  /// \brief Analyze the entire solution space starting from \p InitialState.
677  ///
678  /// This implements a variant of Dijkstra's algorithm on the graph that spans
679  /// the solution space (\c LineStates are the nodes). The algorithm tries to
680  /// find the shortest path (the one with lowest penalty) from \p InitialState
681  /// to a state where all tokens are placed. Returns the penalty.
682  ///
683  /// If \p DryRun is \c false, directly applies the changes.
684  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
685  std::set<LineState *, CompareLineStatePointers> Seen;
686 
687  // Increasing count of \c StateNode items we have created. This is used to
688  // create a deterministic order independent of the container.
689  unsigned Count = 0;
690  QueueType Queue;
691 
692  // Insert start element into queue.
693  StateNode *Node =
694  new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
695  Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
696  ++Count;
697 
698  unsigned Penalty = 0;
699 
700  // While not empty, take first element and follow edges.
701  while (!Queue.empty()) {
702  Penalty = Queue.top().first.first;
703  StateNode *Node = Queue.top().second;
704  if (!Node->State.NextToken) {
705  DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
706  break;
707  }
708  Queue.pop();
709 
710  // Cut off the analysis of certain solutions if the analysis gets too
711  // complex. See description of IgnoreStackForComparison.
712  if (Count > 10000)
713  Node->State.IgnoreStackForComparison = true;
714 
715  if (!Seen.insert(&Node->State).second)
716  // State already examined with lower penalty.
717  continue;
718 
719  FormatDecision LastFormat = Node->State.NextToken->Decision;
720  if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
721  addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
722  if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
723  addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
724  }
725 
726  if (Queue.empty()) {
727  // We were unable to find a solution, do nothing.
728  // FIXME: Add diagnostic?
729  DEBUG(llvm::dbgs() << "Could not find a solution.\n");
730  return 0;
731  }
732 
733  // Reconstruct the solution.
734  if (!DryRun)
735  reconstructPath(InitialState, Queue.top().second);
736 
737  DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
738  DEBUG(llvm::dbgs() << "---\n");
739 
740  return Penalty;
741  }
742 
743  /// \brief Add the following state to the analysis queue \c Queue.
744  ///
745  /// Assume the current state is \p PreviousNode and has been reached with a
746  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
747  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
748  bool NewLine, unsigned *Count, QueueType *Queue) {
749  if (NewLine && !Indenter->canBreak(PreviousNode->State))
750  return;
751  if (!NewLine && Indenter->mustBreak(PreviousNode->State))
752  return;
753 
754  StateNode *Node = new (Allocator.Allocate())
755  StateNode(PreviousNode->State, NewLine, PreviousNode);
756  if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
757  return;
758 
759  Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
760 
761  Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
762  ++(*Count);
763  }
764 
765  /// \brief Applies the best formatting by reconstructing the path in the
766  /// solution space that leads to \c Best.
767  void reconstructPath(LineState &State, StateNode *Best) {
768  std::deque<StateNode *> Path;
769  // We do not need a break before the initial token.
770  while (Best->Previous) {
771  Path.push_front(Best);
772  Best = Best->Previous;
773  }
774  for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
775  I != E; ++I) {
776  unsigned Penalty = 0;
777  formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
778  Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
779 
780  DEBUG({
781  printLineState((*I)->Previous->State);
782  if ((*I)->NewLine) {
783  llvm::dbgs() << "Penalty for placing "
784  << (*I)->Previous->State.NextToken->Tok.getName() << ": "
785  << Penalty << "\n";
786  }
787  });
788  }
789  }
790 
791  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
792 };
793 
794 } // namespace
795 
796 unsigned
798  bool DryRun, int AdditionalIndent,
799  bool FixBadIndentation) {
800  LineJoiner Joiner(Style, Keywords, Lines);
801 
802  // Try to look up already computed penalty in DryRun-mode.
803  std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
804  &Lines, AdditionalIndent);
805  auto CacheIt = PenaltyCache.find(CacheKey);
806  if (DryRun && CacheIt != PenaltyCache.end())
807  return CacheIt->second;
808 
809  assert(!Lines.empty());
810  unsigned Penalty = 0;
811  LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
812  AdditionalIndent);
813  const AnnotatedLine *PreviousLine = nullptr;
814  const AnnotatedLine *NextLine = nullptr;
815  for (const AnnotatedLine *Line =
816  Joiner.getNextMergedLine(DryRun, IndentTracker);
817  Line; Line = NextLine) {
818  const AnnotatedLine &TheLine = *Line;
819  unsigned Indent = IndentTracker.getIndent();
820  bool FixIndentation =
821  FixBadIndentation && (Indent != TheLine.First->OriginalColumn);
822  bool ShouldFormat = TheLine.Affected || FixIndentation;
823  // We cannot format this line; if the reason is that the line had a
824  // parsing error, remember that.
825  if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat)
826  *IncompleteFormat = true;
827 
828  if (ShouldFormat && TheLine.Type != LT_Invalid) {
829  if (!DryRun)
830  formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
831  TheLine.InPPDirective);
832 
833  NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
834  unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
835  bool FitsIntoOneLine =
836  TheLine.Last->TotalLength + Indent <= ColumnLimit ||
837  TheLine.Type == LT_ImportStatement;
838 
839  if (Style.ColumnLimit == 0)
840  NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
841  .formatLine(TheLine, Indent, DryRun);
842  else if (FitsIntoOneLine)
843  Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
844  .formatLine(TheLine, Indent, DryRun);
845  else
846  Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
847  .formatLine(TheLine, Indent, DryRun);
848  } else {
849  // If no token in the current line is affected, we still need to format
850  // affected children.
851  if (TheLine.ChildrenAffected)
852  format(TheLine.Children, DryRun);
853 
854  // Adapt following lines on the current indent level to the same level
855  // unless the current \c AnnotatedLine is not at the beginning of a line.
856  bool StartsNewLine =
857  TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
858  if (StartsNewLine)
859  IndentTracker.adjustToUnmodifiedLine(TheLine);
860  if (!DryRun) {
861  bool ReformatLeadingWhitespace =
862  StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
863  TheLine.LeadingEmptyLinesAffected);
864  // Format the first token.
865  if (ReformatLeadingWhitespace)
866  formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
867  TheLine.First->OriginalColumn,
868  TheLine.InPPDirective);
869  else
870  Whitespaces->addUntouchableToken(*TheLine.First,
871  TheLine.InPPDirective);
872 
873  // Notify the WhitespaceManager about the unchanged whitespace.
874  for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
875  Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
876  }
877  NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
878  }
879  if (!DryRun)
880  markFinalized(TheLine.First);
881  PreviousLine = &TheLine;
882  }
883  PenaltyCache[CacheKey] = Penalty;
884  return Penalty;
885 }
886 
887 void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
888  const AnnotatedLine *PreviousLine,
889  unsigned IndentLevel,
890  unsigned Indent,
891  bool InPPDirective) {
892  if (RootToken.is(tok::eof)) {
893  unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
894  Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0,
895  /*Spaces=*/0, /*TargetColumn=*/0);
896  return;
897  }
898  unsigned Newlines =
899  std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
900  // Remove empty lines before "}" where applicable.
901  if (RootToken.is(tok::r_brace) &&
902  (!RootToken.Next ||
903  (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
904  Newlines = std::min(Newlines, 1u);
905  if (Newlines == 0 && !RootToken.IsFirst)
906  Newlines = 1;
907  if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
908  Newlines = 0;
909 
910  // Remove empty lines after "{".
911  if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
912  PreviousLine->Last->is(tok::l_brace) &&
913  PreviousLine->First->isNot(tok::kw_namespace) &&
914  !startsExternCBlock(*PreviousLine))
915  Newlines = 1;
916 
917  // Insert extra new line before access specifiers.
918  if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
919  RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
920  ++Newlines;
921 
922  // Remove empty lines after access specifiers.
923  if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
924  (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
925  Newlines = std::min(1u, Newlines);
926 
927  Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
928  Indent, InPPDirective &&
929  !RootToken.HasUnescapedNewline);
930 }
931 
932 unsigned
933 UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
934  const AnnotatedLine *NextLine) const {
935  // In preprocessor directives reserve two chars for trailing " \" if the
936  // next line continues the preprocessor directive.
937  bool ContinuesPPDirective =
938  InPPDirective &&
939  // If there is no next line, this is likely a child line and the parent
940  // continues the preprocessor directive.
941  (!NextLine ||
942  (NextLine->InPPDirective &&
943  // If there is an unescaped newline between this line and the next, the
944  // next line starts a new preprocessor directive.
945  !NextLine->First->HasUnescapedNewline));
946  return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
947 }
948 
949 } // namespace format
950 } // namespace clang
bool isAccessSpecifier(bool ColonRequired=true) const
Definition: FormatToken.h:301
std::vector< int > IndentForLevel
The indent in characters for each level.
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs. The configured TabWidth is used a...
Definition: FormatToken.h:207
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:286
bool IsFirst
Indicates that this is the first token.
Definition: FormatToken.h:148
bool isNot(T Kind) const
Definition: FormatToken.h:293
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:203
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:122
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:267
LineState State
Should be used for Java.
Definition: Format.h:285
const FormatStyle & Style
uint32_t Offset
Definition: CacheTokens.cpp:43
bool ChildrenAffected
True if one of this line's children intersects with an input range.
int Offset
Offset of the current line relative to the indent level.
WhitespaceManager * Whitespaces
UnwrappedLineFormatter * BlockFormatter
#define UINT_MAX
Definition: limits.h:72
AnnotatingParser & P
const SmallVectorImpl< AnnotatedLine * >::const_iterator End
WhitespaceManager class manages whitespace around tokens and their replacements.
llvm::SpecificBumpPtrAllocator< StateNode > Allocator
Only merge empty functions.
Definition: Format.h:96
Should be used for JavaScript.
Definition: Format.h:287
ContinuationIndenter * Indenter
StateNode * Previous
SmallVector< AnnotatedLine *, 0 > Children
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
Implements a combinatorial exploration of all the different linebreaks unwrapped lines can be format...
Always attach braces to surrounding context.
Definition: Format.h:165
SmallVectorImpl< AnnotatedLine * >::const_iterator Next
AnnotatedLine & Line
unsigned format(const SmallVectorImpl< AnnotatedLine * > &Lines, bool DryRun=false, int AdditionalIndent=0, bool FixBadIndentation=false)
Format the current block and return the penalty.
Always break before braces.
Definition: Format.h:175
ast_type_traits::DynTypedNode Node
Merge all functions fitting on a single line.
Definition: Format.h:100
const AdditionalKeywords & Keywords
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:281
Only merge functions defined inside a class. Implies "empty".
Definition: Format.h:98
FormatToken * Current
const unsigned AdditionalIndent
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:126
unsigned Indent
The current line's indent.