clang  3.7.0
FormatToken.cpp
Go to the documentation of this file.
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements specific functions of \c FormatTokens and their
12 /// roles.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "FormatToken.h"
17 #include "ContinuationIndenter.h"
18 #include "clang/Format/Format.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Support/Debug.h"
21 #include <climits>
22 
23 namespace clang {
24 namespace format {
25 
27  static const char *const TokNames[] = {
28 #define TYPE(X) #X,
30 #undef TYPE
31  nullptr
32  };
33 
34  if (Type < NUM_TOKEN_TYPES)
35  return TokNames[Type];
36  llvm_unreachable("unknown TokenType");
37  return nullptr;
38 }
39 
40 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
41 // duplication.
43  switch (Tok.getKind()) {
44  case tok::kw_short:
45  case tok::kw_long:
46  case tok::kw___int64:
47  case tok::kw___int128:
48  case tok::kw_signed:
49  case tok::kw_unsigned:
50  case tok::kw_void:
51  case tok::kw_char:
52  case tok::kw_int:
53  case tok::kw_half:
54  case tok::kw_float:
55  case tok::kw_double:
56  case tok::kw_wchar_t:
57  case tok::kw_bool:
58  case tok::kw___underlying_type:
59  case tok::annot_typename:
60  case tok::kw_char16_t:
61  case tok::kw_char32_t:
62  case tok::kw_typeof:
63  case tok::kw_decltype:
64  return true;
65  default:
66  return false;
67  }
68 }
69 
71 
73 
76  bool DryRun) {
77  if (State.NextToken == nullptr || !State.NextToken->Previous)
78  return 0;
79 
80  // Ensure that we start on the opening brace.
81  const FormatToken *LBrace =
83  if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block ||
84  LBrace->Type == TT_DictLiteral ||
85  LBrace->Next->Type == TT_DesignatedInitializerPeriod)
86  return 0;
87 
88  // Calculate the number of code points we have to format this list. As the
89  // first token is already placed, we have to subtract it.
90  unsigned RemainingCodePoints =
92 
93  // Find the best ColumnFormat, i.e. the best number of columns to use.
94  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
95  // If no ColumnFormat can be used, the braced list would generally be
96  // bin-packed. Add a severe penalty to this so that column layouts are
97  // preferred if possible.
98  if (!Format)
99  return 10000;
100 
101  // Format the entire list.
102  unsigned Penalty = 0;
103  unsigned Column = 0;
104  unsigned Item = 0;
105  while (State.NextToken != LBrace->MatchingParen) {
106  bool NewLine = false;
107  unsigned ExtraSpaces = 0;
108 
109  // If the previous token was one of our commas, we are now on the next item.
110  if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
111  if (!State.NextToken->isTrailingComment()) {
112  ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
113  ++Column;
114  }
115  ++Item;
116  }
117 
118  if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
119  Column = 0;
120  NewLine = true;
121  }
122 
123  // Place token using the continuation indenter and store the penalty.
124  Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
125  }
126  return Penalty;
127 }
128 
131  bool DryRun) {
132  if (HasNestedBracedList)
133  State.Stack.back().AvoidBinPacking = true;
134  return 0;
135 }
136 
137 // Returns the lengths in code points between Begin and End (both included),
138 // assuming that the entire sequence is put on a single line.
139 static unsigned CodePointsBetween(const FormatToken *Begin,
140  const FormatToken *End) {
141  assert(End->TotalLength >= Begin->TotalLength);
142  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
143 }
144 
146  // FIXME: At some point we might want to do this for other lists, too.
147  if (!Token->MatchingParen || Token->isNot(tok::l_brace))
148  return;
149 
150  // In C++11 braced list style, we should not format in columns unless they
151  // have many items (20 or more) or we allow bin-packing of function call
152  // arguments.
154  Commas.size() < 19)
155  return;
156 
157  // Column format doesn't really make sense if we don't align after brackets.
159  return;
160 
161  FormatToken *ItemBegin = Token->Next;
162  while (ItemBegin->isTrailingComment())
163  ItemBegin = ItemBegin->Next;
164  SmallVector<bool, 8> MustBreakBeforeItem;
165 
166  // The lengths of an item if it is put at the end of the line. This includes
167  // trailing comments which are otherwise ignored for column alignment.
168  SmallVector<unsigned, 8> EndOfLineItemLength;
169 
170  bool HasSeparatingComment = false;
171  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
172  // Skip comments on their own line.
173  while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
174  ItemBegin = ItemBegin->Next;
175  HasSeparatingComment = i > 0;
176  }
177 
178  MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
179  if (ItemBegin->is(tok::l_brace))
180  HasNestedBracedList = true;
181  const FormatToken *ItemEnd = nullptr;
182  if (i == Commas.size()) {
183  ItemEnd = Token->MatchingParen;
184  const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
185  ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
187  // In Cpp11 braced list style, the } and possibly other subsequent
188  // tokens will need to stay on a line with the last element.
189  while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
190  ItemEnd = ItemEnd->Next;
191  } else {
192 // In other braced list styles, the "}" can be wrapped to the new line.
193  ItemEnd = Token->MatchingParen->Previous;
194  }
195  } else {
196  ItemEnd = Commas[i];
197  // The comma is counted as part of the item when calculating the length.
198  ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
199 
200 // Consume trailing comments so they are included in EndOfLineItemLength.
201  if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
202  ItemEnd->Next->isTrailingComment())
203  ItemEnd = ItemEnd->Next;
204  }
205  EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
206  // If there is a trailing comma in the list, the next item will start at the
207  // closing brace. Don't create an extra item for this.
208  if (ItemEnd->getNextNonComment() == Token->MatchingParen)
209  break;
210  ItemBegin = ItemEnd->Next;
211  }
212 
213  // Don't use column layout for nested lists, lists with few elements and in
214  // presence of separating comments.
215  if (Token->NestingLevel != 0 || Commas.size() < 5 || HasSeparatingComment)
216  return;
217 
218  // We can never place more than ColumnLimit / 3 items in a row (because of the
219  // spaces and the comma).
220  unsigned MaxItems = Style.ColumnLimit / 3;
221  std::vector<unsigned> MinSizeInColumn;
222  MinSizeInColumn.reserve(MaxItems);
223  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
224  ColumnFormat Format;
225  Format.Columns = Columns;
226  Format.ColumnSizes.resize(Columns);
227  MinSizeInColumn.assign(Columns, UINT_MAX);
228  Format.LineCount = 1;
229  bool HasRowWithSufficientColumns = false;
230  unsigned Column = 0;
231  for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
232  assert(i < MustBreakBeforeItem.size());
233  if (MustBreakBeforeItem[i] || Column == Columns) {
234  ++Format.LineCount;
235  Column = 0;
236  }
237  if (Column == Columns - 1)
238  HasRowWithSufficientColumns = true;
239  unsigned Length =
240  (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
241  Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
242  MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
243  ++Column;
244  }
245  // If all rows are terminated early (e.g. by trailing comments), we don't
246  // need to look further.
247  if (!HasRowWithSufficientColumns)
248  break;
249  Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
250 
251  for (unsigned i = 0; i < Columns; ++i)
252  Format.TotalWidth += Format.ColumnSizes[i];
253 
254  // Don't use this Format, if the difference between the longest and shortest
255  // element in a column exceeds a threshold to avoid excessive spaces.
256  if ([&] {
257  for (unsigned i = 0; i < Columns - 1; ++i)
258  if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
259  return true;
260  return false;
261  }())
262  continue;
263 
264  // Ignore layouts that are bound to violate the column limit.
265  if (Format.TotalWidth > Style.ColumnLimit)
266  continue;
267 
268  Formats.push_back(Format);
269  }
270 }
271 
272 const CommaSeparatedList::ColumnFormat *
273 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
274  const ColumnFormat *BestFormat = nullptr;
276  I = Formats.rbegin(),
277  E = Formats.rend();
278  I != E; ++I) {
279  if (I->TotalWidth <= RemainingCharacters) {
280  if (BestFormat && I->LineCount > BestFormat->LineCount)
281  break;
282  BestFormat = &*I;
283  }
284  }
285  return BestFormat;
286 }
287 
288 } // namespace format
289 } // namespace clang
unsigned NestingLevel
The nesting level of this token, i.e. the number of surrounding (), [], {} or <>. ...
Definition: FormatToken.h:220
Token Tok
The Token.
Definition: FormatToken.h:116
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:394
bool AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:54
bool isNot(T Kind) const
Definition: FormatToken.h:293
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:203
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:267
static const char *const TokNames[]
Definition: TokenKinds.cpp:18
LineState State
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:174
unsigned Column
The number of used columns in the current line.
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:264
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:402
unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.cpp:74
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each...
Definition: Format.h:143
tok::TokenKind getKind() const
Definition: Token.h:90
#define UINT_MAX
Definition: limits.h:72
virtual void precomputeFormattingInfos(const FormatToken *Token)
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:72
unsigned ColumnLimit
The column limit.
Definition: Format.h:197
The current state when indenting an unwrapped line.
const SmallVectorImpl< AnnotatedLine * >::const_iterator End
ContinuationIndenter * Indenter
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:26
bool isTrailingComment() const
Definition: FormatToken.h:355
static unsigned CodePointsBetween(const FormatToken *Begin, const FormatToken *End)
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:138
void precomputeFormattingInfos(const FormatToken *Token) override
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:227
for(auto typeArg:T->getTypeArgsAsWritten())
unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Apply the special formatting that the given role demands.
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:281
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:261
const FormatStyle & Style
Definition: FormatToken.h:465
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:154
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
FormatToken * NextToken
The token that needs to be next formatted.
This file implements an indenter that manages the indentation of continuations.
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:126
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:166
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:42
unsigned Column
Definition: Format.cpp:1202
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:28