clang  3.8.0
FormatToken.cpp
Go to the documentation of this file.
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements specific functions of \c FormatTokens and their
12 /// roles.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "ContinuationIndenter.h"
17 #include "FormatToken.h"
18 #include "clang/Format/Format.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Support/Debug.h"
21 #include <climits>
22 
23 namespace clang {
24 namespace format {
25 
27  static const char *const TokNames[] = {
28 #define TYPE(X) #X,
30 #undef TYPE
31  nullptr
32  };
33 
34  if (Type < NUM_TOKEN_TYPES)
35  return TokNames[Type];
36  llvm_unreachable("unknown TokenType");
37  return nullptr;
38 }
39 
40 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
41 // duplication.
43  switch (Tok.getKind()) {
44  case tok::kw_short:
45  case tok::kw_long:
46  case tok::kw___int64:
47  case tok::kw___int128:
48  case tok::kw_signed:
49  case tok::kw_unsigned:
50  case tok::kw_void:
51  case tok::kw_char:
52  case tok::kw_int:
53  case tok::kw_half:
54  case tok::kw_float:
55  case tok::kw_double:
56  case tok::kw_wchar_t:
57  case tok::kw_bool:
58  case tok::kw___underlying_type:
59  case tok::annot_typename:
60  case tok::kw_char16_t:
61  case tok::kw_char32_t:
62  case tok::kw_typeof:
63  case tok::kw_decltype:
64  return true;
65  default:
66  return false;
67  }
68 }
69 
71 
73 
76  bool DryRun) {
77  if (State.NextToken == nullptr || !State.NextToken->Previous)
78  return 0;
79 
80  // Ensure that we start on the opening brace.
81  const FormatToken *LBrace =
83  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
84  LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
85  LBrace->Next->Type == TT_DesignatedInitializerPeriod)
86  return 0;
87 
88  // Calculate the number of code points we have to format this list. As the
89  // first token is already placed, we have to subtract it.
90  unsigned RemainingCodePoints =
92 
93  // Find the best ColumnFormat, i.e. the best number of columns to use.
94  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
95  // If no ColumnFormat can be used, the braced list would generally be
96  // bin-packed. Add a severe penalty to this so that column layouts are
97  // preferred if possible.
98  if (!Format)
99  return 10000;
100 
101  // Format the entire list.
102  unsigned Penalty = 0;
103  unsigned Column = 0;
104  unsigned Item = 0;
105  while (State.NextToken != LBrace->MatchingParen) {
106  bool NewLine = false;
107  unsigned ExtraSpaces = 0;
108 
109  // If the previous token was one of our commas, we are now on the next item.
110  if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
111  if (!State.NextToken->isTrailingComment()) {
112  ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
113  ++Column;
114  }
115  ++Item;
116  }
117 
118  if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
119  Column = 0;
120  NewLine = true;
121  }
122 
123  // Place token using the continuation indenter and store the penalty.
124  Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
125  }
126  return Penalty;
127 }
128 
131  bool DryRun) {
132  if (HasNestedBracedList)
133  State.Stack.back().AvoidBinPacking = true;
134  return 0;
135 }
136 
137 // Returns the lengths in code points between Begin and End (both included),
138 // assuming that the entire sequence is put on a single line.
139 static unsigned CodePointsBetween(const FormatToken *Begin,
140  const FormatToken *End) {
141  assert(End->TotalLength >= Begin->TotalLength);
142  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
143 }
144 
146  // FIXME: At some point we might want to do this for other lists, too.
147  if (!Token->MatchingParen ||
148  !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
149  return;
150 
151  // In C++11 braced list style, we should not format in columns unless they
152  // have many items (20 or more) or we allow bin-packing of function call
153  // arguments.
155  Commas.size() < 19)
156  return;
157 
158  // Limit column layout for JavaScript array initializers to 20 or more items
159  // for now to introduce it carefully. We can become more aggressive if this
160  // is necessary.
161  if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
162  return;
163 
164  // Column format doesn't really make sense if we don't align after brackets.
166  return;
167 
168  FormatToken *ItemBegin = Token->Next;
169  while (ItemBegin->isTrailingComment())
170  ItemBegin = ItemBegin->Next;
171  SmallVector<bool, 8> MustBreakBeforeItem;
172 
173  // The lengths of an item if it is put at the end of the line. This includes
174  // trailing comments which are otherwise ignored for column alignment.
175  SmallVector<unsigned, 8> EndOfLineItemLength;
176 
177  bool HasSeparatingComment = false;
178  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
179  // Skip comments on their own line.
180  while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
181  ItemBegin = ItemBegin->Next;
182  HasSeparatingComment = i > 0;
183  }
184 
185  MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
186  if (ItemBegin->is(tok::l_brace))
187  HasNestedBracedList = true;
188  const FormatToken *ItemEnd = nullptr;
189  if (i == Commas.size()) {
190  ItemEnd = Token->MatchingParen;
191  const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
192  ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
194  !ItemEnd->Previous->isTrailingComment()) {
195  // In Cpp11 braced list style, the } and possibly other subsequent
196  // tokens will need to stay on a line with the last element.
197  while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
198  ItemEnd = ItemEnd->Next;
199  } else {
200  // In other braced lists styles, the "}" can be wrapped to the new line.
201  ItemEnd = Token->MatchingParen->Previous;
202  }
203  } else {
204  ItemEnd = Commas[i];
205  // The comma is counted as part of the item when calculating the length.
206  ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
207 
208  // Consume trailing comments so they are included in EndOfLineItemLength.
209  if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
210  ItemEnd->Next->isTrailingComment())
211  ItemEnd = ItemEnd->Next;
212  }
213  EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
214  // If there is a trailing comma in the list, the next item will start at the
215  // closing brace. Don't create an extra item for this.
216  if (ItemEnd->getNextNonComment() == Token->MatchingParen)
217  break;
218  ItemBegin = ItemEnd->Next;
219  }
220 
221  // Don't use column layout for lists with few elements and in presence of
222  // separating comments.
223  if (Commas.size() < 5 || HasSeparatingComment)
224  return;
225 
226  if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
227  return;
228 
229  // We can never place more than ColumnLimit / 3 items in a row (because of the
230  // spaces and the comma).
231  unsigned MaxItems = Style.ColumnLimit / 3;
232  std::vector<unsigned> MinSizeInColumn;
233  MinSizeInColumn.reserve(MaxItems);
234  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
235  ColumnFormat Format;
236  Format.Columns = Columns;
237  Format.ColumnSizes.resize(Columns);
238  MinSizeInColumn.assign(Columns, UINT_MAX);
239  Format.LineCount = 1;
240  bool HasRowWithSufficientColumns = false;
241  unsigned Column = 0;
242  for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
243  assert(i < MustBreakBeforeItem.size());
244  if (MustBreakBeforeItem[i] || Column == Columns) {
245  ++Format.LineCount;
246  Column = 0;
247  }
248  if (Column == Columns - 1)
249  HasRowWithSufficientColumns = true;
250  unsigned Length =
251  (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
252  Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
253  MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
254  ++Column;
255  }
256  // If all rows are terminated early (e.g. by trailing comments), we don't
257  // need to look further.
258  if (!HasRowWithSufficientColumns)
259  break;
260  Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
261 
262  for (unsigned i = 0; i < Columns; ++i)
263  Format.TotalWidth += Format.ColumnSizes[i];
264 
265  // Don't use this Format, if the difference between the longest and shortest
266  // element in a column exceeds a threshold to avoid excessive spaces.
267  if ([&] {
268  for (unsigned i = 0; i < Columns - 1; ++i)
269  if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
270  return true;
271  return false;
272  }())
273  continue;
274 
275  // Ignore layouts that are bound to violate the column limit.
276  if (Format.TotalWidth > Style.ColumnLimit)
277  continue;
278 
279  Formats.push_back(Format);
280  }
281 }
282 
283 const CommaSeparatedList::ColumnFormat *
284 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
285  const ColumnFormat *BestFormat = nullptr;
287  I = Formats.rbegin(),
288  E = Formats.rend();
289  I != E; ++I) {
290  if (I->TotalWidth <= RemainingCharacters) {
291  if (BestFormat && I->LineCount > BestFormat->LineCount)
292  break;
293  BestFormat = &*I;
294  }
295  }
296  return BestFormat;
297 }
298 
299 } // namespace format
300 } // namespace clang
unsigned Length
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:220
Token Tok
The Token.
Definition: FormatToken.h:116
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:290
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:398
The base class of the type hierarchy.
Definition: Type.h:1249
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:203
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:267
static const char *const TokNames[]
Definition: TokenKinds.cpp:18
LineState State
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:174
unsigned Column
The number of used columns in the current line.
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:264
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:406
Token - This structure provides full information about a lexed token.
Definition: Token.h:37
unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.cpp:74
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each...
Definition: Format.h:200
tok::TokenKind getKind() const
Definition: Token.h:90
#define UINT_MAX
Definition: limits.h:72
detail::InMemoryDirectory::const_iterator I
virtual void precomputeFormattingInfos(const FormatToken *Token)
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:72
unsigned ColumnLimit
The column limit.
Definition: Format.h:293
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:73
The current state when indenting a unwrapped line.
const SmallVectorImpl< AnnotatedLine * >::const_iterator End
ContinuationIndenter * Indenter
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:59
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:26
bool isTrailingComment() const
Definition: FormatToken.h:359
static unsigned CodePointsBetween(const FormatToken *Begin, const FormatToken *End)
Various functions to configurably format source code.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:138
void precomputeFormattingInfos(const FormatToken *Token) override
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:323
detail::InMemoryDirectory::const_iterator E
for(auto typeArg:T->getTypeArgsAsWritten())
unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Apply the special formatting that the given role demands.
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:281
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:261
const FormatStyle & Style
Definition: FormatToken.h:469
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:154
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
FormatToken * NextToken
The token that needs to be next formatted.
This file implements an indenter that manages the indentation of continuations.
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:126
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:166
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:42
unsigned Column
Definition: Format.cpp:1349
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:28