clang  3.7.0
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements an indenter that manages the indentation of
12 /// continuations.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 
19 #include "Encoding.h"
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
23 
24 namespace clang {
25 class SourceManager;
26 
27 namespace format {
28 
29 class AnnotatedLine;
30 struct FormatToken;
31 struct LineState;
32 struct ParenState;
33 class WhitespaceManager;
34 
36 public:
37  /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
38  /// column \p FirstIndent.
39  ContinuationIndenter(const FormatStyle &Style,
40  const AdditionalKeywords &Keywords,
41  SourceManager &SourceMgr, WhitespaceManager &Whitespaces,
42  encoding::Encoding Encoding,
43  bool BinPackInconclusiveFunctions);
44 
45  /// \brief Get the initial state, i.e. the state after placing \p Line's
46  /// first token at \p FirstIndent.
47  LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
48  bool DryRun);
49 
50  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
51  // better home.
52  /// \brief Returns \c true, if a line break after \p State is allowed.
53  bool canBreak(const LineState &State);
54 
55  /// \brief Returns \c true, if a line break after \p State is mandatory.
56  bool mustBreak(const LineState &State);
57 
58  /// \brief Appends the next token to \p State and updates information
59  /// necessary for indentation.
60  ///
61  /// Puts the token on the current line if \p Newline is \c false and adds a
62  /// line break and necessary indentation otherwise.
63  ///
64  /// If \p DryRun is \c false, also creates and stores the required
65  /// \c Replacement.
66  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
67  unsigned ExtraSpaces = 0);
68 
69  /// \brief Get the column limit for this line. This is the style's column
70  /// limit, potentially reduced for preprocessor definitions.
71  unsigned getColumnLimit(const LineState &State) const;
72 
73 private:
74  /// \brief Mark the next token as consumed in \p State and modify its stacks
75  /// accordingly.
76  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
77 
78  /// \brief Update 'State' according to the next token's fake left parentheses.
79  void moveStatePastFakeLParens(LineState &State, bool Newline);
80  /// \brief Update 'State' according to the next token's fake r_parens.
81  void moveStatePastFakeRParens(LineState &State);
82 
83  /// \brief Update 'State' according to the next token being one of "(<{[".
84  void moveStatePastScopeOpener(LineState &State, bool Newline);
85  /// \brief Update 'State' according to the next token being one of ")>}]".
86  void moveStatePastScopeCloser(LineState &State);
87  /// \brief Update 'State' with the next token opening a nested block.
88  void moveStateToNewBlock(LineState &State);
89 
90  /// \brief If the current token sticks out over the end of the line, break
91  /// it if possible.
92  ///
93  /// \returns An extra penalty if a token was broken, otherwise 0.
94  ///
95  /// The returned penalty will cover the cost of the additional line breaks and
96  /// column limit violation in all lines except for the last one. The penalty
97  /// for the column limit violation in the last line (and in single line
98  /// tokens) is handled in \c addNextStateToQueue.
99  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
100  bool DryRun);
101 
102  /// \brief Appends the next token to \p State and updates information
103  /// necessary for indentation.
104  ///
105  /// Puts the token on the current line.
106  ///
107  /// If \p DryRun is \c false, also creates and stores the required
108  /// \c Replacement.
109  void addTokenOnCurrentLine(LineState &State, bool DryRun,
110  unsigned ExtraSpaces);
111 
112  /// \brief Appends the next token to \p State and updates information
113  /// necessary for indentation.
114  ///
115  /// Adds a line break and necessary indentation.
116  ///
117  /// If \p DryRun is \c false, also creates and stores the required
118  /// \c Replacement.
119  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
120 
121  /// \brief Calculate the new column for a line wrap before the next token.
122  unsigned getNewLineColumn(const LineState &State);
123 
124  /// \brief Adds a multiline token to the \p State.
125  ///
126  /// \returns Extra penalty for the first line of the literal: last line is
127  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
128  /// matter, as we don't change them.
129  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
130 
131  /// \brief Returns \c true if the next token starts a multiline string
132  /// literal.
133  ///
134  /// This includes implicitly concatenated strings, strings that will be broken
135  /// by clang-format and string literals with escaped newlines.
136  bool nextIsMultilineString(const LineState &State);
137 
138  FormatStyle Style;
139  const AdditionalKeywords &Keywords;
140  SourceManager &SourceMgr;
141  WhitespaceManager &Whitespaces;
142  encoding::Encoding Encoding;
143  bool BinPackInconclusiveFunctions;
144  llvm::Regex CommentPragmasRegex;
145 };
146 
/// \brief Properties that apply to a single parenthesis level while a line is
/// being laid out.
///
/// Instances are ordered by \c operator< so that states can be compared;
/// members documented as "not considered for memoization" are deliberately
/// left out of that comparison.
struct ParenState {
  // NOTE(review): the initializers for NestedBlockIndent,
  // BreakBeforeClosingBrace, ContainsLineBreak, ContainsUnwrappedBuilder,
  // AlignColons, ObjCSelectorNameFound, HasMultipleNestedBlocks and
  // NestedBlockInlined were not visible in the listing this was restored from;
  // the values below are reconstructed - confirm against the upstream header.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
        AlignColons(true), ObjCSelectorNameFound(false),
        HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief If a block relative to this parenthesis level gets wrapped, indent
  /// it this much.
  unsigned NestedBlockIndent;

  /// \brief The position of the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess = 0;

  /// \brief The column of a \c ? in a conditional expression.
  unsigned QuestionColumn = 0;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos = 0;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall = 0;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts = 0;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation = 0;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation = 0;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos = 0;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace : 1;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking : 1;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter : 1;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak : 1;

  /// \brief True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped : 1;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak : 1;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder : 1;

  /// \brief \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons : 1;

  /// \brief \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound : 1;

  /// \brief \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks : 1;

  /// \brief The start of a nested block (e.g. lambda introducer in C++ or
  /// "function" in JavaScript) is not wrapped to a new line.
  bool NestedBlockInlined : 1;

  /// \brief Strict weak ordering over the members that take part in state
  /// comparison.
  ///
  /// Members are compared one after another; the first one that differs
  /// decides the result. For boolean members the state whose flag is set
  /// orders first. Returns \c false when all compared members are equal.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (NestedBlockIndent != Other.NestedBlockIndent)
      return NestedBlockIndent < Other.NestedBlockIndent;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder;
    if (NestedBlockInlined != Other.NestedBlockInlined)
      return NestedBlockInlined;
    return false;
  }
};
305 
306 /// \brief The current state when indenting a unwrapped line.
307 ///
308 /// As the indenting tries different combinations this is copied by value.
309 struct LineState {
310  /// \brief The number of used columns in the current line.
311  unsigned Column;
312 
313  /// \brief The token that needs to be next formatted.
315 
316  /// \brief \c true if this line contains a continued for-loop section.
318 
319  /// \brief The \c NestingLevel at the start of this line.
321 
322  /// \brief The lowest \c NestingLevel on the current line.
324 
325  /// \brief The start column of the string literal, if we're in a string
326  /// literal sequence, 0 otherwise.
328 
329  /// \brief A stack keeping track of properties applying to parenthesis
330  /// levels.
331  std::vector<ParenState> Stack;
332 
333  /// \brief Ignore the stack of \c ParenStates for state comparison.
334  ///
335  /// In long and deeply nested unwrapped lines, the current algorithm can
336  /// be insufficient for finding the best formatting with a reasonable amount
337  /// of time and memory. Setting this flag will effectively lead to the
338  /// algorithm not analyzing some combinations. However, these combinations
339  /// rarely contain the optimal solution: In short, accepting a higher
340  /// penalty early would need to lead to different values in the \c
341  /// ParenState stack (in an otherwise identical state) and these different
342  /// values would need to lead to a significant amount of avoided penalty
343  /// later.
344  ///
345  /// FIXME: Come up with a better algorithm instead.
347 
348  /// \brief The indent of the first token.
349  unsigned FirstIndent;
350 
351  /// \brief The line that is being formatted.
352  ///
353  /// Does not need to be considered for memoization because it doesn't change.
355 
356  /// \brief Comparison operator to be able to used \c LineState in \c map.
357  bool operator<(const LineState &Other) const {
358  if (NextToken != Other.NextToken)
359  return NextToken < Other.NextToken;
360  if (Column != Other.Column)
361  return Column < Other.Column;
365  if (StartOfLineLevel != Other.StartOfLineLevel)
366  return StartOfLineLevel < Other.StartOfLineLevel;
368  return LowestLevelOnLine < Other.LowestLevelOnLine;
372  return false;
373  return Stack < Other.Stack;
374  }
375 };
376 
377 } // end namespace format
378 } // end namespace clang
379 
380 #endif
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned VariablePos
The column of the first variable name in a variable declaration.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
unsigned IndentLevel
The number of indentation levels of the block.
LineState State
Contains functions for text encoding manipulation. Supports UTF-8, 8-bit encodings and escape sequenc...
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Column
The number of used columns in the current line.
Manages the whitespaces around tokens and their replacements.
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
const AnnotatedLine * Line
The line that is being formatted.
bool operator<(const ParenState &Other) const
bool LineContainsContinuedForLoopSection
true if this line contains a continued for-loop section.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true)...
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
The current state when indenting an unwrapped line.
unsigned QuestionColumn
The column of a ? in a conditional expression.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
#define false
Definition: stdbool.h:33
AnnotatedLine & Line
unsigned LastSpace
The position of the last space on each level.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:522
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line. This is the style's column limit, potentially reduced for preproc...
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:42
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
unsigned FirstIndent
The indent of the first token.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
bool AvoidBinPacking
Avoid bin packing, i.e. multiple parameters/elements on multiple lines, in this context.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e. the state after placing Line's first token at FirstIndent.
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
FormatToken * Current
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
FormatToken * NextToken
The token that needs to be next formatted.
#define true
Definition: stdbool.h:32
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
This class handles loading and caching of source files into memory.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.