clang  3.7.0
WhitespaceManager.cpp
Go to the documentation of this file.
1 //===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements WhitespaceManager class.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "WhitespaceManager.h"
16 #include "llvm/ADT/STLExtras.h"
17 
18 namespace clang {
19 namespace format {
20 
22 operator()(const Change &C1, const Change &C2) const {
23  return SourceMgr.isBeforeInTranslationUnit(
24  C1.OriginalWhitespaceRange.getBegin(),
25  C2.OriginalWhitespaceRange.getBegin());
26 }
27 
28 WhitespaceManager::Change::Change(
29  bool CreateReplacement, const SourceRange &OriginalWhitespaceRange,
30  unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
31  unsigned NewlinesBefore, StringRef PreviousLinePostfix,
32  StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective)
33  : CreateReplacement(CreateReplacement),
34  OriginalWhitespaceRange(OriginalWhitespaceRange),
35  StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
36  PreviousLinePostfix(PreviousLinePostfix),
37  CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
38  ContinuesPPDirective(ContinuesPPDirective), IndentLevel(IndentLevel),
39  Spaces(Spaces), IsTrailingComment(false), TokenLength(0),
40  PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
41  StartOfBlockComment(nullptr), IndentationOffset(0) {}
42 
44  Changes.clear();
45  Replaces.clear();
46 }
47 
49  unsigned IndentLevel, unsigned Spaces,
50  unsigned StartOfTokenColumn,
51  bool InPPDirective) {
52  if (Tok.Finalized)
53  return;
54  Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
55  Changes.push_back(Change(true, Tok.WhitespaceRange, IndentLevel, Spaces,
56  StartOfTokenColumn, Newlines, "", "",
57  Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst));
58 }
59 
61  bool InPPDirective) {
62  if (Tok.Finalized)
63  return;
64  Changes.push_back(Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0,
65  /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore,
66  "", "", Tok.Tok.getKind(),
67  InPPDirective && !Tok.IsFirst));
68 }
69 
71  const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
72  StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
73  unsigned Newlines, unsigned IndentLevel, int Spaces) {
74  if (Tok.Finalized)
75  return;
77  Changes.push_back(Change(
78  true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)),
79  IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix,
80  CurrentPrefix,
81  // If we don't add a newline this change doesn't start a comment. Thus,
82  // when we align line comments, we don't need to treat this change as one.
83  // FIXME: We still need to take this change in account to properly
84  // calculate the new length of the comment and to calculate the changes
85  // for which to do the alignment when aligning comments.
86  Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown,
87  InPPDirective && !Tok.IsFirst));
88 }
89 
91  if (Changes.empty())
92  return Replaces;
93 
94  std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
95  calculateLineBreakInformation();
96  alignConsecutiveAssignments();
97  alignTrailingComments();
98  alignEscapedNewlines();
99  generateChanges();
100 
101  return Replaces;
102 }
103 
104 void WhitespaceManager::calculateLineBreakInformation() {
105  Changes[0].PreviousEndOfTokenColumn = 0;
106  for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
107  unsigned OriginalWhitespaceStart =
108  SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
109  unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
110  Changes[i - 1].OriginalWhitespaceRange.getEnd());
111  Changes[i - 1].TokenLength = OriginalWhitespaceStart -
112  PreviousOriginalWhitespaceEnd +
113  Changes[i].PreviousLinePostfix.size() +
114  Changes[i - 1].CurrentLinePrefix.size();
115 
116  Changes[i].PreviousEndOfTokenColumn =
117  Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
118 
119  Changes[i - 1].IsTrailingComment =
120  (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) &&
121  Changes[i - 1].Kind == tok::comment;
122  }
123  // FIXME: The last token is currently not always an eof token; in those
124  // cases, setting TokenLength of the last token to 0 is wrong.
125  Changes.back().TokenLength = 0;
126  Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
127 
128  const WhitespaceManager::Change *LastBlockComment = nullptr;
129  for (auto &Change : Changes) {
130  Change.StartOfBlockComment = nullptr;
131  Change.IndentationOffset = 0;
132  if (Change.Kind == tok::comment) {
133  LastBlockComment = &Change;
134  } else if (Change.Kind == tok::unknown) {
135  if ((Change.StartOfBlockComment = LastBlockComment))
136  Change.IndentationOffset =
137  Change.StartOfTokenColumn -
138  Change.StartOfBlockComment->StartOfTokenColumn;
139  } else {
140  LastBlockComment = nullptr;
141  }
142  }
143 }
144 
145 // Walk through all of the changes and find sequences of "=" to align. To do
146 // so, keep track of the lines and whether or not an "=" was found on align. If
147 // a "=" is found on a line, extend the current sequence. If the current line
148 // cannot be part of a sequence, e.g. because there is an empty line before it
149 // or it contains non-assignments, finalize the previous sequence.
150 void WhitespaceManager::alignConsecutiveAssignments() {
151  if (!Style.AlignConsecutiveAssignments)
152  return;
153 
154  unsigned MinColumn = 0;
155  unsigned StartOfSequence = 0;
156  unsigned EndOfSequence = 0;
157  bool FoundAssignmentOnLine = false;
158  bool FoundLeftParenOnLine = false;
159  unsigned CurrentLine = 0;
160 
161  auto AlignSequence = [&] {
162  alignConsecutiveAssignments(StartOfSequence, EndOfSequence, MinColumn);
163  MinColumn = 0;
164  StartOfSequence = 0;
165  EndOfSequence = 0;
166  };
167 
168  for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
169  if (Changes[i].NewlinesBefore != 0) {
170  CurrentLine += Changes[i].NewlinesBefore;
171  if (StartOfSequence > 0 &&
172  (Changes[i].NewlinesBefore > 1 || !FoundAssignmentOnLine)) {
173  EndOfSequence = i;
174  AlignSequence();
175  }
176  FoundAssignmentOnLine = false;
177  FoundLeftParenOnLine = false;
178  }
179 
180  if ((Changes[i].Kind == tok::equal &&
181  (FoundAssignmentOnLine || ((Changes[i].NewlinesBefore > 0 ||
182  Changes[i + 1].NewlinesBefore > 0)))) ||
183  (!FoundLeftParenOnLine && Changes[i].Kind == tok::r_paren)) {
184  if (StartOfSequence > 0)
185  AlignSequence();
186  } else if (Changes[i].Kind == tok::l_paren) {
187  FoundLeftParenOnLine = true;
188  if (!FoundAssignmentOnLine && StartOfSequence > 0)
189  AlignSequence();
190  } else if (!FoundAssignmentOnLine && !FoundLeftParenOnLine &&
191  Changes[i].Kind == tok::equal) {
192  FoundAssignmentOnLine = true;
193  EndOfSequence = i;
194  if (StartOfSequence == 0)
195  StartOfSequence = i;
196 
197  unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
198  MinColumn = std::max(MinColumn, ChangeMinColumn);
199  }
200  }
201 
202  if (StartOfSequence > 0) {
203  EndOfSequence = Changes.size();
204  AlignSequence();
205  }
206 }
207 
208 void WhitespaceManager::alignConsecutiveAssignments(unsigned Start,
209  unsigned End,
210  unsigned Column) {
211  bool AlignedAssignment = false;
212  int PreviousShift = 0;
213  for (unsigned i = Start; i != End; ++i) {
214  int Shift = 0;
215  if (Changes[i].NewlinesBefore > 0)
216  AlignedAssignment = false;
217  if (!AlignedAssignment && Changes[i].Kind == tok::equal) {
218  Shift = Column - Changes[i].StartOfTokenColumn;
219  AlignedAssignment = true;
220  PreviousShift = Shift;
221  }
222  assert(Shift >= 0);
223  Changes[i].Spaces += Shift;
224  if (i + 1 != Changes.size())
225  Changes[i + 1].PreviousEndOfTokenColumn += Shift;
226  Changes[i].StartOfTokenColumn += Shift;
227  if (AlignedAssignment) {
228  Changes[i].StartOfTokenColumn += PreviousShift;
229  if (i + 1 != Changes.size())
230  Changes[i + 1].PreviousEndOfTokenColumn += PreviousShift;
231  }
232  }
233 }
234 
235 void WhitespaceManager::alignTrailingComments() {
236  unsigned MinColumn = 0;
237  unsigned MaxColumn = UINT_MAX;
238  unsigned StartOfSequence = 0;
239  bool BreakBeforeNext = false;
240  unsigned Newlines = 0;
241  for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
242  if (Changes[i].StartOfBlockComment)
243  continue;
244  Newlines += Changes[i].NewlinesBefore;
245  if (!Changes[i].IsTrailingComment)
246  continue;
247 
248  unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
249  unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
250  if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
251  ChangeMaxColumn -= 2;
252  // If this comment follows an } in column 0, it probably documents the
253  // closing of a namespace and we don't want to align it.
254  bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
255  Changes[i - 1].Kind == tok::r_brace &&
256  Changes[i - 1].StartOfTokenColumn == 0;
257  bool WasAlignedWithStartOfNextLine = false;
258  if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
259  unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
260  Changes[i].OriginalWhitespaceRange.getEnd());
261  for (unsigned j = i + 1; j != e; ++j) {
262  if (Changes[j].Kind != tok::comment) { // Skip over comments.
263  unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
264  Changes[j].OriginalWhitespaceRange.getEnd());
265  // The start of the next token was previously aligned with the
266  // start of this comment.
267  WasAlignedWithStartOfNextLine =
268  CommentColumn == NextColumn ||
269  CommentColumn == NextColumn + Style.IndentWidth;
270  break;
271  }
272  }
273  }
274  if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
275  alignTrailingComments(StartOfSequence, i, MinColumn);
276  MinColumn = ChangeMinColumn;
277  MaxColumn = ChangeMinColumn;
278  StartOfSequence = i;
279  } else if (BreakBeforeNext || Newlines > 1 ||
280  (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
281  // Break the comment sequence if the previous line did not end
282  // in a trailing comment.
283  (Changes[i].NewlinesBefore == 1 && i > 0 &&
284  !Changes[i - 1].IsTrailingComment) ||
285  WasAlignedWithStartOfNextLine) {
286  alignTrailingComments(StartOfSequence, i, MinColumn);
287  MinColumn = ChangeMinColumn;
288  MaxColumn = ChangeMaxColumn;
289  StartOfSequence = i;
290  } else {
291  MinColumn = std::max(MinColumn, ChangeMinColumn);
292  MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
293  }
294  BreakBeforeNext =
295  (i == 0) || (Changes[i].NewlinesBefore > 1) ||
296  // Never start a sequence with a comment at the beginning of
297  // the line.
298  (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
299  Newlines = 0;
300  }
301  alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
302 }
303 
304 void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
305  unsigned Column) {
306  for (unsigned i = Start; i != End; ++i) {
307  int Shift = 0;
308  if (Changes[i].IsTrailingComment) {
309  Shift = Column - Changes[i].StartOfTokenColumn;
310  }
311  if (Changes[i].StartOfBlockComment) {
312  Shift = Changes[i].IndentationOffset +
313  Changes[i].StartOfBlockComment->StartOfTokenColumn -
314  Changes[i].StartOfTokenColumn;
315  }
316  assert(Shift >= 0);
317  Changes[i].Spaces += Shift;
318  if (i + 1 != End)
319  Changes[i + 1].PreviousEndOfTokenColumn += Shift;
320  Changes[i].StartOfTokenColumn += Shift;
321  }
322 }
323 
324 void WhitespaceManager::alignEscapedNewlines() {
325  unsigned MaxEndOfLine =
326  Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
327  unsigned StartOfMacro = 0;
328  for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
329  Change &C = Changes[i];
330  if (C.NewlinesBefore > 0) {
331  if (C.ContinuesPPDirective) {
332  MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
333  } else {
334  alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
335  MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
336  StartOfMacro = i;
337  }
338  }
339  }
340  alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
341 }
342 
343 void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
344  unsigned Column) {
345  for (unsigned i = Start; i < End; ++i) {
346  Change &C = Changes[i];
347  if (C.NewlinesBefore > 0) {
348  assert(C.ContinuesPPDirective);
349  if (C.PreviousEndOfTokenColumn + 1 > Column)
350  C.EscapedNewlineColumn = 0;
351  else
352  C.EscapedNewlineColumn = Column;
353  }
354  }
355 }
356 
357 void WhitespaceManager::generateChanges() {
358  for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
359  const Change &C = Changes[i];
360  if (i > 0) {
361  assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
362  C.OriginalWhitespaceRange.getBegin() &&
363  "Generating two replacements for the same location");
364  }
365  if (C.CreateReplacement) {
366  std::string ReplacementText = C.PreviousLinePostfix;
367  if (C.ContinuesPPDirective)
368  appendNewlineText(ReplacementText, C.NewlinesBefore,
369  C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
370  else
371  appendNewlineText(ReplacementText, C.NewlinesBefore);
372  appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces),
373  C.StartOfTokenColumn - std::max(0, C.Spaces));
374  ReplacementText.append(C.CurrentLinePrefix);
375  storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
376  }
377  }
378 }
379 
380 void WhitespaceManager::storeReplacement(const SourceRange &Range,
381  StringRef Text) {
382  unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
383  SourceMgr.getFileOffset(Range.getBegin());
384  // Don't create a replacement, if it does not change anything.
385  if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
386  WhitespaceLength) == Text)
387  return;
388  Replaces.insert(tooling::Replacement(
389  SourceMgr, CharSourceRange::getCharRange(Range), Text));
390 }
391 
392 void WhitespaceManager::appendNewlineText(std::string &Text,
393  unsigned Newlines) {
394  for (unsigned i = 0; i < Newlines; ++i)
395  Text.append(UseCRLF ? "\r\n" : "\n");
396 }
397 
398 void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
399  unsigned PreviousEndOfTokenColumn,
400  unsigned EscapedNewlineColumn) {
401  if (Newlines > 0) {
402  unsigned Offset =
403  std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
404  for (unsigned i = 0; i < Newlines; ++i) {
405  Text.append(EscapedNewlineColumn - Offset - 1, ' ');
406  Text.append(UseCRLF ? "\\\r\n" : "\\\n");
407  Offset = 0;
408  }
409  }
410 }
411 
412 void WhitespaceManager::appendIndentText(std::string &Text,
413  unsigned IndentLevel, unsigned Spaces,
414  unsigned WhitespaceStartColumn) {
415  switch (Style.UseTab) {
417  Text.append(Spaces, ' ');
418  break;
419  case FormatStyle::UT_Always: {
420  unsigned FirstTabWidth =
421  Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
422  // Indent with tabs only when there's at least one full tab.
423  if (FirstTabWidth + Style.TabWidth <= Spaces) {
424  Spaces -= FirstTabWidth;
425  Text.append("\t");
426  }
427  Text.append(Spaces / Style.TabWidth, '\t');
428  Text.append(Spaces % Style.TabWidth, ' ');
429  break;
430  }
432  if (WhitespaceStartColumn == 0) {
433  unsigned Indentation = IndentLevel * Style.IndentWidth;
434  // This happens, e.g. when a line in a block comment is indented less than
435  // the first one.
436  if (Indentation > Spaces)
437  Indentation = Spaces;
438  unsigned Tabs = Indentation / Style.TabWidth;
439  Text.append(Tabs, '\t');
440  Spaces -= Tabs * Style.TabWidth;
441  }
442  Text.append(Spaces, ' ');
443  break;
444  }
445 }
446 
447 } // namespace format
448 } // namespace clang
Use tabs only for indentation.
Definition: Format.h:434
Token Tok
The Token.
Definition: FormatToken.h:116
std::set< Replacement > Replacements
A set of Replacements. FIXME: Change to a vector and deduplicate in the RefactoringTool.
Definition: Replacement.h:141
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs. The configured TabWidth is used a...
Definition: FormatToken.h:207
bool operator()(const Change &C1, const Change &C2) const
bool IsFirst
Indicates that this is the first token.
Definition: FormatToken.h:148
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:122
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
uint32_t Offset
Definition: CacheTokens.cpp:43
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn, bool InPPDirective=false)
Replaces the whitespace in front of Tok. Only call once for each AnnotatedToken.
void reset()
Prepares the WhitespaceManager for another run.
tok::TokenKind getKind() const
Definition: Token.h:90
#define UINT_MAX
Definition: limits.h:72
const SmallVectorImpl< AnnotatedLine * >::const_iterator End
WhitespaceManager class manages whitespace around tokens and their replacements.
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, unsigned IndentLevel, int Spaces)
Inserts or replaces whitespace in the middle of a token.
SourceManager & SourceMgr
Definition: Format.cpp:1205
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
static CharSourceRange getCharRange(SourceRange R)
#define false
Definition: stdbool.h:33
Kind
Encodes a location in the source. The SourceManager can decode this to get at the full include stack...
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:129
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
Functor to sort changes in original source order.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:279
FormatStyle & Style
Definition: Format.cpp:1207
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:281
SourceLocation getStartOfNonWhitespace() const
Returns actual token start location without leading escaped newlines and whitespace.
Definition: FormatToken.h:385
A trivial tuple used to represent a source range.
FormatDecision Decision
Stores the formatting decision for the token once it was made.
Definition: FormatToken.h:274
unsigned Column
Definition: Format.cpp:1202