clang  3.7.0
BreakableToken.h
Go to the documentation of this file.
1 //===--- BreakableToken.h - Format C++ code -------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Declares BreakableToken, BreakableStringLiteral, and
12 /// BreakableBlockComment classes, that contain token type-specific logic to
13 /// break long lines in tokens.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
18 #define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
19 
20 #include "Encoding.h"
21 #include "TokenAnnotator.h"
22 #include "WhitespaceManager.h"
23 #include <utility>
24 
25 namespace clang {
26 namespace format {
27 
28 struct FormatStyle;
29 
30 /// \brief Base class for strategies on how to break tokens.
31 ///
32 /// FIXME: The interface seems set in stone, so we might want to just pull the
33 /// strategy into the class, instead of controlling it from the outside.
35 public:
36  /// \brief Contains starting character index and length of split.
37  typedef std::pair<StringRef::size_type, unsigned> Split;
38 
39  virtual ~BreakableToken() {}
40 
41  /// \brief Returns the number of lines in this token in the original code.
42  virtual unsigned getLineCount() const = 0;
43 
44  /// \brief Returns the number of columns required to format the piece of line
45  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46  ///
47  /// Note that previous breaks are not taken into account. \p Offset is always
48  /// specified from the start of the (original) line.
49  /// \p Length can be set to StringRef::npos, which means "to the end of line".
50  virtual unsigned
51  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
52  StringRef::size_type Length) const = 0;
53 
54  /// \brief Returns a range (offset, length) at which to break the line at
55  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
56  /// violate \p ColumnLimit.
57  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
58  unsigned ColumnLimit) const = 0;
59 
60  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
61  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
63 
64  /// \brief Replaces the whitespace range described by \p Split with a single
65  /// space.
66  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
67  Split Split,
69 
70  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
71  virtual void replaceWhitespaceBefore(unsigned LineIndex,
73 
74 protected:
77  const FormatStyle &Style)
78  : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
79  Encoding(Encoding), Style(Style) {}
80 
81  const FormatToken &Tok;
82  const unsigned IndentLevel;
83  const bool InPPDirective;
86 };
87 
88 /// \brief Base class for single line tokens that can be broken.
89 ///
90 /// \c getSplit() needs to be implemented by child classes.
92 public:
93  unsigned getLineCount() const override;
94  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
95  StringRef::size_type Length) const override;
96 
97 protected:
99  unsigned StartColumn, StringRef Prefix,
100  StringRef Postfix, bool InPPDirective,
102  const FormatStyle &Style);
103 
104  // The column in which the token starts.
105  unsigned StartColumn;
106  // The prefix a line needs after a break in the token.
107  StringRef Prefix;
108  // The postfix a line needs before introducing a break.
109  StringRef Postfix;
110  // The token text excluding the prefix and postfix.
111  StringRef Line;
112 };
113 
115 public:
116  /// \brief Creates a breakable token for a single line string literal.
117  ///
118  /// \p StartColumn specifies the column in which the token will start
119  /// after formatting.
121  unsigned StartColumn, StringRef Prefix,
122  StringRef Postfix, bool InPPDirective,
124 
125  Split getSplit(unsigned LineIndex, unsigned TailOffset,
126  unsigned ColumnLimit) const override;
127  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
128  WhitespaceManager &Whitespaces) override;
129  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
130  WhitespaceManager &Whitespaces) override {}
131 };
132 
134 public:
135  /// \brief Creates a breakable token for a line comment.
136  ///
137  /// \p StartColumn specifies the column in which the comment will start
138  /// after formatting.
140  unsigned StartColumn, bool InPPDirective,
142 
143  Split getSplit(unsigned LineIndex, unsigned TailOffset,
144  unsigned ColumnLimit) const override;
145  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
146  WhitespaceManager &Whitespaces) override;
147  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
148  WhitespaceManager &Whitespaces) override;
149  void replaceWhitespaceBefore(unsigned LineIndex,
150  WhitespaceManager &Whitespaces) override;
151 
152 private:
153  // The prefix without an additional space if one was added.
154  StringRef OriginalPrefix;
155 };
156 
158 public:
159  /// \brief Creates a breakable token for a block comment.
160  ///
161  /// \p StartColumn specifies the column in which the comment will start
162  /// after formatting, while \p OriginalStartColumn specifies in which
163  /// column the comment started before formatting.
164  /// If the comment starts a line after formatting, set \p FirstInLine to true.
166  unsigned StartColumn, unsigned OriginaStartColumn,
167  bool FirstInLine, bool InPPDirective,
169 
170  unsigned getLineCount() const override;
171  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
172  StringRef::size_type Length) const override;
173  Split getSplit(unsigned LineIndex, unsigned TailOffset,
174  unsigned ColumnLimit) const override;
175  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
176  WhitespaceManager &Whitespaces) override;
177  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
178  WhitespaceManager &Whitespaces) override;
179  void replaceWhitespaceBefore(unsigned LineIndex,
180  WhitespaceManager &Whitespaces) override;
181 
182 private:
183  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
184  // so that all whitespace between the lines is accounted to Lines[LineIndex]
185  // as leading whitespace:
186  // - Lines[LineIndex] points to the text after that whitespace
187  // - Lines[LineIndex-1] shrinks by its trailing whitespace
188  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
189  // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
190  //
191  // Sets StartOfLineColumn to the intended column in which the text at
192  // Lines[LineIndex] starts (note that the decoration, if present, is not
193  // considered part of the text).
194  void adjustWhitespace(unsigned LineIndex, int IndentDelta);
195 
196  // Returns the column at which the text in line LineIndex starts, when broken
197  // at TailOffset. Note that the decoration (if present) is not considered part
198  // of the text.
199  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
200 
201  // Contains the text of the lines of the block comment, excluding the leading
202  // /* in the first line and trailing */ in the last line, and excluding all
203  // trailing whitespace between the lines. Note that the decoration (if
204  // present) is also not considered part of the text.
206 
207  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
208  // front of Lines[i]. Note that this can include "* " sequences, which we
209  // regard as whitespace when all lines have a "*" prefix.
210  SmallVector<unsigned, 16> LeadingWhitespace;
211 
212  // StartOfLineColumn[i] is the target column at which Line[i] should be.
213  // Note that this excludes a leading "* " or "*" in case all lines have
214  // a "*" prefix.
215  // The first line's target column is always positive. The remaining lines'
216  // target columns are relative to the first line to allow correct indentation
217  // of comments in \c WhitespaceManager. Thus they can be negative as well (in
218  // case the first line needs to be unindented more than there's actual
219  // whitespace in another line).
220  SmallVector<int, 16> StartOfLineColumn;
221 
222  // The column at which the text of a broken line should start.
223  // Note that an optional decoration would go before that column.
224  // IndentAtLineBreak is a uniform position for all lines in a block comment,
225  // regardless of their relative position.
226  // FIXME: Revisit the decision to do this; the main reason was to support
227  // patterns like
228  // /**************//**
229  // * Comment
230  // We could also support such patterns by special casing the first line
231  // instead.
232  unsigned IndentAtLineBreak;
233 
234  // This is to distinguish between the case when the last line was empty and
235  // the case when it started with a decoration ("*" or "* ").
236  bool LastLineNeedsDecoration;
237 
238  // Either "* " if all lines begin with a "*", or empty.
239  StringRef Decoration;
240 };
241 
242 } // namespace format
243 } // namespace clang
244 
245 #endif
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
BreakableLineComment(const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a line comment.
void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override
Replaces the whitespace range described by Split with a single space.
const encoding::Encoding Encoding
Contains functions for text encoding manipulation. Supports UTF-8, 8-bit encodings and escape sequenc...
This file implements a token annotator, i.e. creates AnnotatedTokens out of FormatTokens with require...
void replaceWhitespaceBefore(unsigned LineIndex, WhitespaceManager &Whitespaces) override
Replaces the whitespace between LineIndex-1 and LineIndex.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
Manages the whitespaces around tokens and their replacements.
virtual unsigned getLineCount() const =0
Returns the number of lines in this token in the original code.
uint32_t Offset
Definition: CacheTokens.cpp:43
Base class for single line tokens that can be broken.
BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, StringRef Prefix, StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override
Emits the previously retrieved Split via Whitespaces.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
WhitespaceManager class manages whitespace around tokens and their replacements.
void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override
Replaces the whitespace range described by Split with a single space.
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:112
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const =0
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a block comment.
virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces)=0
Replaces the whitespace range described by Split with a single space.
const FormatStyle & Style
BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, StringRef Prefix, StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
WhitespaceManager Whitespaces
Definition: Format.cpp:1553
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override
Emits the previously retrieved Split via Whitespaces.
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const =0
Returns the number of columns required to format the piece of line at LineIndex, from byte offset Off...
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:42
unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, StringRef::size_type Length) const override
Returns the number of columns required to format the piece of line at LineIndex, from byte offset Off...
Base class for strategies on how to break tokens.
BreakableToken(const FormatToken &Tok, unsigned IndentLevel, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override
Emits the previously retrieved Split via Whitespaces.
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, StringRef::size_type Length) const override
Returns the number of columns required to format the piece of line at LineIndex, from byte offset Off...
void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override
Replaces the whitespace range described by Split with a single space.
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces)=0
Emits the previously retrieved Split via Whitespaces.
virtual void replaceWhitespaceBefore(unsigned LineIndex, WhitespaceManager &Whitespaces)
Replaces the whitespace between LineIndex-1 and LineIndex.
void replaceWhitespaceBefore(unsigned LineIndex, WhitespaceManager &Whitespaces) override
Replaces the whitespace between LineIndex-1 and LineIndex.