clang  3.8.0
Replacement.cpp
Go to the documentation of this file.
1 //===--- Replacement.cpp - Framework for clang refactoring tools ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Implements classes to support/store refactorings.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/Diagnostic.h"
19 #include "clang/Lex/Lexer.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/raw_os_ostream.h"
25 
26 namespace clang {
27 namespace tooling {
28 
29 static const char * const InvalidLocation = "";
30 
33 
34 Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
35  StringRef ReplacementText)
36  : FilePath(FilePath), ReplacementRange(Offset, Length),
37  ReplacementText(ReplacementText) {}
38 
40  unsigned Length, StringRef ReplacementText) {
41  setFromSourceLocation(Sources, Start, Length, ReplacementText);
42 }
43 
45  const CharSourceRange &Range,
46  StringRef ReplacementText,
47  const LangOptions &LangOpts) {
48  setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
49 }
50 
52  return FilePath != InvalidLocation;
53 }
54 
55 bool Replacement::apply(Rewriter &Rewrite) const {
56  SourceManager &SM = Rewrite.getSourceMgr();
57  const FileEntry *Entry = SM.getFileManager().getFile(FilePath);
58  if (!Entry)
59  return false;
60  FileID ID;
61  // FIXME: Use SM.translateFile directly.
62  SourceLocation Location = SM.translateFileLineCol(Entry, 1, 1);
63  ID = Location.isValid() ?
64  SM.getFileID(Location) :
66  // FIXME: We cannot check whether Offset + Length is in the file, as
67  // the remapping API is not public in the RewriteBuffer.
68  const SourceLocation Start =
69  SM.getLocForStartOfFile(ID).
70  getLocWithOffset(ReplacementRange.getOffset());
71  // ReplaceText returns false on success.
72  // ReplaceText only fails if the source location is not a file location, in
73  // which case we already returned false earlier.
74  bool RewriteSucceeded = !Rewrite.ReplaceText(
75  Start, ReplacementRange.getLength(), ReplacementText);
76  assert(RewriteSucceeded);
77  return RewriteSucceeded;
78 }
79 
80 std::string Replacement::toString() const {
81  std::string Result;
82  llvm::raw_string_ostream Stream(Result);
83  Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
84  << ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
85  return Stream.str();
86 }
87 
88 bool operator<(const Replacement &LHS, const Replacement &RHS) {
89  if (LHS.getOffset() != RHS.getOffset())
90  return LHS.getOffset() < RHS.getOffset();
91 
92  // Apply longer replacements first, specifically so that deletions are
93  // executed before insertions. It is (hopefully) never the intention to
94  // delete parts of newly inserted code.
95  if (LHS.getLength() != RHS.getLength())
96  return LHS.getLength() > RHS.getLength();
97 
98  if (LHS.getFilePath() != RHS.getFilePath())
99  return LHS.getFilePath() < RHS.getFilePath();
100  return LHS.getReplacementText() < RHS.getReplacementText();
101 }
102 
103 bool operator==(const Replacement &LHS, const Replacement &RHS) {
104  return LHS.getOffset() == RHS.getOffset() &&
105  LHS.getLength() == RHS.getLength() &&
106  LHS.getFilePath() == RHS.getFilePath() &&
107  LHS.getReplacementText() == RHS.getReplacementText();
108 }
109 
110 void Replacement::setFromSourceLocation(const SourceManager &Sources,
111  SourceLocation Start, unsigned Length,
112  StringRef ReplacementText) {
113  const std::pair<FileID, unsigned> DecomposedLocation =
114  Sources.getDecomposedLoc(Start);
115  const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first);
116  this->FilePath = Entry ? Entry->getName() : InvalidLocation;
117  this->ReplacementRange = Range(DecomposedLocation.second, Length);
118  this->ReplacementText = ReplacementText;
119 }
120 
121 // FIXME: This should go into the Lexer, but we need to figure out how
122 // to handle ranges for refactoring in general first - there is no obvious
123 // good way how to integrate this into the Lexer yet.
124 static int getRangeSize(const SourceManager &Sources,
125  const CharSourceRange &Range,
126  const LangOptions &LangOpts) {
127  SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
128  SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
129  std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
130  std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
131  if (Start.first != End.first) return -1;
132  if (Range.isTokenRange())
133  End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
134  return End.second - Start.second;
135 }
136 
137 void Replacement::setFromSourceRange(const SourceManager &Sources,
138  const CharSourceRange &Range,
139  StringRef ReplacementText,
140  const LangOptions &LangOpts) {
141  setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
142  getRangeSize(Sources, Range, LangOpts),
143  ReplacementText);
144 }
145 
146 template <typename T>
147 unsigned shiftedCodePositionInternal(const T &Replaces, unsigned Position) {
148  unsigned Offset = 0;
149  for (const auto& R : Replaces) {
150  if (R.getOffset() + R.getLength() <= Position) {
151  Offset += R.getReplacementText().size() - R.getLength();
152  continue;
153  }
154  if (R.getOffset() < Position &&
155  R.getOffset() + R.getReplacementText().size() <= Position) {
156  Position = R.getOffset() + R.getReplacementText().size() - 1;
157  }
158  break;
159  }
160  return Position + Offset;
161 }
162 
163 unsigned shiftedCodePosition(const Replacements &Replaces, unsigned Position) {
164  return shiftedCodePositionInternal(Replaces, Position);
165 }
166 
167 // FIXME: Remove this function when Replacements is implemented as std::vector
168 // instead of std::set.
169 unsigned shiftedCodePosition(const std::vector<Replacement> &Replaces,
170  unsigned Position) {
171  return shiftedCodePositionInternal(Replaces, Position);
172 }
173 
174 void deduplicate(std::vector<Replacement> &Replaces,
175  std::vector<Range> &Conflicts) {
176  if (Replaces.empty())
177  return;
178 
179  auto LessNoPath = [](const Replacement &LHS, const Replacement &RHS) {
180  if (LHS.getOffset() != RHS.getOffset())
181  return LHS.getOffset() < RHS.getOffset();
182  if (LHS.getLength() != RHS.getLength())
183  return LHS.getLength() < RHS.getLength();
184  return LHS.getReplacementText() < RHS.getReplacementText();
185  };
186 
187  auto EqualNoPath = [](const Replacement &LHS, const Replacement &RHS) {
188  return LHS.getOffset() == RHS.getOffset() &&
189  LHS.getLength() == RHS.getLength() &&
190  LHS.getReplacementText() == RHS.getReplacementText();
191  };
192 
193  // Deduplicate. We don't want to deduplicate based on the path as we assume
194  // that all replacements refer to the same file (or are symlinks).
195  std::sort(Replaces.begin(), Replaces.end(), LessNoPath);
196  Replaces.erase(std::unique(Replaces.begin(), Replaces.end(), EqualNoPath),
197  Replaces.end());
198 
199  // Detect conflicts
200  Range ConflictRange(Replaces.front().getOffset(),
201  Replaces.front().getLength());
202  unsigned ConflictStart = 0;
203  unsigned ConflictLength = 1;
204  for (unsigned i = 1; i < Replaces.size(); ++i) {
205  Range Current(Replaces[i].getOffset(), Replaces[i].getLength());
206  if (ConflictRange.overlapsWith(Current)) {
207  // Extend conflicted range
208  ConflictRange = Range(ConflictRange.getOffset(),
209  std::max(ConflictRange.getLength(),
210  Current.getOffset() + Current.getLength() -
211  ConflictRange.getOffset()));
212  ++ConflictLength;
213  } else {
214  if (ConflictLength > 1)
215  Conflicts.push_back(Range(ConflictStart, ConflictLength));
216  ConflictRange = Current;
217  ConflictStart = i;
218  ConflictLength = 1;
219  }
220  }
221 
222  if (ConflictLength > 1)
223  Conflicts.push_back(Range(ConflictStart, ConflictLength));
224 }
225 
226 bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) {
227  bool Result = true;
228  for (Replacements::const_iterator I = Replaces.begin(),
229  E = Replaces.end();
230  I != E; ++I) {
231  if (I->isApplicable()) {
232  Result = I->apply(Rewrite) && Result;
233  } else {
234  Result = false;
235  }
236  }
237  return Result;
238 }
239 
240 // FIXME: Remove this function when Replacements is implemented as std::vector
241 // instead of std::set.
242 bool applyAllReplacements(const std::vector<Replacement> &Replaces,
243  Rewriter &Rewrite) {
244  bool Result = true;
245  for (std::vector<Replacement>::const_iterator I = Replaces.begin(),
246  E = Replaces.end();
247  I != E; ++I) {
248  if (I->isApplicable()) {
249  Result = I->apply(Rewrite) && Result;
250  } else {
251  Result = false;
252  }
253  }
254  return Result;
255 }
256 
257 std::string applyAllReplacements(StringRef Code, const Replacements &Replaces) {
260  FileManager Files(FileSystemOptions(), InMemoryFileSystem);
261  DiagnosticsEngine Diagnostics(
263  new DiagnosticOptions);
264  SourceManager SourceMgr(Diagnostics, Files);
265  Rewriter Rewrite(SourceMgr, LangOptions());
266  InMemoryFileSystem->addFile(
267  "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>"));
268  FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(),
270  for (Replacements::const_iterator I = Replaces.begin(), E = Replaces.end();
271  I != E; ++I) {
272  Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
273  I->getReplacementText());
274  if (!Replace.apply(Rewrite))
275  return "";
276  }
277  std::string Result;
278  llvm::raw_string_ostream OS(Result);
279  Rewrite.getEditBuffer(ID).write(OS);
280  OS.flush();
281  return Result;
282 }
283 
284 namespace {
285 // Represents a merged replacement, i.e. a replacement consisting of multiple
286 // overlapping replacements from 'First' and 'Second' in mergeReplacements.
287 //
288 // Position projection:
289 // Offsets and lengths of the replacements can generally refer to two different
290 // coordinate spaces. Replacements from 'First' refer to the original text
291 // whereas replacements from 'Second' refer to the text after applying 'First'.
292 //
293 // MergedReplacement always operates in the coordinate space of the original
294 // text, i.e. transforms elements from 'Second' to take into account what was
295 // changed based on the elements from 'First'.
296 //
297 // We can correctly calculate this projection as we look at the replacements in
298 // order of strictly increasing offsets.
299 //
300 // Invariants:
301 // * We always merge elements from 'First' into elements from 'Second' and vice
302 // versa. Within each set, the replacements are non-overlapping.
303 // * We only extend to the right, i.e. merge elements with strictly increasing
304 // offsets.
305 class MergedReplacement {
306 public:
307  MergedReplacement(const Replacement &R, bool MergeSecond, int D)
308  : MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()),
309  Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()),
310  Text(R.getReplacementText()) {
311  Delta += MergeSecond ? 0 : Text.size() - Length;
312  DeltaFirst = MergeSecond ? Text.size() - Length : 0;
313  }
314 
315  // Merges the next element 'R' into this merged element. As we always merge
316  // from 'First' into 'Second' or vice versa, the MergedReplacement knows what
317  // set the next element is coming from.
318  void merge(const Replacement &R) {
319  if (MergeSecond) {
320  unsigned REnd = R.getOffset() + Delta + R.getLength();
321  unsigned End = Offset + Text.size();
322  if (REnd > End) {
323  Length += REnd - End;
324  MergeSecond = false;
325  }
326  StringRef TextRef = Text;
327  StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset);
328  StringRef Tail = TextRef.substr(REnd - Offset);
329  Text = (Head + R.getReplacementText() + Tail).str();
330  Delta += R.getReplacementText().size() - R.getLength();
331  } else {
332  unsigned End = Offset + Length;
333  StringRef RText = R.getReplacementText();
334  StringRef Tail = RText.substr(End - R.getOffset());
335  Text = (Text + Tail).str();
336  if (R.getOffset() + RText.size() > End) {
337  Length = R.getOffset() + R.getLength() - Offset;
338  MergeSecond = true;
339  } else {
340  Length += R.getLength() - RText.size();
341  }
342  DeltaFirst += RText.size() - R.getLength();
343  }
344  }
345 
346  // Returns 'true' if 'R' starts strictly after the MergedReplacement and thus
347  // doesn't need to be merged.
348  bool endsBefore(const Replacement &R) const {
349  if (MergeSecond)
350  return Offset + Text.size() < R.getOffset() + Delta;
351  return Offset + Length < R.getOffset();
352  }
353 
354  // Returns 'true' if an element from the second set should be merged next.
355  bool mergeSecond() const { return MergeSecond; }
356  int deltaFirst() const { return DeltaFirst; }
357  Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; }
358 
359 private:
361 
362  // Amount of characters that elements from 'Second' need to be shifted by in
363  // order to refer to the original text.
364  int Delta;
365 
366  // Sum of all deltas (text-length - length) of elements from 'First' merged
367  // into this element. This is used to update 'Delta' once the
368  // MergedReplacement is completed.
370 
371  // Data of the actually merged replacement. FilePath and Offset aren't changed
372  // as the element is only extended to the right.
373  const StringRef FilePath;
374  const unsigned Offset;
375  unsigned Length;
376  std::string Text;
377 };
378 } // namespace
379 
381  const Replacements &Second) {
382  if (First.empty() || Second.empty())
383  return First.empty() ? Second : First;
384 
385  // Delta is the amount of characters that replacements from 'Second' need to
386  // be shifted so that their offsets refer to the original text.
387  int Delta = 0;
389 
390  // Iterate over both sets and always add the next element (smallest total
391  // Offset) from either 'First' or 'Second'. Merge that element with
392  // subsequent replacements as long as they overlap. See more details in the
393  // comment on MergedReplacement.
394  for (auto FirstI = First.begin(), SecondI = Second.begin();
395  FirstI != First.end() || SecondI != Second.end();) {
396  bool NextIsFirst = SecondI == Second.end() ||
397  (FirstI != First.end() &&
398  FirstI->getOffset() < SecondI->getOffset() + Delta);
399  MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst,
400  Delta);
401  ++(NextIsFirst ? FirstI : SecondI);
402 
403  while ((Merged.mergeSecond() && SecondI != Second.end()) ||
404  (!Merged.mergeSecond() && FirstI != First.end())) {
405  auto &I = Merged.mergeSecond() ? SecondI : FirstI;
406  if (Merged.endsBefore(*I))
407  break;
408  Merged.merge(*I);
409  ++I;
410  }
411  Delta -= Merged.deltaFirst();
412  Result.insert(Merged.asReplacement());
413  }
414  return Result;
415 }
416 
417 } // end namespace tooling
418 } // end namespace clang
419 
int Position
unsigned Length
SourceLocation getBegin() const
Implements support for file system lookup, file system caching, and directory search management...
Definition: FileManager.h:115
SourceManager & getSourceMgr() const
Definition: Rewriter.h:64
Defines the clang::FileManager interface and associated types.
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
Defines the SourceManager interface.
std::set< Replacement > Replacements
A set of Replacements.
Definition: Replacement.h:141
std::string Text
unsigned getLength() const
Definition: Replacement.h:44
bool operator==(const Replacement &LHS, const Replacement &RHS)
Equal-to operator between two Replacements.
int Delta
void deduplicate(std::vector< Replacement > &Replaces, std::vector< Range > &Conflicts)
Removes duplicate Replacements and reports if Replacements conflict with one another.
bool apply(Rewriter &Rewrite) const
Applies the replacement on the Rewriter.
Definition: Replacement.cpp:55
RewriteBuffer & getEditBuffer(FileID FID)
getEditBuffer - This is like getRewriteBufferFor, but always returns a buffer, and allows you to writ...
Definition: Rewriter.cpp:223
bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite)
Apply all replacements in Replaces to the Rewriter Rewrite.
Replacements mergeReplacements(const Replacements &First, const Replacements &Second)
Merges two sets of replacements with the second set referring to the code after applying the first se...
StringRef getReplacementText() const
Definition: Replacement.h:110
const StringRef FilePath
unsigned getLength() const
Definition: Replacement.h:109
static int getRangeSize(const SourceManager &Sources, const CharSourceRange &Range, const LangOptions &LangOpts)
bool ReplaceText(SourceLocation Start, unsigned OrigLength, StringRef NewStr)
ReplaceText - This method replaces a range of characters in the input buffer with a new string...
Definition: Rewriter.cpp:303
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:48
An in-memory file system.
uint32_t Offset
Definition: CacheTokens.cpp:44
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:135
const unsigned Offset
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
detail::InMemoryDirectory::const_iterator I
Replacement()
Creates an invalid (not applicable) replacement.
Definition: Replacement.cpp:31
const FileEntry * getFile(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Lookup, cache, and verify the specified file (real or virtual).
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
A text replacement.
Definition: Replacement.h:70
A source range independent of the SourceManager.
Definition: Replacement.h:36
unsigned shiftedCodePositionInternal(const T &Replaces, unsigned Position)
const SmallVectorImpl< AnnotatedLine * >::const_iterator End
bool operator<(const Replacement &LHS, const Replacement &RHS)
Less-than operator between two Replacements.
Definition: Replacement.cpp:88
bool MergeSecond
ID
Defines the set of possible language-specific address spaces.
Definition: AddressSpaces.h:27
SourceManager & SM
Represents a character-granular source range.
SourceLocation getEnd() const
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
Definition: Lexer.cpp:406
FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter, int LoadedID=0, unsigned LoadedOffset=0)
Create a new FileID that represents the specified file being #included from the specified IncludePosi...
SourceManager & SourceMgr
Definition: Format.cpp:1352
FileManager & getFileManager() const
unsigned shiftedCodePosition(const Replacements &Replaces, unsigned Position)
Calculates how a code Position is shifted when Replaces are applied.
unsigned getOffset() const
Definition: Replacement.h:108
The result type of a method or function.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
static const char *const InvalidLocation
Definition: Replacement.cpp:29
const char * getName() const
Definition: FileManager.h:84
Encodes a location in the source.
int DeltaFirst
bool isValid() const
Return true if this is a valid SourceLocation object.
Options for controlling the compiler diagnostics engine.
StringRef getFilePath() const
Accessors.
Definition: Replacement.h:107
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:53
unsigned getOffset() const
Accessors.
Definition: Replacement.h:43
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
std::string toString() const
Returns a human readable string representation.
Definition: Replacement.cpp:80
Used for handling and querying diagnostic IDs.
detail::InMemoryDirectory::const_iterator E
Defines the Diagnostic-related interfaces.
raw_ostream & write(raw_ostream &Stream) const
Write to Stream the result of applying all changes to the original buffer.
Definition: Rewriter.cpp:25
bool isApplicable() const
Returns whether this replacement can be applied to a file.
Definition: Replacement.cpp:51
Keeps track of options that affect how file operations are performed.
Rewriter - This is the main interface to the rewrite buffers.
Definition: Rewriter.h:31
FormatToken * Current
Defines the Diagnostic IDs-related interfaces.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
This class handles loading and caching of source files into memory.