clang  3.8.0
SemaCUDA.cpp
Go to the documentation of this file.
1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// \brief This file implements semantic analysis for CUDA constructs.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Sema/Sema.h"
15 #include "clang/AST/ASTContext.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/Lex/Preprocessor.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallVector.h"
21 using namespace clang;
22 
24  MultiExprArg ExecConfig,
25  SourceLocation GGGLoc) {
27  if (!ConfigDecl)
28  return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
29  << "cudaConfigureCall");
30  QualType ConfigQTy = ConfigDecl->getType();
31 
32  DeclRefExpr *ConfigDR = new (Context)
33  DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
34  MarkFunctionReferenced(LLLLoc, ConfigDecl);
35 
36  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
37  /*IsExecConfig=*/true);
38 }
39 
40 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
42  if (D->hasAttr<CUDAInvalidTargetAttr>())
43  return CFT_InvalidTarget;
44 
45  if (D->hasAttr<CUDAGlobalAttr>())
46  return CFT_Global;
47 
48  if (D->hasAttr<CUDADeviceAttr>()) {
49  if (D->hasAttr<CUDAHostAttr>())
50  return CFT_HostDevice;
51  return CFT_Device;
52  } else if (D->hasAttr<CUDAHostAttr>()) {
53  return CFT_Host;
54  } else if (D->isImplicit()) {
55  // Some implicit declarations (like intrinsic functions) are not marked.
56  // Set the most lenient target on them for maximal flexibility.
57  return CFT_HostDevice;
58  }
59 
60  return CFT_Host;
61 }
62 
63 // * CUDA Call preference table
64 //
65 // F - from,
66 // T - to
67 // Ph - preference in host mode
68 // Pd - preference in device mode
69 // H - handled in (x)
70 // Preferences: b-best, f-fallback, l-last resort, n-never.
71 //
72 // | F | T | Ph | Pd | H |
73 // |----+----+----+----+-----+
74 // | d | d | b | b | (b) |
75 // | d | g | n | n | (a) |
76 // | d | h | l | l | (e) |
77 // | d | hd | f | f | (c) |
78 // | g | d | b | b | (b) |
79 // | g | g | n | n | (a) |
80 // | g | h | l | l | (e) |
81 // | g | hd | f | f | (c) |
82 // | h | d | l | l | (e) |
83 // | h | g | b | b | (b) |
84 // | h | h | b | b | (b) |
85 // | h | hd | f | f | (c) |
86 // | hd | d | l | f | (d) |
87 // | hd | g | f | n |(d/a)|
88 // | hd | h | f | l | (d) |
89 // | hd | hd | b | b | (b) |
90 
93  const FunctionDecl *Callee) {
94  assert(getLangOpts().CUDATargetOverloads &&
95  "Should not be called w/o enabled target overloads.");
96 
97  assert(Callee && "Callee must be valid.");
98  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
99  CUDAFunctionTarget CallerTarget =
100  (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
101 
102  // If one of the targets is invalid, the check always fails, no matter what
103  // the other target is.
104  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
105  return CFP_Never;
106 
107  // (a) Can't call global from some contexts until we support CUDA's
108  // dynamic parallelism.
109  if (CalleeTarget == CFT_Global &&
110  (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
111  (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
112  return CFP_Never;
113 
114  // (b) Best case scenarios
115  if (CalleeTarget == CallerTarget ||
116  (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
117  (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
118  return CFP_Best;
119 
120  // (c) Calling HostDevice is OK as a fallback that works for everyone.
121  if (CalleeTarget == CFT_HostDevice)
122  return CFP_Fallback;
123 
124  // Figure out what should be returned 'last resort' cases. Normally
125  // those would not be allowed, but we'll consider them if
126  // CUDADisableTargetCallChecks is true.
127  CUDAFunctionPreference QuestionableResult =
128  getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
129 
130  // (d) HostDevice behavior depends on compilation mode.
131  if (CallerTarget == CFT_HostDevice) {
132  // Calling a function that matches compilation mode is OK.
133  // Calling a function from the other side is frowned upon.
134  if (getLangOpts().CUDAIsDevice)
135  return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
136  else
137  return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
138  ? CFP_Fallback
139  : QuestionableResult;
140  }
141 
142  // (e) Calling across device/host boundary is not something you should do.
143  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
144  (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
145  (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
146  return QuestionableResult;
147 
148  llvm_unreachable("All cases should've been handled by now.");
149 }
150 
152  const FunctionDecl *Callee) {
153  // With target overloads enabled, we only disallow calling
154  // combinations with CFP_Never.
155  if (getLangOpts().CUDATargetOverloads)
156  return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
157 
158  // The CUDADisableTargetCallChecks short-circuits this check: we assume all
159  // cross-target calls are valid.
160  if (getLangOpts().CUDADisableTargetCallChecks)
161  return false;
162 
163  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
164  CalleeTarget = IdentifyCUDATarget(Callee);
165 
166  // If one of the targets is invalid, the check always fails, no matter what
167  // the other target is.
168  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
169  return true;
170 
171  // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
172  // Callable from the device only."
173  if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
174  return true;
175 
176  // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
177  // Callable from the host only."
178  // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
179  // Callable from the host only."
180  if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
181  (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
182  return true;
183 
184  // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
185  // however, in which case the function is compiled for both the host and the
186  // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
187  // paths between host and device."
188  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
189  // If the caller is implicit then the check always passes.
190  if (Caller->isImplicit()) return false;
191 
192  bool InDeviceMode = getLangOpts().CUDAIsDevice;
193  if (!InDeviceMode && CalleeTarget != CFT_Host)
194  return true;
195  if (InDeviceMode && CalleeTarget != CFT_Device) {
196  // Allow host device functions to call host functions if explicitly
197  // requested.
198  if (CalleeTarget == CFT_Host &&
199  getLangOpts().CUDAAllowHostCallsFromHostDevice) {
200  Diag(Caller->getLocation(),
201  diag::warn_host_calls_from_host_device)
202  << Callee->getNameAsString() << Caller->getNameAsString();
203  return false;
204  }
205 
206  return true;
207  }
208  }
209 
210  return false;
211 }
212 
213 template <typename T, typename FetchDeclFn>
214 static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
216  FetchDeclFn FetchDecl) {
217  assert(S.getLangOpts().CUDATargetOverloads &&
218  "Should not be called w/o enabled target overloads.");
219  if (Matches.size() <= 1)
220  return;
221 
222  // Find the best call preference among the functions in Matches.
224  for (auto const &Match : Matches) {
225  P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
226  if (P > BestCFP)
227  BestCFP = P;
228  }
229 
230  // Erase all functions with lower priority.
231  for (unsigned I = 0, N = Matches.size(); I != N;)
232  if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
233  Matches[I] = Matches[--N];
234  Matches.resize(N);
235  } else {
236  ++I;
237  }
238 }
239 
242  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
243  *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
244 }
245 
248  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
249  *this, Caller, Matches, [](const DeclAccessPair &item) {
250  return dyn_cast<FunctionDecl>(item.getDecl());
251  });
252 }
253 
255  const FunctionDecl *Caller,
256  SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
257  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
258  *this, Caller, Matches,
259  [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
260  return dyn_cast<FunctionDecl>(item.second);
261  });
262 }
263 
264 /// When an implicitly-declared special member has to invoke more than one
265 /// base/field special member, conflicts may occur in the targets of these
266 /// members. For example, if one base's member __host__ and another's is
267 /// __device__, it's a conflict.
268 /// This function figures out if the given targets \param Target1 and
269 /// \param Target2 conflict, and if they do not it fills in
270 /// \param ResolvedTarget with a target that resolves for both calls.
271 /// \return true if there's a conflict, false otherwise.
272 static bool
274  Sema::CUDAFunctionTarget Target2,
275  Sema::CUDAFunctionTarget *ResolvedTarget) {
276  if (Target1 == Sema::CFT_Global && Target2 == Sema::CFT_Global) {
277  // TODO: this shouldn't happen, really. Methods cannot be marked __global__.
278  // Clang should detect this earlier and produce an error. Then this
279  // condition can be changed to an assertion.
280  return true;
281  }
282 
283  if (Target1 == Sema::CFT_HostDevice) {
284  *ResolvedTarget = Target2;
285  } else if (Target2 == Sema::CFT_HostDevice) {
286  *ResolvedTarget = Target1;
287  } else if (Target1 != Target2) {
288  return true;
289  } else {
290  *ResolvedTarget = Target1;
291  }
292 
293  return false;
294 }
295 
297  CXXSpecialMember CSM,
298  CXXMethodDecl *MemberDecl,
299  bool ConstRHS,
300  bool Diagnose) {
301  llvm::Optional<CUDAFunctionTarget> InferredTarget;
302 
303  // We're going to invoke special member lookup; mark that these special
304  // members are called from this one, and not from its caller.
305  ContextRAII MethodContext(*this, MemberDecl);
306 
307  // Look for special members in base classes that should be invoked from here.
308  // Infer the target of this member base on the ones it should call.
309  // Skip direct and indirect virtual bases for abstract classes.
311  for (const auto &B : ClassDecl->bases()) {
312  if (!B.isVirtual()) {
313  Bases.push_back(&B);
314  }
315  }
316 
317  if (!ClassDecl->isAbstract()) {
318  for (const auto &VB : ClassDecl->vbases()) {
319  Bases.push_back(&VB);
320  }
321  }
322 
323  for (const auto *B : Bases) {
324  const RecordType *BaseType = B->getType()->getAs<RecordType>();
325  if (!BaseType) {
326  continue;
327  }
328 
329  CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
331  LookupSpecialMember(BaseClassDecl, CSM,
332  /* ConstArg */ ConstRHS,
333  /* VolatileArg */ false,
334  /* RValueThis */ false,
335  /* ConstThis */ false,
336  /* VolatileThis */ false);
337 
338  if (!SMOR || !SMOR->getMethod()) {
339  continue;
340  }
341 
342  CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
343  if (!InferredTarget.hasValue()) {
344  InferredTarget = BaseMethodTarget;
345  } else {
346  bool ResolutionError = resolveCalleeCUDATargetConflict(
347  InferredTarget.getValue(), BaseMethodTarget,
348  InferredTarget.getPointer());
349  if (ResolutionError) {
350  if (Diagnose) {
351  Diag(ClassDecl->getLocation(),
352  diag::note_implicit_member_target_infer_collision)
353  << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
354  }
355  MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
356  return true;
357  }
358  }
359  }
360 
361  // Same as for bases, but now for special members of fields.
362  for (const auto *F : ClassDecl->fields()) {
363  if (F->isInvalidDecl()) {
364  continue;
365  }
366 
367  const RecordType *FieldType =
368  Context.getBaseElementType(F->getType())->getAs<RecordType>();
369  if (!FieldType) {
370  continue;
371  }
372 
373  CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
375  LookupSpecialMember(FieldRecDecl, CSM,
376  /* ConstArg */ ConstRHS && !F->isMutable(),
377  /* VolatileArg */ false,
378  /* RValueThis */ false,
379  /* ConstThis */ false,
380  /* VolatileThis */ false);
381 
382  if (!SMOR || !SMOR->getMethod()) {
383  continue;
384  }
385 
386  CUDAFunctionTarget FieldMethodTarget =
387  IdentifyCUDATarget(SMOR->getMethod());
388  if (!InferredTarget.hasValue()) {
389  InferredTarget = FieldMethodTarget;
390  } else {
391  bool ResolutionError = resolveCalleeCUDATargetConflict(
392  InferredTarget.getValue(), FieldMethodTarget,
393  InferredTarget.getPointer());
394  if (ResolutionError) {
395  if (Diagnose) {
396  Diag(ClassDecl->getLocation(),
397  diag::note_implicit_member_target_infer_collision)
398  << (unsigned)CSM << InferredTarget.getValue()
399  << FieldMethodTarget;
400  }
401  MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
402  return true;
403  }
404  }
405  }
406 
407  if (InferredTarget.hasValue()) {
408  if (InferredTarget.getValue() == CFT_Device) {
409  MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
410  } else if (InferredTarget.getValue() == CFT_Host) {
411  MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
412  } else {
413  MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
414  MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
415  }
416  } else {
417  // If no target was inferred, mark this member as __host__ __device__;
418  // it's the least restrictive option that can be invoked from any target.
419  MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
420  MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
421  }
422 
423  return false;
424 }
Defines the clang::ASTContext interface.
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1483
A (possibly-)qualified type.
Definition: Type.h:575
base_class_range bases()
Definition: DeclCXX.h:713
void EraseUnwantedCUDAMatches(const FunctionDecl *Caller, SmallVectorImpl< FunctionDecl * > &Matches)
Finds a function in Matches with highest calling priority from Caller context and erases all function...
Definition: SemaCUDA.cpp:240
const LangOptions & getLangOpts() const
Definition: Sema.h:1041
SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID)
Emit a diagnostic.
Definition: Sema.h:1118
static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl< T > &Matches, FetchDeclFn FetchDecl)
Definition: SemaCUDA.cpp:214
bool inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, CXXSpecialMember CSM, CXXMethodDecl *MemberDecl, bool ConstRHS, bool Diagnose)
Given a implicit special member, infer its CUDA target from the calls it needs to make to underlying ...
Definition: SemaCUDA.cpp:296
CUDAFunctionPreference
Definition: Sema.h:8752
SpecialMemberOverloadResult * LookupSpecialMember(CXXRecordDecl *D, CXXSpecialMember SM, bool ConstArg, bool VolatileArg, bool RValueThis, bool ConstThis, bool VolatileThis)
ExprResult ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, MultiExprArg ArgExprs, SourceLocation RParenLoc, Expr *ExecConfig=nullptr, bool IsExecConfig=false)
ActOnCallExpr - Handle a call to Fn with the specified array of arguments.
Definition: SemaExpr.cpp:5002
bool hasAttr() const
Definition: DeclBase.h:498
SpecialMemberOverloadResult - The overloading result for a special member function.
Definition: Sema.h:879
ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, MultiExprArg ExecConfig, SourceLocation GGGLoc)
Definition: SemaCUDA.cpp:23
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:514
field_range fields() const
Definition: Decl.h:3295
RecordDecl * getDecl() const
Definition: Type.h:3553
std::string getNameAsString() const
getNameAsString - Get a human-readable name for the declaration, even if it is one of the special kin...
Definition: Decl.h:184
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:38
detail::InMemoryDirectory::const_iterator I
QualType getType() const
Definition: Decl.h:530
bool isAbstract() const
Determine whether this class has a pure virtual function.
Definition: DeclCXX.h:1155
AnnotatingParser & P
CXXSpecialMember
Kinds of C++ special members.
Definition: Sema.h:969
Sema - This implements semantic analysis and AST building for C.
Definition: Sema.h:259
bool CheckCUDATarget(const FunctionDecl *Caller, const FunctionDecl *Callee)
Definition: SemaCUDA.cpp:151
Defines the clang::Preprocessor interface.
void addAttr(Attr *A)
Definition: DeclBase.h:449
Encodes a location in the source.
CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D)
IdentifyCUDATarget - Determine the CUDA compilation target for this function.
Definition: SemaCUDA.cpp:41
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:1701
static bool resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, Sema::CUDAFunctionTarget Target2, Sema::CUDAFunctionTarget *ResolvedTarget)
When an implicitly-declared special member has to invoke more than one base/field special member...
Definition: SemaCUDA.cpp:273
bool Matches
FunctionDecl * getcudaConfigureCallDecl()
Definition: ASTContext.h:1093
A POD class for pairing a NamedDecl* with an access specifier.
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...
Definition: Type.h:3544
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:5675
CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, const FunctionDecl *Callee)
Identifies relative preference of a given Caller/Callee combination, based on their host/device attri...
Definition: SemaCUDA.cpp:92
Represents a C++ struct/union/class.
Definition: DeclCXX.h:285
CUDAFunctionTarget
Definition: Sema.h:8742
ExprResult ExprError()
Definition: Ownership.h:267
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:922
void MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func, bool OdrUse=true)
Mark a function referenced, and check whether it is odr-used (C++ [basic.def.odr]p2, C99 6.9p3)
Definition: SemaExpr.cpp:12583
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:106
SourceLocation getLocation() const
Definition: DeclBase.h:384
ASTContext & Context
Definition: Sema.h:295
base_class_range vbases()
Definition: DeclCXX.h:730
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
A RAII object to temporarily push a declaration context.
Definition: Sema.h:601