//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
#include <functional>
using namespace clang;

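// Illustrative context (CUDA source, not part of this file): a kernel launch
//
//   __global__ void kernel(float *p);
//   kernel<<<numBlocks, threadsPerBlock, sharedBytes, stream>>>(ptr);
//
// reaches Sema with the <<<...>>> arguments in ExecConfig; LLLLoc and GGGLoc
// are the locations of the '<<<' and '>>>' tokens. The function below turns
// the execution configuration into a call to cudaConfigureCall, whose
// declaration comes from the CUDA runtime headers (hence the error if it has
// not been declared).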
ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (D->hasAttr<CUDADeviceAttr>()) {
    if (D->hasAttr<CUDAHostAttr>())
      return CFT_HostDevice;
    return CFT_Device;
  } else if (D->hasAttr<CUDAHostAttr>()) {
    return CFT_Host;
  } else if (D->isImplicit()) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F  - from
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

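// Illustrative reading of the table (CUDA source, not part of this file):
//
//   __device__ void dev();
//   __host__ void host();
//   __host__ __device__ void hd() { dev(); }
//
// The call hd -> dev is CFP_SameSide when compiling for the device and
// CFP_WrongSide when compiling for the host; hd -> host is the mirror
// image. WrongSide calls survive Sema but are rejected if ever codegened.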
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  CUDAFunctionTarget CallerTarget =
      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios.
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode-matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across the device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename T>
static void EraseUnwantedCUDAMatchesImpl(
    Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
    std::function<const FunctionDecl *(const T &)> FetchDecl) {
  if (Matches.size() <= 1)
    return;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const T &Match) {
    return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
  };

  // Find the best call preference among the functions in Matches.
  Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  Matches.erase(
      llvm::remove_if(Matches,
                      [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
      Matches.end());
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<FunctionDecl *> &Matches) {
  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<DeclAccessPair> &Matches) {
  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
      *this, Caller, Matches, [](const DeclAccessPair &item) {
        return dyn_cast<FunctionDecl>(item.getDecl());
      });
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
      *this, Caller, Matches,
      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
        return dyn_cast<FunctionDecl>(item.second);
      });
}
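
// Illustrative example (CUDA source, not part of this file): if both
//
//   __device__ void f(int);
//   __host__ void f(int);
//
// end up in a match set for a __device__ caller, the __device__ overload has
// preference CFP_Native while the __host__ one has CFP_Never, so the helpers
// above keep only the __device__ candidate.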

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

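// Illustrative example (CUDA source, not part of this file):
//
//   struct A { __host__ A(); };
//   struct B { __device__ B(); };
//   struct C : A, B {};  // implicit C::C() must call A::A() and B::B()
//
// Here the callee targets CFT_Host and CFT_Device conflict (neither is
// __host__ __device__ and they differ), so the function below marks C's
// implicit constructor with CUDAInvalidTargetAttr.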
bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}

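// Illustrative example (CUDA source, not part of this file): for
//
//   struct A { __device__ A(); };
//   struct D : A {};
//
// the only special member D::D() must invoke is A's __device__ constructor,
// so D's implicit default constructor is inferred to be __device__. A class
// whose special member needs to call nothing gets __host__ __device__.
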
bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor ...
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

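// Illustrative example (CUDA source, not part of this file); this predicate
// is consulted elsewhere in Sema when deciding whether a device-side global
// variable would require disallowed dynamic initialization.
//
//   struct Empty { __device__ Empty() {} };                      // empty
//   struct NonEmpty { int x; __device__ NonEmpty() : x(1) {} };  // not empty
//
// Empty's constructor qualifies (empty body, no parameters, no virtuals);
// NonEmpty's does not, because its member initializer is not itself a call
// to an empty constructor.
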
bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor ...
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs).  This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD->getName();
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

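// Illustrative example (CUDA source, not part of this file), compiled with
// -fcuda-host-device-constexpr:
//
//   constexpr int twice(int x) { return 2 * x; }   // becomes HD implicitly
//
//   __device__ int thrice(int x);                  // user-declared __device__
//   constexpr int thrice(int x) { return 3 * x; }  // error: differs from the
//                                                  // __device__ decl only in
//                                                  // CUDA attrs (no error if
//                                                  // that decl is in a
//                                                  // system header)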