//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// \brief Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
};

/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
        auto *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
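  // Illustrative sketch of the control flow produced above for an untied
  // task (not emitted verbatim; block names follow createBasicBlock above):
  //
  //   switch (*partid) {            // emitted by Enter()
  //   default: goto .untied.done.;  // branches to the function return
  //   case 0:  goto .untied.jmp.0;  // resume point added by Enter()
  //   case 1:  goto .untied.jmp.1;  // one case per emitUntiedSwitch() call
  //   }
  //
  // Each emitUntiedSwitch() stores the next case number into *partid, runs
  // UntiedCodeGen (which is expected to re-enqueue the task), returns, and
  // registers the block after the scheduling point as the matching resume
  // case.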
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// \brief Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// \brief Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; the original one can be
    // used directly.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client; only the client has the
/// information required to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in the innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      SourceLocation());
      PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
        return CGF.EmitLValue(&DRE).getAddress();
      });
    }
    (void)PrivScope.Privatize();
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
  }
};

/// \brief Values for bit flags used in the ident_t structure to describe its
/// fields. All enumerators are named and described in accordance with the
/// code from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};

/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
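
// For illustration only: a default ident_t constant, as built by
// getOrCreateDefaultLocation() below, looks roughly like
//
//   ident_t loc = {0, /*flags=*/OMP_IDENT_KMPC, 0, 0,
//                  ";unknown;unknown;0;0;;"};
//
// emitUpdateLocation() later fills psource with the real
// ";file;function;line;column;;" string when debug info is available.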

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
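
// Note (sketch only): the two modifier bits above are meant to be combined
// with a base schedule via bitwise OR when the value is passed to the
// runtime, e.g. for 'schedule(nonmonotonic: dynamic, N)' roughly
//
//   kmp_int32 Sched = OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic;
//
// keeping the low bits as the sched_type and the high bits as modifiers.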

enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
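
// Illustrative example (not a literal transcription of the emitted IR): for a
// plain '#pragma omp parallel', codegen typically boils down to one call of
// the form
//
//   __kmpc_fork_call(&loc, /*argc=*/N, (kmpc_micro)&.omp_outlined., args...);
//
// where .omp_outlined. is produced by emitParallelOrTeamsOutlinedFunction()
// below and args are the captured variables of the region.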

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
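
// Example use (hypothetical source): given a declaration such as
//   '#pragma omp declare reduction(plus : int : omp_out += omp_in)
//    initializer(omp_priv = 0)',
// this helper is called twice from emitUserDefinedReduction() below: once to
// build ".omp_combiner." from the combiner expression (In = omp_in,
// Out = omp_out) and once to build ".omp_initializer." from the initializer
// (In = omp_orig, Out = omp_priv).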

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
static CharUnits getIdentSize(CodeGenModule &CGM) {
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
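
// Illustrative shape of the result (sketch only): for '#pragma omp parallel'
// the returned function is kmpc_micro-compatible, roughly
//
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       <captured vars>...);
//
// with ThreadIDVar bound to global_tid, so getThreadID() can load it directly
// instead of calling __kmpc_global_thread_num().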

llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = getIdentAlign(CGM);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }
    auto DefaultOpenMPLocation = new llvm::GlobalVariable(
        CGM.getModule(), IdentTy, /*isConstant*/ true,
        llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);
    DefaultOpenMPLocation->setAlignment(Align.getQuantity());

    llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
    llvm::Constant *Values[] = {Zero,
                                llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                                Zero, Zero, DefaultOpenMPPSource};
    llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
    DefaultOpenMPLocation->setInitializer(Init);
    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGM.getSize(getIdentSize(CGF.CGM)));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);

  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
      // If the value was loaded in the entry block, cache it and use it
      // everywhere in the function.
      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
        Elem.second.ThreadID = ThreadID;
      }
      return ThreadID;
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
  ThreadID =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                          emitUpdateLocation(CGF, Loc));
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  Elem.second.ThreadID = ThreadID;
  return ThreadID;
}
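
// Sketch of the slow path above (illustrative IR, not emitted verbatim):
// outside an outlined region the thread id is materialized once per function
// in the entry block, roughly
//
//   %0 = call i32 @__kmpc_global_thread_num(%ident_t* %loc)
//
// and then reused via OpenMPLocThreadIDMap until functionFinished() drops the
// cached entry for the function.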

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (auto *D : FunctionUDRMap[CGF.CurFn]) {
      UDRMap.erase(D);
    }
    FunctionUDRMap.erase(CGF.CurFn);
  }
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return llvm::PointerType::getUnqual(IdentTy);
}
1044
1045llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1046  if (!Kmpc_MicroTy) {
1047    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1048    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1049                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1050    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1051  }
1052  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1053}
1054
1055llvm::Constant *
1056CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1057  llvm::Constant *RTLFn = nullptr;
1058  switch (static_cast<OpenMPRTLFunction>(Function)) {
1059  case OMPRTL__kmpc_fork_call: {
1060    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1061    // microtask, ...);
1062    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1063                                getKmpc_MicroPointerTy()};
1064    llvm::FunctionType *FnTy =
1065        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1066    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1067    break;
1068  }
1069  case OMPRTL__kmpc_global_thread_num: {
1070    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1071    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1072    llvm::FunctionType *FnTy =
1073        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1074    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1075    break;
1076  }
1077  case OMPRTL__kmpc_threadprivate_cached: {
1078    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1079    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1080    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1081                                CGM.VoidPtrTy, CGM.SizeTy,
1082                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1083    llvm::FunctionType *FnTy =
1084        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1085    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1086    break;
1087  }
1088  case OMPRTL__kmpc_critical: {
1089    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1090    // kmp_critical_name *crit);
1091    llvm::Type *TypeParams[] = {
1092        getIdentTyPointerTy(), CGM.Int32Ty,
1093        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1094    llvm::FunctionType *FnTy =
1095        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1096    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1097    break;
1098  }
1099  case OMPRTL__kmpc_critical_with_hint: {
1100    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1101    // kmp_critical_name *crit, uintptr_t hint);
1102    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1103                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
1104                                CGM.IntPtrTy};
1105    llvm::FunctionType *FnTy =
1106        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1107    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1108    break;
1109  }
1110  case OMPRTL__kmpc_threadprivate_register: {
1111    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1112    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1113    // typedef void *(*kmpc_ctor)(void *);
1114    auto KmpcCtorTy =
1115        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1116                                /*isVarArg*/ false)->getPointerTo();
1117    // typedef void *(*kmpc_cctor)(void *, void *);
1118    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1119    auto KmpcCopyCtorTy =
1120        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1121                                /*isVarArg*/ false)->getPointerTo();
1122    // typedef void (*kmpc_dtor)(void *);
1123    auto KmpcDtorTy =
1124        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1125            ->getPointerTo();
1126    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1127                              KmpcCopyCtorTy, KmpcDtorTy};
1128    auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1129                                        /*isVarArg*/ false);
1130    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1131    break;
1132  }
1133  case OMPRTL__kmpc_end_critical: {
1134    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1135    // kmp_critical_name *crit);
1136    llvm::Type *TypeParams[] = {
1137        getIdentTyPointerTy(), CGM.Int32Ty,
1138        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1139    llvm::FunctionType *FnTy =
1140        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1141    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1142    break;
1143  }
1144  case OMPRTL__kmpc_cancel_barrier: {
1145    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1146    // global_tid);
1147    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1148    llvm::FunctionType *FnTy =
1149        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1150    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1151    break;
1152  }
1153  case OMPRTL__kmpc_barrier: {
1154    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1155    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1156    llvm::FunctionType *FnTy =
1157        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1158    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1159    break;
1160  }
1161  case OMPRTL__kmpc_for_static_fini: {
1162    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1163    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1164    llvm::FunctionType *FnTy =
1165        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1166    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1167    break;
1168  }
1169  case OMPRTL__kmpc_push_num_threads: {
1170    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1171    // kmp_int32 num_threads)
1172    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1173                                CGM.Int32Ty};
1174    llvm::FunctionType *FnTy =
1175        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1176    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1177    break;
1178  }
1179  case OMPRTL__kmpc_serialized_parallel: {
1180    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1181    // global_tid);
1182    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1183    llvm::FunctionType *FnTy =
1184        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1185    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1186    break;
1187  }
1188  case OMPRTL__kmpc_end_serialized_parallel: {
1189    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1190    // global_tid);
1191    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1192    llvm::FunctionType *FnTy =
1193        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1194    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1195    break;
1196  }
1197  case OMPRTL__kmpc_flush: {
1198    // Build void __kmpc_flush(ident_t *loc);
1199    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1200    llvm::FunctionType *FnTy =
1201        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1202    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1203    break;
1204  }
1205  case OMPRTL__kmpc_master: {
1206    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1207    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1208    llvm::FunctionType *FnTy =
1209        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1210    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1211    break;
1212  }
1213  case OMPRTL__kmpc_end_master: {
1214    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1215    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1216    llvm::FunctionType *FnTy =
1217        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1218    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1219    break;
1220  }
1221  case OMPRTL__kmpc_omp_taskyield: {
1222    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1223    // int end_part);
1224    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1225    llvm::FunctionType *FnTy =
1226        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1227    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1228    break;
1229  }
1230  case OMPRTL__kmpc_single: {
1231    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1232    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1233    llvm::FunctionType *FnTy =
1234        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1235    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1236    break;
1237  }
1238  case OMPRTL__kmpc_end_single: {
1239    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1240    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1241    llvm::FunctionType *FnTy =
1242        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1243    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1244    break;
1245  }
1246  case OMPRTL__kmpc_omp_task_alloc: {
1247    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1248    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1249    // kmp_routine_entry_t *task_entry);
1250    assert(KmpRoutineEntryPtrTy != nullptr &&
1251           "Type kmp_routine_entry_t must be created.");
1252    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1253                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1254    // Return void * and then cast to particular kmp_task_t type.
1255    llvm::FunctionType *FnTy =
1256        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1257    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1258    break;
1259  }
1260  case OMPRTL__kmpc_omp_task: {
1261    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1262    // *new_task);
1263    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1264                                CGM.VoidPtrTy};
1265    llvm::FunctionType *FnTy =
1266        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1267    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1268    break;
1269  }
1270  case OMPRTL__kmpc_copyprivate: {
1271    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1272    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1273    // kmp_int32 didit);
1274    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1275    auto *CpyFnTy =
1276        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1277    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1278                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1279                                CGM.Int32Ty};
1280    llvm::FunctionType *FnTy =
1281        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1282    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1283    break;
1284  }
1285  case OMPRTL__kmpc_reduce: {
1286    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1287    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1288    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1289    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1290    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1291                                               /*isVarArg=*/false);
1292    llvm::Type *TypeParams[] = {
1293        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1294        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1295        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1296    llvm::FunctionType *FnTy =
1297        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1298    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1299    break;
1300  }
1301  case OMPRTL__kmpc_reduce_nowait: {
1302    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1303    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1304    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1305    // *lck);
1306    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1307    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1308                                               /*isVarArg=*/false);
1309    llvm::Type *TypeParams[] = {
1310        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1311        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1312        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1313    llvm::FunctionType *FnTy =
1314        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1315    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1316    break;
1317  }
1318  case OMPRTL__kmpc_end_reduce: {
1319    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1320    // kmp_critical_name *lck);
1321    llvm::Type *TypeParams[] = {
1322        getIdentTyPointerTy(), CGM.Int32Ty,
1323        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1324    llvm::FunctionType *FnTy =
1325        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1326    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1327    break;
1328  }
1329  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1331    // kmp_critical_name *lck);
1332    llvm::Type *TypeParams[] = {
1333        getIdentTyPointerTy(), CGM.Int32Ty,
1334        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1335    llvm::FunctionType *FnTy =
1336        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1337    RTLFn =
1338        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1339    break;
1340  }
1341  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1344    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1345                                CGM.VoidPtrTy};
1346    llvm::FunctionType *FnTy =
1347        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1348    RTLFn =
1349        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1350    break;
1351  }
1352  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
1355    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1356                                CGM.VoidPtrTy};
1357    llvm::FunctionType *FnTy =
1358        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1359    RTLFn = CGM.CreateRuntimeFunction(FnTy,
1360                                      /*Name=*/"__kmpc_omp_task_complete_if0");
1361    break;
1362  }
1363  case OMPRTL__kmpc_ordered: {
1364    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1365    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1366    llvm::FunctionType *FnTy =
1367        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1368    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1369    break;
1370  }
1371  case OMPRTL__kmpc_end_ordered: {
1372    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1373    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1374    llvm::FunctionType *FnTy =
1375        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1376    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1377    break;
1378  }
1379  case OMPRTL__kmpc_omp_taskwait: {
1380    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1381    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1382    llvm::FunctionType *FnTy =
1383        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1384    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1385    break;
1386  }
1387  case OMPRTL__kmpc_taskgroup: {
1388    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1389    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1390    llvm::FunctionType *FnTy =
1391        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1392    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1393    break;
1394  }
1395  case OMPRTL__kmpc_end_taskgroup: {
1396    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1397    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1398    llvm::FunctionType *FnTy =
1399        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1400    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1401    break;
1402  }
1403  case OMPRTL__kmpc_push_proc_bind: {
1404    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1405    // int proc_bind)
1406    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1407    llvm::FunctionType *FnTy =
1408        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1409    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1410    break;
1411  }
1412  case OMPRTL__kmpc_omp_task_with_deps: {
1413    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1414    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1415    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1416    llvm::Type *TypeParams[] = {
1417        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1418        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
1419    llvm::FunctionType *FnTy =
1420        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1421    RTLFn =
1422        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1423    break;
1424  }
1425  case OMPRTL__kmpc_omp_wait_deps: {
1426    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1427    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1428    // kmp_depend_info_t *noalias_dep_list);
1429    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1430                                CGM.Int32Ty,           CGM.VoidPtrTy,
1431                                CGM.Int32Ty,           CGM.VoidPtrTy};
1432    llvm::FunctionType *FnTy =
1433        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1434    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1435    break;
1436  }
1437  case OMPRTL__kmpc_cancellationpoint: {
1438    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1439    // global_tid, kmp_int32 cncl_kind)
1440    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1441    llvm::FunctionType *FnTy =
1442        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1443    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1444    break;
1445  }
1446  case OMPRTL__kmpc_cancel: {
1447    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1448    // kmp_int32 cncl_kind)
1449    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1450    llvm::FunctionType *FnTy =
1451        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1452    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1453    break;
1454  }
1455  case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1462    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1463    break;
1464  }
1465  case OMPRTL__kmpc_fork_teams: {
1466    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1467    // microtask, ...);
1468    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1469                                getKmpc_MicroPointerTy()};
1470    llvm::FunctionType *FnTy =
1471        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1472    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1473    break;
1474  }
1475  case OMPRTL__kmpc_taskloop: {
1476    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1477    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1478    // sched, kmp_uint64 grainsize, void *task_dup);
1479    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1480                                CGM.IntTy,
1481                                CGM.VoidPtrTy,
1482                                CGM.IntTy,
1483                                CGM.Int64Ty->getPointerTo(),
1484                                CGM.Int64Ty->getPointerTo(),
1485                                CGM.Int64Ty,
1486                                CGM.IntTy,
1487                                CGM.IntTy,
1488                                CGM.Int64Ty,
1489                                CGM.VoidPtrTy};
1490    llvm::FunctionType *FnTy =
1491        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1492    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1493    break;
1494  }
1495  case OMPRTL__kmpc_doacross_init: {
1496    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1497    // num_dims, struct kmp_dim *dims);
1498    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1499                                CGM.Int32Ty,
1500                                CGM.Int32Ty,
1501                                CGM.VoidPtrTy};
1502    llvm::FunctionType *FnTy =
1503        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1504    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1505    break;
1506  }
1507  case OMPRTL__kmpc_doacross_fini: {
1508    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1509    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1510    llvm::FunctionType *FnTy =
1511        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1512    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1513    break;
1514  }
1515  case OMPRTL__kmpc_doacross_post: {
1516    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1517    // *vec);
1518    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1519                                CGM.Int64Ty->getPointerTo()};
1520    llvm::FunctionType *FnTy =
1521        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1522    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1523    break;
1524  }
1525  case OMPRTL__kmpc_doacross_wait: {
1526    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1527    // *vec);
1528    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1529                                CGM.Int64Ty->getPointerTo()};
1530    llvm::FunctionType *FnTy =
1531        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1532    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1533    break;
1534  }
1535  case OMPRTL__tgt_target: {
1536    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1537    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1538    // *arg_types);
1539    llvm::Type *TypeParams[] = {CGM.Int32Ty,
1540                                CGM.VoidPtrTy,
1541                                CGM.Int32Ty,
1542                                CGM.VoidPtrPtrTy,
1543                                CGM.VoidPtrPtrTy,
1544                                CGM.SizeTy->getPointerTo(),
1545                                CGM.Int32Ty->getPointerTo()};
1546    llvm::FunctionType *FnTy =
1547        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1548    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
1549    break;
1550  }
1551  case OMPRTL__tgt_target_teams: {
1552    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1553    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1554    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1555    llvm::Type *TypeParams[] = {CGM.Int32Ty,
1556                                CGM.VoidPtrTy,
1557                                CGM.Int32Ty,
1558                                CGM.VoidPtrPtrTy,
1559                                CGM.VoidPtrPtrTy,
1560                                CGM.SizeTy->getPointerTo(),
1561                                CGM.Int32Ty->getPointerTo(),
1562                                CGM.Int32Ty,
1563                                CGM.Int32Ty};
1564    llvm::FunctionType *FnTy =
1565        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1566    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1567    break;
1568  }
1569  case OMPRTL__tgt_register_lib: {
1570    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
1571    QualType ParamTy =
1572        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1573    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1574    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1576    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
1577    break;
1578  }
1579  case OMPRTL__tgt_unregister_lib: {
1580    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
1581    QualType ParamTy =
1582        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
1583    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
1584    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1586    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
1587    break;
1588  }
1589  case OMPRTL__tgt_target_data_begin: {
1590    // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
1591    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1592    llvm::Type *TypeParams[] = {CGM.Int32Ty,
1593                                CGM.Int32Ty,
1594                                CGM.VoidPtrPtrTy,
1595                                CGM.VoidPtrPtrTy,
1596                                CGM.SizeTy->getPointerTo(),
1597                                CGM.Int32Ty->getPointerTo()};
1598    llvm::FunctionType *FnTy =
1599        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1600    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
1601    break;
1602  }
1603  case OMPRTL__tgt_target_data_end: {
1604    // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
1605    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1606    llvm::Type *TypeParams[] = {CGM.Int32Ty,
1607                                CGM.Int32Ty,
1608                                CGM.VoidPtrPtrTy,
1609                                CGM.VoidPtrPtrTy,
1610                                CGM.SizeTy->getPointerTo(),
1611                                CGM.Int32Ty->getPointerTo()};
1612    llvm::FunctionType *FnTy =
1613        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1614    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
1615    break;
1616  }
1617  case OMPRTL__tgt_target_data_update: {
1618    // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
1619    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
1620    llvm::Type *TypeParams[] = {CGM.Int32Ty,
1621                                CGM.Int32Ty,
1622                                CGM.VoidPtrPtrTy,
1623                                CGM.VoidPtrPtrTy,
1624                                CGM.SizeTy->getPointerTo(),
1625                                CGM.Int32Ty->getPointerTo()};
1626    llvm::FunctionType *FnTy =
1627        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1628    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
1629    break;
1630  }
1631  }
1632  assert(RTLFn && "Unable to find OpenMP runtime function");
1633  return RTLFn;
1634}
1635
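/// \brief Returns the __kmpc_for_static_init_{4,4u,8,8u} entry used to set up
/// a statically scheduled worksharing loop; the suffix encodes the size and
/// signedness of the induction variable (e.g. a signed 32-bit IV selects
/// __kmpc_for_static_init_4).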
1636llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
1637                                                             bool IVSigned) {
1638  assert((IVSize == 32 || IVSize == 64) &&
1639         "IV size is not compatible with the omp runtime");
1640  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1641                                       : "__kmpc_for_static_init_4u")
1642                           : (IVSigned ? "__kmpc_for_static_init_8"
1643                                       : "__kmpc_for_static_init_8u");
1644  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645  auto PtrTy = llvm::PointerType::getUnqual(ITy);
1646  llvm::Type *TypeParams[] = {
1647    getIdentTyPointerTy(),                     // loc
1648    CGM.Int32Ty,                               // tid
1649    CGM.Int32Ty,                               // schedtype
1650    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1651    PtrTy,                                     // p_lower
1652    PtrTy,                                     // p_upper
1653    PtrTy,                                     // p_stride
1654    ITy,                                       // incr
1655    ITy                                        // chunk
1656  };
1657  llvm::FunctionType *FnTy =
1658      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1659  return CGM.CreateRuntimeFunction(FnTy, Name);
1660}
1661
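/// \brief Returns the __kmpc_dispatch_init_{4,4u,8,8u} entry that starts a
/// dynamically scheduled loop; the suffix is chosen from the IV size and
/// signedness exactly as for the static init functions above.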
1662llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1663                                                            bool IVSigned) {
1664  assert((IVSize == 32 || IVSize == 64) &&
1665         "IV size is not compatible with the omp runtime");
1666  auto Name =
1667      IVSize == 32
1668          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1669          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1670  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1671  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1672                               CGM.Int32Ty,           // tid
1673                               CGM.Int32Ty,           // schedtype
1674                               ITy,                   // lower
1675                               ITy,                   // upper
1676                               ITy,                   // stride
1677                               ITy                    // chunk
1678  };
1679  llvm::FunctionType *FnTy =
1680      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1681  return CGM.CreateRuntimeFunction(FnTy, Name);
1682}
1683
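/// \brief Returns the __kmpc_dispatch_fini_{4,4u,8,8u} entry that signals
/// completion of the current iteration in an ordered, dynamically scheduled
/// loop.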
1684llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1685                                                            bool IVSigned) {
1686  assert((IVSize == 32 || IVSize == 64) &&
1687         "IV size is not compatible with the omp runtime");
1688  auto Name =
1689      IVSize == 32
1690          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1691          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1692  llvm::Type *TypeParams[] = {
1693      getIdentTyPointerTy(), // loc
1694      CGM.Int32Ty,           // tid
1695  };
1696  llvm::FunctionType *FnTy =
1697      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1698  return CGM.CreateRuntimeFunction(FnTy, Name);
1699}
1700
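/// \brief Returns the __kmpc_dispatch_next_{4,4u,8,8u} entry that fetches the
/// bounds of the next chunk of a dynamically scheduled loop; it returns
/// non-zero as long as more work remains.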
1701llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1702                                                            bool IVSigned) {
1703  assert((IVSize == 32 || IVSize == 64) &&
1704         "IV size is not compatible with the omp runtime");
1705  auto Name =
1706      IVSize == 32
1707          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1708          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1709  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1710  auto PtrTy = llvm::PointerType::getUnqual(ITy);
1711  llvm::Type *TypeParams[] = {
1712    getIdentTyPointerTy(),                     // loc
1713    CGM.Int32Ty,                               // tid
1714    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1715    PtrTy,                                     // p_lower
1716    PtrTy,                                     // p_upper
1717    PtrTy                                      // p_stride
1718  };
1719  llvm::FunctionType *FnTy =
1720      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1721  return CGM.CreateRuntimeFunction(FnTy, Name);
1722}
1723
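/// \brief Get (or lazily create) the internal "<mangled name>.cache." global
/// passed to __kmpc_threadprivate_cached as its cache argument for the given
/// threadprivate variable.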
1724llvm::Constant *
1725CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1726  assert(!CGM.getLangOpts().OpenMPUseTLS ||
1727         !CGM.getContext().getTargetInfo().isTLSSupported());
1728  // Lookup the entry, lazily creating it if necessary.
1729  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1730                                     Twine(CGM.getMangledName(VD)) + ".cache.");
1731}
1732
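// When the threadprivate variable cannot be lowered to native TLS, every
// access goes through the runtime, roughly:
//   void *ptr = __kmpc_threadprivate_cached(&loc, gtid, (void *)&var,
//                                           sizeof(var), &var.cache.);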
1733Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734                                                const VarDecl *VD,
1735                                                Address VDAddr,
1736                                                SourceLocation Loc) {
1737  if (CGM.getLangOpts().OpenMPUseTLS &&
1738      CGM.getContext().getTargetInfo().isTLSSupported())
1739    return VDAddr;
1740
1741  auto VarTy = VDAddr.getElementType();
1742  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744                                                       CGM.Int8PtrTy),
1745                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746                         getOrCreateThreadPrivateCache(VD)};
1747  return Address(CGF.EmitRuntimeCall(
1748      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1749                 VDAddr.getAlignment());
1750}
1751
1752void CGOpenMPRuntime::emitThreadPrivateVarInit(
1753    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1754    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1755  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1756  // library.
1757  auto OMPLoc = emitUpdateLocation(CGF, Loc);
1758  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1759                      OMPLoc);
1760  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1761  // to register constructor/destructor for variable.
1762  llvm::Value *Args[] = {OMPLoc,
1763                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1764                                                       CGM.VoidPtrTy),
1765                         Ctor, CopyCtor, Dtor};
1766  CGF.EmitRuntimeCall(
1767      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1768}
1769
1770llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1771    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1772    bool PerformInit, CodeGenFunction *CGF) {
1773  if (CGM.getLangOpts().OpenMPUseTLS &&
1774      CGM.getContext().getTargetInfo().isTLSSupported())
1775    return nullptr;
1776
1777  VD = VD->getDefinition(CGM.getContext());
1778  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1779    ThreadPrivateWithDefinition.insert(VD);
1780    QualType ASTTy = VD->getType();
1781
1782    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1783    auto Init = VD->getAnyInitializer();
1784    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1785      // Generate function that re-emits the declaration's initializer into the
1786      // threadprivate copy of the variable VD
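      // The emitted helper is roughly
      //   void *.__kmpc_global_ctor_.(void *p) { new (p) T(init); return p; }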
1787      CodeGenFunction CtorCGF(CGM);
1788      FunctionArgList Args;
1789      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1790                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1791      Args.push_back(&Dst);
1792
1793      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1794          CGM.getContext().VoidPtrTy, Args);
1795      auto FTy = CGM.getTypes().GetFunctionType(FI);
1796      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1797          FTy, ".__kmpc_global_ctor_.", FI, Loc);
1798      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1799                            Args, SourceLocation());
1800      auto ArgVal = CtorCGF.EmitLoadOfScalar(
1801          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1802          CGM.getContext().VoidPtrTy, Dst.getLocation());
1803      Address Arg = Address(ArgVal, VDAddr.getAlignment());
1804      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1805                                             CtorCGF.ConvertTypeForMem(ASTTy));
1806      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1807                               /*IsInitializer=*/true);
1808      ArgVal = CtorCGF.EmitLoadOfScalar(
1809          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1810          CGM.getContext().VoidPtrTy, Dst.getLocation());
1811      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1812      CtorCGF.FinishFunction();
1813      Ctor = Fn;
1814    }
1815    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
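      // The emitted helper is roughly
      //   void .__kmpc_global_dtor_.(void *p) { ((T *)p)->~T(); }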
1818      CodeGenFunction DtorCGF(CGM);
1819      FunctionArgList Args;
1820      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1821                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1822      Args.push_back(&Dst);
1823
1824      auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1825          CGM.getContext().VoidTy, Args);
1826      auto FTy = CGM.getTypes().GetFunctionType(FI);
1827      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1828          FTy, ".__kmpc_global_dtor_.", FI, Loc);
1829      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1830      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1831                            SourceLocation());
      // Create a scope with an artificial location for the body of this
      // function.
1833      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1834      auto ArgVal = DtorCGF.EmitLoadOfScalar(
1835          DtorCGF.GetAddrOfLocalVar(&Dst),
1836          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1837      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1838                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1839                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1840      DtorCGF.FinishFunction();
1841      Dtor = Fn;
1842    }
1843    // Do not emit init function if it is not required.
1844    if (!Ctor && !Dtor)
1845      return nullptr;
1846
1847    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1848    auto CopyCtorTy =
1849        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1850                                /*isVarArg=*/false)->getPointerTo();
1851    // Copying constructor for the threadprivate variable.
    // Must be NULL: the runtime reserves this parameter and currently
    // requires it to always be NULL, asserting otherwise.
1854    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1855    if (Ctor == nullptr) {
1856      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1857                                            /*isVarArg=*/false)->getPointerTo();
1858      Ctor = llvm::Constant::getNullValue(CtorTy);
1859    }
1860    if (Dtor == nullptr) {
1861      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1862                                            /*isVarArg=*/false)->getPointerTo();
1863      Dtor = llvm::Constant::getNullValue(DtorTy);
1864    }
1865    if (!CGF) {
1866      auto InitFunctionTy =
1867          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1868      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1869          InitFunctionTy, ".__omp_threadprivate_init_.",
1870          CGM.getTypes().arrangeNullaryFunction());
1871      CodeGenFunction InitCGF(CGM);
1872      FunctionArgList ArgList;
1873      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1874                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
1875                            Loc);
1876      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1877      InitCGF.FinishFunction();
1878      return InitFunction;
1879    }
1880    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1881  }
1882  return nullptr;
1883}
1884
1885/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1886/// function. Here is the logic:
1887/// if (Cond) {
1888///   ThenGen();
1889/// } else {
1890///   ElseGen();
1891/// }
1892static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1893                            const RegionCodeGenTy &ThenGen,
1894                            const RegionCodeGenTy &ElseGen) {
1895  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1896
1897  // If the condition constant folds and can be elided, try to avoid emitting
1898  // the condition and the dead arm of the if/else.
1899  bool CondConstant;
1900  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1901    if (CondConstant)
1902      ThenGen(CGF);
1903    else
1904      ElseGen(CGF);
1905    return;
1906  }
1907
1908  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1909  // emit the conditional branch.
1910  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1911  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1912  auto ContBlock = CGF.createBasicBlock("omp_if.end");
1913  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1914
1915  // Emit the 'then' code.
1916  CGF.EmitBlock(ThenBlock);
1917  ThenGen(CGF);
1918  CGF.EmitBranch(ContBlock);
1919  // Emit the 'else' code if present.
1920  // There is no need to emit line number for unconditional branch.
1921  (void)ApplyDebugLocation::CreateEmpty(CGF);
1922  CGF.EmitBlock(ElseBlock);
1923  ElseGen(CGF);
1924  // There is no need to emit line number for unconditional branch.
1925  (void)ApplyDebugLocation::CreateEmpty(CGF);
1926  CGF.EmitBranch(ContBlock);
1927  // Emit the continuation block for code after the if.
1928  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1929}
1930
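// Lowering of '#pragma omp parallel': with no if-clause (or one that is true)
// this emits a real fork, roughly
//   __kmpc_fork_call(&loc, <num captured vars>, (kmpc_micro)outlined_fn,
//                    <captured vars...>);
// while a false if-clause serializes the region:
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined_fn(&gtid, &zero, <captured vars...>);
//   __kmpc_end_serialized_parallel(&loc, gtid);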
1931void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1932                                       llvm::Value *OutlinedFn,
1933                                       ArrayRef<llvm::Value *> CapturedVars,
1934                                       const Expr *IfCond) {
1935  if (!CGF.HaveInsertPoint())
1936    return;
1937  auto *RTLoc = emitUpdateLocation(CGF, Loc);
1938  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
1939                                                     PrePostActionTy &) {
1940    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1941    auto &RT = CGF.CGM.getOpenMPRuntime();
1942    llvm::Value *Args[] = {
1943        RTLoc,
1944        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1945        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1946    llvm::SmallVector<llvm::Value *, 16> RealArgs;
1947    RealArgs.append(std::begin(Args), std::end(Args));
1948    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1949
1950    auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
1951    CGF.EmitRuntimeCall(RTLFn, RealArgs);
1952  };
1953  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
1954                                                          PrePostActionTy &) {
1955    auto &RT = CGF.CGM.getOpenMPRuntime();
1956    auto ThreadID = RT.getThreadID(CGF, Loc);
1957    // Build calls:
1958    // __kmpc_serialized_parallel(&Loc, GTid);
1959    llvm::Value *Args[] = {RTLoc, ThreadID};
1960    CGF.EmitRuntimeCall(
1961        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
1962
1963    // OutlinedFn(&GTid, &zero, CapturedStruct);
1964    auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1965    Address ZeroAddr =
1966        CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1967                             /*Name*/ ".zero.addr");
1968    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1969    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1970    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1971    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1972    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1973    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1974
1975    // __kmpc_end_serialized_parallel(&Loc, GTid);
1976    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1977    CGF.EmitRuntimeCall(
1978        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
1979        EndArgs);
1980  };
1981  if (IfCond)
1982    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1983  else {
1984    RegionCodeGenTy ThenRCG(ThenGen);
1985    ThenRCG(CGF);
1986  }
1987}
1988
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, i.e. in a regular serial code
// region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash it in a temporary and return
// the address of that temporary.
1995Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1996                                             SourceLocation Loc) {
1997  if (auto *OMPRegionInfo =
1998          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1999    if (OMPRegionInfo->getThreadIDVariable())
2000      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2001
2002  auto ThreadID = getThreadID(CGF, Loc);
2003  auto Int32Ty =
2004      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2005  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2006  CGF.EmitStoreOfScalar(ThreadID,
2007                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2008
2009  return ThreadIDTemp;
2010}
2011
2012llvm::Constant *
2013CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2014                                             const llvm::Twine &Name) {
2015  SmallString<256> Buffer;
2016  llvm::raw_svector_ostream Out(Buffer);
2017  Out << Name;
2018  auto RuntimeName = Out.str();
2019  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2020  if (Elem.second) {
2021    assert(Elem.second->getType()->getPointerElementType() == Ty &&
2022           "OMP internal variable has different type than requested");
2023    return &*Elem.second;
2024  }
2025
2026  return Elem.second = new llvm::GlobalVariable(
2027             CGM.getModule(), Ty, /*IsConstant*/ false,
2028             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2029             Elem.first());
2030}
2031
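// Each critical section is guarded by a named lock: an internal
// kmp_critical_name global called ".gomp_critical_user_<name>.var".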
2032llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2033  llvm::Twine Name(".gomp_critical_user_", CriticalName);
2034  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2035}
2036
2037namespace {
2038/// Common pre(post)-action for different OpenMP constructs.
2039class CommonActionTy final : public PrePostActionTy {
2040  llvm::Value *EnterCallee;
2041  ArrayRef<llvm::Value *> EnterArgs;
2042  llvm::Value *ExitCallee;
2043  ArrayRef<llvm::Value *> ExitArgs;
2044  bool Conditional;
2045  llvm::BasicBlock *ContBlock = nullptr;
2046
2047public:
2048  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2049                 llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2050                 bool Conditional = false)
2051      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2052        ExitArgs(ExitArgs), Conditional(Conditional) {}
2053  void Enter(CodeGenFunction &CGF) override {
2054    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2055    if (Conditional) {
2056      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2057      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2058      ContBlock = CGF.createBasicBlock("omp_if.end");
2059      // Generate the branch (If-stmt)
2060      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2061      CGF.EmitBlock(ThenBlock);
2062    }
2063  }
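  // Close the conditional region opened in Enter(); callers use this only
  // when the action was created with Conditional=true.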
2064  void Done(CodeGenFunction &CGF) {
2065    // Emit the rest of blocks/branches
2066    CGF.EmitBranch(ContBlock);
2067    CGF.EmitBlock(ContBlock, true);
2068  }
2069  void Exit(CodeGenFunction &CGF) override {
2070    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2071  }
2072};
2073} // anonymous namespace
2074
2075void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2076                                         StringRef CriticalName,
2077                                         const RegionCodeGenTy &CriticalOpGen,
2078                                         SourceLocation Loc, const Expr *Hint) {
2079  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2080  // CriticalOpGen();
2081  // __kmpc_end_critical(ident_t *, gtid, Lock);
2082  // Prepare arguments and build a call to __kmpc_critical
2083  if (!CGF.HaveInsertPoint())
2084    return;
2085  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2086                         getCriticalRegionLock(CriticalName)};
2087  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2088                                                std::end(Args));
2089  if (Hint) {
2090    EnterArgs.push_back(CGF.Builder.CreateIntCast(
2091        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2092  }
2093  CommonActionTy Action(
2094      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2095                                 : OMPRTL__kmpc_critical),
2096      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2097  CriticalOpGen.setAction(Action);
2098  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2099}
2100
2101void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2102                                       const RegionCodeGenTy &MasterOpGen,
2103                                       SourceLocation Loc) {
2104  if (!CGF.HaveInsertPoint())
2105    return;
2106  // if(__kmpc_master(ident_t *, gtid)) {
2107  //   MasterOpGen();
2108  //   __kmpc_end_master(ident_t *, gtid);
2109  // }
2110  // Prepare arguments and build a call to __kmpc_master
2111  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2112  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2113                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2114                        /*Conditional=*/true);
2115  MasterOpGen.setAction(Action);
2116  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2117  Action.Done(CGF);
2118}
2119
2120void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2121                                        SourceLocation Loc) {
2122  if (!CGF.HaveInsertPoint())
2123    return;
2124  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2125  llvm::Value *Args[] = {
2126      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2127      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2128  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2129  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2130    Region->emitUntiedSwitch(CGF);
2131}
2132
2133void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2134                                          const RegionCodeGenTy &TaskgroupOpGen,
2135                                          SourceLocation Loc) {
2136  if (!CGF.HaveInsertPoint())
2137    return;
2138  // __kmpc_taskgroup(ident_t *, gtid);
2139  // TaskgroupOpGen();
2140  // __kmpc_end_taskgroup(ident_t *, gtid);
2141  // Prepare arguments and build a call to __kmpc_taskgroup
2142  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2143  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2144                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2145                        Args);
2146  TaskgroupOpGen.setAction(Action);
2147  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2148}
2149
2150/// Given an array of pointers to variables, project the address of a
2151/// given variable.
2152static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2153                                      unsigned Index, const VarDecl *Var) {
2154  // Pull out the pointer to the variable.
2155  Address PtrAddr =
2156      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2157  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2158
2159  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2160  Addr = CGF.Builder.CreateElementBitCast(
2161      Addr, CGF.ConvertTypeForMem(Var->getType()));
2162  return Addr;
2163}
2164
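/// \brief Emit the helper that __kmpc_copyprivate invokes to broadcast the
/// values of the copyprivate variables from the thread that executed the
/// single region to all other threads in the team.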
2165static llvm::Value *emitCopyprivateCopyFunction(
2166    CodeGenModule &CGM, llvm::Type *ArgsType,
2167    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2168    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2169  auto &C = CGM.getContext();
2170  // void copy_func(void *LHSArg, void *RHSArg);
2171  FunctionArgList Args;
2172  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2173                           C.VoidPtrTy);
2174  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2175                           C.VoidPtrTy);
2176  Args.push_back(&LHSArg);
2177  Args.push_back(&RHSArg);
2178  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2179  auto *Fn = llvm::Function::Create(
2180      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2181      ".omp.copyprivate.copy_func", &CGM.getModule());
2182  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2183  CodeGenFunction CGF(CGM);
2184  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2185  // Dest = (void*[n])(LHSArg);
2186  // Src = (void*[n])(RHSArg);
2187  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2188      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2189      ArgsType), CGF.getPointerAlign());
2190  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2191      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2192      ArgsType), CGF.getPointerAlign());
2193  // *(Type0*)Dst[0] = *(Type0*)Src[0];
2194  // *(Type1*)Dst[1] = *(Type1*)Src[1];
2195  // ...
2196  // *(Typen*)Dst[n] = *(Typen*)Src[n];
2197  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2198    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2199    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2200
2201    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2202    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2203
2204    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2205    QualType Type = VD->getType();
2206    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2207  }
2208  CGF.FinishFunction();
2209  return Fn;
2210}
2211
2212void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2213                                       const RegionCodeGenTy &SingleOpGen,
2214                                       SourceLocation Loc,
2215                                       ArrayRef<const Expr *> CopyprivateVars,
2216                                       ArrayRef<const Expr *> SrcExprs,
2217                                       ArrayRef<const Expr *> DstExprs,
2218                                       ArrayRef<const Expr *> AssignmentOps) {
2219  if (!CGF.HaveInsertPoint())
2220    return;
2221  assert(CopyprivateVars.size() == SrcExprs.size() &&
2222         CopyprivateVars.size() == DstExprs.size() &&
2223         CopyprivateVars.size() == AssignmentOps.size());
2224  auto &C = CGM.getContext();
2225  // int32 did_it = 0;
2226  // if(__kmpc_single(ident_t *, gtid)) {
2227  //   SingleOpGen();
2228  //   __kmpc_end_single(ident_t *, gtid);
2229  //   did_it = 1;
2230  // }
2231  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2232  // <copy_func>, did_it);
2233
2234  Address DidIt = Address::invalid();
2235  if (!CopyprivateVars.empty()) {
2236    // int32 did_it = 0;
2237    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2238    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2239    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2240  }
2241  // Prepare arguments and build a call to __kmpc_single
2242  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2243  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2244                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2245                        /*Conditional=*/true);
2246  SingleOpGen.setAction(Action);
2247  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2248  if (DidIt.isValid()) {
2249    // did_it = 1;
2250    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2251  }
2252  Action.Done(CGF);
2253  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2254  // <copy_func>, did_it);
2255  if (DidIt.isValid()) {
2256    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2257    auto CopyprivateArrayTy =
2258        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2259                               /*IndexTypeQuals=*/0);
2260    // Create a list of all private variables for copyprivate.
2261    Address CopyprivateList =
2262        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2263    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2264      Address Elem = CGF.Builder.CreateConstArrayGEP(
2265          CopyprivateList, I, CGF.getPointerSize());
2266      CGF.Builder.CreateStore(
2267          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2268              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2269          Elem);
2270    }
    // Build function that copies private values from the single region to all
    // other threads in the corresponding parallel region.
2273    auto *CpyFn = emitCopyprivateCopyFunction(
2274        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2275        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2276    auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2277    Address CL =
2278      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2279                                                      CGF.VoidPtrTy);
2280    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2281    llvm::Value *Args[] = {
2282        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2283        getThreadID(CGF, Loc),        // i32 <gtid>
2284        BufSize,                      // size_t <buf_size>
2285        CL.getPointer(),              // void *<copyprivate list>
2286        CpyFn,                        // void (*) (void *, void *) <copy_func>
2287        DidItVal                      // i32 did_it
2288    };
2289    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2290  }
2291}
2292
2293void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2294                                        const RegionCodeGenTy &OrderedOpGen,
2295                                        SourceLocation Loc, bool IsThreads) {
2296  if (!CGF.HaveInsertPoint())
2297    return;
2298  // __kmpc_ordered(ident_t *, gtid);
2299  // OrderedOpGen();
2300  // __kmpc_end_ordered(ident_t *, gtid);
2301  // Prepare arguments and build a call to __kmpc_ordered
2302  if (IsThreads) {
2303    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2304    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2305                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2306                          Args);
2307    OrderedOpGen.setAction(Action);
2308    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2309    return;
2310  }
2311  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2312}
2313
2314void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2315                                      OpenMPDirectiveKind Kind, bool EmitChecks,
2316                                      bool ForceSimpleCall) {
2317  if (!CGF.HaveInsertPoint())
2318    return;
  // Compute the ident_t flags that record which construct this barrier
  // implements.
2321  unsigned Flags;
2322  if (Kind == OMPD_for)
2323    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2324  else if (Kind == OMPD_sections)
2325    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2326  else if (Kind == OMPD_single)
2327    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2328  else if (Kind == OMPD_barrier)
2329    Flags = OMP_IDENT_BARRIER_EXPL;
2330  else
2331    Flags = OMP_IDENT_BARRIER_IMPL;
2332  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2333  // thread_id);
2334  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2335                         getThreadID(CGF, Loc)};
2336  if (auto *OMPRegionInfo =
2337          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2338    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2339      auto *Result = CGF.EmitRuntimeCall(
2340          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2341      if (EmitChecks) {
2342        // if (__kmpc_cancel_barrier()) {
2343        //   exit from construct;
2344        // }
2345        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2346        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2347        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2348        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2349        CGF.EmitBlock(ExitBB);
2350        //   exit from construct;
2351        auto CancelDestination =
2352            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2353        CGF.EmitBranchThroughCleanup(CancelDestination);
2354        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2355      }
2356      return;
2357    }
2358  }
2359  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2360}
2361
2362/// \brief Map the OpenMP loop schedule to the runtime enumeration.
2363static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2364                                          bool Chunked, bool Ordered) {
2365  switch (ScheduleKind) {
2366  case OMPC_SCHEDULE_static:
2367    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2368                   : (Ordered ? OMP_ord_static : OMP_sch_static);
2369  case OMPC_SCHEDULE_dynamic:
2370    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2371  case OMPC_SCHEDULE_guided:
2372    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2373  case OMPC_SCHEDULE_runtime:
2374    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2375  case OMPC_SCHEDULE_auto:
2376    return Ordered ? OMP_ord_auto : OMP_sch_auto;
2377  case OMPC_SCHEDULE_unknown:
2378    assert(!Chunked && "chunk was specified but schedule kind not known");
2379    return Ordered ? OMP_ord_static : OMP_sch_static;
2380  }
2381  llvm_unreachable("Unexpected runtime schedule");
2382}
2383
2384/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2385static OpenMPSchedType
2386getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2387  // only static is allowed for dist_schedule
2388  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2389}
2390
2391bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2392                                         bool Chunked) const {
2393  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2394  return Schedule == OMP_sch_static;
2395}
2396
2397bool CGOpenMPRuntime::isStaticNonchunked(
2398    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2399  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2400  return Schedule == OMP_dist_sch_static;
2401}
2402
2404bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2405  auto Schedule =
2406      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2407  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2408  return Schedule != OMP_sch_static;
2409}
2410
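/// \brief Fold the monotonic/nonmonotonic/simd schedule modifiers into the
/// schedule value passed to the runtime. The 'simd' modifier upgrades a
/// static chunked schedule to its balanced-chunked variant instead of setting
/// a modifier bit.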
2411static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2412                                  OpenMPScheduleClauseModifier M1,
2413                                  OpenMPScheduleClauseModifier M2) {
2414  int Modifier = 0;
2415  switch (M1) {
2416  case OMPC_SCHEDULE_MODIFIER_monotonic:
2417    Modifier = OMP_sch_modifier_monotonic;
2418    break;
2419  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2420    Modifier = OMP_sch_modifier_nonmonotonic;
2421    break;
2422  case OMPC_SCHEDULE_MODIFIER_simd:
2423    if (Schedule == OMP_sch_static_chunked)
2424      Schedule = OMP_sch_static_balanced_chunked;
2425    break;
2426  case OMPC_SCHEDULE_MODIFIER_last:
2427  case OMPC_SCHEDULE_MODIFIER_unknown:
2428    break;
2429  }
2430  switch (M2) {
2431  case OMPC_SCHEDULE_MODIFIER_monotonic:
2432    Modifier = OMP_sch_modifier_monotonic;
2433    break;
2434  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2435    Modifier = OMP_sch_modifier_nonmonotonic;
2436    break;
2437  case OMPC_SCHEDULE_MODIFIER_simd:
2438    if (Schedule == OMP_sch_static_chunked)
2439      Schedule = OMP_sch_static_balanced_chunked;
2440    break;
2441  case OMPC_SCHEDULE_MODIFIER_last:
2442  case OMPC_SCHEDULE_MODIFIER_unknown:
2443    break;
2444  }
2445  return Schedule | Modifier;
2446}
2447
2448void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
2449                                          SourceLocation Loc,
2450                                          const OpenMPScheduleTy &ScheduleKind,
2451                                          unsigned IVSize, bool IVSigned,
2452                                          bool Ordered, llvm::Value *UB,
2453                                          llvm::Value *Chunk) {
2454  if (!CGF.HaveInsertPoint())
2455    return;
2456  OpenMPSchedType Schedule =
2457      getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
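  // Only ordered loops or truly dynamic schedules (dynamic, guided, runtime,
  // auto) should reach the dispatch-init path; plain static schedules are
  // expected to go through emitForStaticInit instead.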
2458  assert(Ordered ||
2459         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2460          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2461          Schedule != OMP_sch_static_balanced_chunked));
2462  // Call __kmpc_dispatch_init(
2463  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2464  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2465  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2466
2467  // If Chunk was not specified in the clause, use the default value 1.
2468  if (Chunk == nullptr)
2469    Chunk = CGF.Builder.getIntN(IVSize, 1);
2470  llvm::Value *Args[] = {
2471      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2472      CGF.Builder.getInt32(addMonoNonMonoModifier(
2473          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2474      CGF.Builder.getIntN(IVSize, 0),                   // Lower
2475      UB,                                               // Upper
2476      CGF.Builder.getIntN(IVSize, 1),                   // Stride
2477      Chunk                                             // Chunk
2478  };
2479  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2480}
2481
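/// Emit the actual __kmpc_for_static_init_* runtime call once the schedule
/// type, modifiers, bounds addresses and chunk have been computed by
/// emitForStaticInit or emitDistributeStaticInit.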
2482static void emitForStaticInitCall(
2483    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2484    llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2485    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2486    unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2487    Address ST, llvm::Value *Chunk) {
2488  if (!CGF.HaveInsertPoint())
2489    return;
2490
2491  assert(!Ordered);
2492  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2493         Schedule == OMP_sch_static_balanced_chunked ||
2494         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2495         Schedule == OMP_dist_sch_static ||
2496         Schedule == OMP_dist_sch_static_chunked);
2497
2498  // Call __kmpc_for_static_init(
2499  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2500  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2501  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2502  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2503  if (Chunk == nullptr) {
2504    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2505            Schedule == OMP_dist_sch_static) &&
2506           "expected static non-chunked schedule");
2507    // If Chunk was not specified in the clause, use the default value 1.
2508    Chunk = CGF.Builder.getIntN(IVSize, 1);
2509  } else {
2510    assert((Schedule == OMP_sch_static_chunked ||
2511            Schedule == OMP_sch_static_balanced_chunked ||
2512            Schedule == OMP_ord_static_chunked ||
2513            Schedule == OMP_dist_sch_static_chunked) &&
2514           "expected static chunked schedule");
2515  }
2516  llvm::Value *Args[] = {
2517      UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2518                                    Schedule, M1, M2)), // Schedule type
2519      IL.getPointer(),                                  // &isLastIter
2520      LB.getPointer(),                                  // &LB
2521      UB.getPointer(),                                  // &UB
2522      ST.getPointer(),                                  // &Stride
2523      CGF.Builder.getIntN(IVSize, 1),                   // Incr
2524      Chunk                                             // Chunk
2525  };
2526  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2527}
2528
2529void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2530                                        SourceLocation Loc,
2531                                        const OpenMPScheduleTy &ScheduleKind,
2532                                        unsigned IVSize, bool IVSigned,
2533                                        bool Ordered, Address IL, Address LB,
2534                                        Address UB, Address ST,
2535                                        llvm::Value *Chunk) {
2536  OpenMPSchedType ScheduleNum =
2537      getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
2538  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2539  auto *ThreadId = getThreadID(CGF, Loc);
2540  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2541  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2542                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
2543                        Ordered, IL, LB, UB, ST, Chunk);
2544}
2545
2546void CGOpenMPRuntime::emitDistributeStaticInit(
2547    CodeGenFunction &CGF, SourceLocation Loc,
2548    OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
2549    bool Ordered, Address IL, Address LB, Address UB, Address ST,
2550    llvm::Value *Chunk) {
2551  OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
2552  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
2553  auto *ThreadId = getThreadID(CGF, Loc);
2554  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
2555  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2556                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2557                        OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
2558                        UB, ST, Chunk);
2559}
2560
2561void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2562                                          SourceLocation Loc) {
2563  if (!CGF.HaveInsertPoint())
2564    return;
2565  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2566  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2567  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
2568                      Args);
2569}
2570
2571void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2572                                                 SourceLocation Loc,
2573                                                 unsigned IVSize,
2574                                                 bool IVSigned) {
2575  if (!CGF.HaveInsertPoint())
2576    return;
2577  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2578  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2579  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2580}
2581
2582llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2583                                          SourceLocation Loc, unsigned IVSize,
2584                                          bool IVSigned, Address IL,
2585                                          Address LB, Address UB,
2586                                          Address ST) {
2587  // Call __kmpc_dispatch_next(
2588  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2589  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2590  //          kmp_int[32|64] *p_stride);
2591  llvm::Value *Args[] = {
2592      emitUpdateLocation(CGF, Loc),
2593      getThreadID(CGF, Loc),
2594      IL.getPointer(), // &isLastIter
2595      LB.getPointer(), // &Lower
2596      UB.getPointer(), // &Upper
2597      ST.getPointer()  // &Stride
2598  };
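  // __kmpc_dispatch_next returns a kmp_int32 flag that is non-zero while there
  // are still chunks to execute; convert it to the boolean the loop expects.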
2599  llvm::Value *Call =
2600      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2601  return CGF.EmitScalarConversion(
2602      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
2603      CGF.getContext().BoolTy, Loc);
2604}
2605
2606void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2607                                           llvm::Value *NumThreads,
2608                                           SourceLocation Loc) {
2609  if (!CGF.HaveInsertPoint())
2610    return;
2611  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2612  llvm::Value *Args[] = {
2613      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2614      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2615  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
2616                      Args);
2617}
2618
2619void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2620                                         OpenMPProcBindClauseKind ProcBind,
2621                                         SourceLocation Loc) {
2622  if (!CGF.HaveInsertPoint())
2623    return;
2624  // Constants for proc bind value accepted by the runtime.
2625  enum ProcBindTy {
2626    ProcBindFalse = 0,
2627    ProcBindTrue,
2628    ProcBindMaster,
2629    ProcBindClose,
2630    ProcBindSpread,
2631    ProcBindIntel,
2632    ProcBindDefault
2633  } RuntimeProcBind;
2634  switch (ProcBind) {
2635  case OMPC_PROC_BIND_master:
2636    RuntimeProcBind = ProcBindMaster;
2637    break;
2638  case OMPC_PROC_BIND_close:
2639    RuntimeProcBind = ProcBindClose;
2640    break;
2641  case OMPC_PROC_BIND_spread:
2642    RuntimeProcBind = ProcBindSpread;
2643    break;
2644  case OMPC_PROC_BIND_unknown:
2645    llvm_unreachable("Unsupported proc_bind value.");
2646  }
2647  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2648  llvm::Value *Args[] = {
2649      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2650      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
2651  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
2652}
2653
2654void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2655                                SourceLocation Loc) {
2656  if (!CGF.HaveInsertPoint())
2657    return;
2658  // Build call void __kmpc_flush(ident_t *loc)
2659  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
2660                      emitUpdateLocation(CGF, Loc));
2661}
2662
2663namespace {
2664/// \brief Indexes of fields for type kmp_task_t.
2665enum KmpTaskTFields {
2666  /// \brief List of shared variables.
2667  KmpTaskTShareds,
2668  /// \brief Task routine.
2669  KmpTaskTRoutine,
2670  /// \brief Partition id for the untied tasks.
2671  KmpTaskTPartId,
2672  /// Function with call of destructors for private variables.
2673  Data1,
2674  /// Task priority.
2675  Data2,
2676  /// (Taskloops only) Lower bound.
2677  KmpTaskTLowerBound,
2678  /// (Taskloops only) Upper bound.
2679  KmpTaskTUpperBound,
2680  /// (Taskloops only) Stride.
2681  KmpTaskTStride,
2682  /// (Taskloops only) Is last iteration flag.
2683  KmpTaskTLastIter,
2684};
2685} // anonymous namespace
2686
2687bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2688  // FIXME: Add other entry types when they become supported.
2689  return OffloadEntriesTargetRegion.empty();
2690}
2691
2692/// \brief Initialize target region entry.
2693void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2694    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2695                                    StringRef ParentName, unsigned LineNum,
2696                                    unsigned Order) {
2697  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2698                                             "only required for the device "
2699                                             "code generation.");
2700  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2701      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2702  ++OffloadingEntriesNum;
2703}
2704
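/// \brief Register the address and ID of a target region entry.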
2705void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2706    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2707                                  StringRef ParentName, unsigned LineNum,
2708                                  llvm::Constant *Addr, llvm::Constant *ID) {
2709  // If we are emitting code for a target device, the entry is already
2710  // initialized and only has to be registered.
2711  if (CGM.getLangOpts().OpenMPIsDevice) {
2712    assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2713           "Entry must exist.");
2714    auto &Entry =
2715        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2716    assert(Entry.isValid() && "Entry not initialized!");
2717    Entry.setAddress(Addr);
2718    Entry.setID(ID);
2719    return;
2720  }
2721  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2722  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2724}
2725
2726bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2727    unsigned DeviceID, unsigned FileID, StringRef ParentName,
2728    unsigned LineNum) const {
2729  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2730  if (PerDevice == OffloadEntriesTargetRegion.end())
2731    return false;
2732  auto PerFile = PerDevice->second.find(FileID);
2733  if (PerFile == PerDevice->second.end())
2734    return false;
2735  auto PerParentName = PerFile->second.find(ParentName);
2736  if (PerParentName == PerFile->second.end())
2737    return false;
2738  auto PerLine = PerParentName->second.find(LineNum);
2739  if (PerLine == PerParentName->second.end())
2740    return false;
2741  // Fail if this entry is already registered.
2742  if (PerLine->second.getAddress() || PerLine->second.getID())
2743    return false;
2744  return true;
2745}
2746
2747void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2748    const OffloadTargetRegionEntryInfoActTy &Action) {
2749  // Scan all target region entries and perform the provided action.
2750  for (auto &D : OffloadEntriesTargetRegion)
2751    for (auto &F : D.second)
2752      for (auto &P : F.second)
2753        for (auto &L : P.second)
2754          Action(D.first, F.first, P.first(), L.first, L.second);
2755}
2756
2757/// \brief Create a Ctor/Dtor-like function whose body is emitted through
2758/// \a Codegen. This is used to emit the two functions that register and
2759/// unregister the descriptor of the current compilation unit.
2760static llvm::Function *
2761createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2762                                         const RegionCodeGenTy &Codegen) {
2763  auto &C = CGM.getContext();
2764  FunctionArgList Args;
2765  ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2766                             /*Id=*/nullptr, C.VoidPtrTy);
2767  Args.push_back(&DummyPtr);
2768
2769  CodeGenFunction CGF(CGM);
2771  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2772  auto FTy = CGM.getTypes().GetFunctionType(FI);
2773  auto *Fn =
2774      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2775  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2776  Codegen(CGF);
2777  CGF.FinishFunction();
2778  return Fn;
2779}
2780
2781llvm::Function *
2782CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2783
2784  // If we don't have entries or if we are emitting code for the device, we
2785  // don't need to do anything.
2786  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2787    return nullptr;
2788
2789  auto &M = CGM.getModule();
2790  auto &C = CGM.getContext();
2791
2792  // Get the list of devices we care about.
2793  auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2794
2795  // We should be creating an offloading descriptor only if there are devices
2796  // specified.
2797  assert(!Devices.empty() && "No OpenMP offloading devices??");
2798
2799  // Create the external variables that will point to the begin and end of the
2800  // host entries section. These will be defined by the linker.
2801  auto *OffloadEntryTy =
2802      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2803  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2804      M, OffloadEntryTy, /*isConstant=*/true,
2805      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2806      ".omp_offloading.entries_begin");
2807  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2808      M, OffloadEntryTy, /*isConstant=*/true,
2809      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
2810      ".omp_offloading.entries_end");
2811
2812  // Create all device images
2813  llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntries;
2814  auto *DeviceImageTy = cast<llvm::StructType>(
2815      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2816
2817  for (unsigned i = 0; i < Devices.size(); ++i) {
2818    StringRef T = Devices[i].getTriple();
2819    auto *ImgBegin = new llvm::GlobalVariable(
2820        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2821        /*Initializer=*/nullptr,
2822        Twine(".omp_offloading.img_start.") + Twine(T));
2823    auto *ImgEnd = new llvm::GlobalVariable(
2824        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2825        /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
2826
2827    llvm::Constant *Dev =
2828        llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2829                                  HostEntriesBegin, HostEntriesEnd, nullptr);
2830    DeviceImagesEntries.push_back(Dev);
2831  }
2832
2833  // Create device images global array.
2834  llvm::ArrayType *DeviceImagesInitTy =
2835      llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntries.size());
2836  llvm::Constant *DeviceImagesInit =
2837      llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntries);
2838
2839  llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2840      M, DeviceImagesInitTy, /*isConstant=*/true,
2841      llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2842      ".omp_offloading.device_images");
2843  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2844
2845  // This is a zero index array used to build the GEP constant expressions below.
2846  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2847                             llvm::Constant::getNullValue(CGM.Int32Ty)};
2848
2849  // Create the target region descriptor.
2850  auto *BinaryDescriptorTy = cast<llvm::StructType>(
2851      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2852  llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2853      BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2854      llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2855                                           Index),
2856      HostEntriesBegin, HostEntriesEnd, nullptr);
2857
2858  auto *Desc = new llvm::GlobalVariable(
2859      M, BinaryDescriptorTy, /*isConstant=*/true,
2860      llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2861      ".omp_offloading.descriptor");
2862
2863  // Emit code to register or unregister the descriptor at execution
2864  // startup or closing, respectively.
2865
2866  // Create a variable to drive the registration and unregistration of the
2867  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2868  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2869  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2870                                IdentInfo, C.CharTy);
2871
2872  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2873      CGM, ".omp_offloading.descriptor_unreg",
2874      [&](CodeGenFunction &CGF, PrePostActionTy &) {
2875        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2876                             Desc);
2877      });
2878  auto *RegFn = createOffloadingBinaryDescriptorFunction(
2879      CGM, ".omp_offloading.descriptor_reg",
2880      [&](CodeGenFunction &CGF, PrePostActionTy &) {
2881        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2882                             Desc);
2883        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2884      });
2885  return RegFn;
2886}
2887
2888void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
2889                                         llvm::Constant *Addr, uint64_t Size) {
2890  StringRef Name = Addr->getName();
2891  auto *TgtOffloadEntryType = cast<llvm::StructType>(
2892      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2893  llvm::LLVMContext &C = CGM.getModule().getContext();
2894  llvm::Module &M = CGM.getModule();
2895
2896  // Make sure the ID (emitted as the entry address) has the right type.
2897  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
2898
2899  // Create constant string with the name.
2900  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2901
2902  llvm::GlobalVariable *Str =
2903      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2904                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
2905                               ".omp_offloading.entry_name");
2906  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2907  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2908
2909  // Create the entry struct.
2910  llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2911      TgtOffloadEntryType, AddrPtr, StrPtr,
2912      llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2913  llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2914      M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2915      EntryInit, ".omp_offloading.entry");
2916
2917  // The entry has to be created in the section the linker expects it to be.
2918  Entry->setSection(".omp_offloading.entries");
2919  // We can't have any padding between symbols, so we need to have 1-byte
2920  // alignment.
2921  Entry->setAlignment(1);
2922}
2923
2924void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2925  // Emit the offloading entries and metadata so that the device codegen side
2926  // can easily figure out what to emit. The produced metadata looks like
2927  // this:
2928  //
2929  // !omp_offload.info = !{!1, ...}
2930  //
2931  // Right now we only generate metadata for functions that contain target
2932  // regions.
2933
2934  // If we do not have entries, we don't need to do anything.
2935  if (OffloadEntriesInfoManager.empty())
2936    return;
2937
2938  llvm::Module &M = CGM.getModule();
2939  llvm::LLVMContext &C = M.getContext();
2940  SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2941      OrderedEntries(OffloadEntriesInfoManager.size());
2942
2943  // Create the offloading info metadata node.
2944  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2945
2946  // Auxiliary helpers to create metadata values and strings.
2947  auto getMDInt = [&](unsigned v) {
2948    return llvm::ConstantAsMetadata::get(
2949        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2950  };
2951
2952  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2953
2954  // Create a function that emits metadata for each target region entry.
2955  auto &&TargetRegionMetadataEmitter = [&](
2956      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2957      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2958    llvm::SmallVector<llvm::Metadata *, 32> Ops;
2959    // Generate metadata for target regions. Each entry of this metadata
2960    // contains:
2961    // - Entry 0 -> Kind of this type of metadata (0).
2962    // - Entry 1 -> Device ID of the file where the entry was identified.
2963    // - Entry 2 -> File ID of the file where the entry was identified.
2964    // - Entry 3 -> Mangled name of the function where the entry was identified.
2965    // - Entry 4 -> Line in the file where the entry was identified.
2966    // - Entry 5 -> Order the entry was created.
2967    // The first element of the metadata node is the kind.
2968    Ops.push_back(getMDInt(E.getKind()));
2969    Ops.push_back(getMDInt(DeviceID));
2970    Ops.push_back(getMDInt(FileID));
2971    Ops.push_back(getMDString(ParentName));
2972    Ops.push_back(getMDInt(Line));
2973    Ops.push_back(getMDInt(E.getOrder()));
2974
2975    // Save this entry in the right position of the ordered entries array.
2976    OrderedEntries[E.getOrder()] = &E;
2977
2978    // Add metadata to the named metadata node.
2979    MD->addOperand(llvm::MDNode::get(C, Ops));
2980  };
2981
2982  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2983      TargetRegionMetadataEmitter);
2984
2985  for (auto *E : OrderedEntries) {
2986    assert(E && "All ordered entries must exist!");
2987    if (auto *CE =
2988            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2989                E)) {
2990      assert(CE->getID() && CE->getAddress() &&
2991             "Entry ID and Addr are invalid!");
2992      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
2993    } else
2994      llvm_unreachable("Unsupported entry kind.");
2995  }
2996}
2997
2998/// \brief Loads all the offload entries information from the host IR
2999/// metadata.
3000void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3001  // If we are in target mode, load the metadata from the host IR. This code has
3002  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3003
3004  if (!CGM.getLangOpts().OpenMPIsDevice)
3005    return;
3006
3007  if (CGM.getLangOpts().OMPHostIRFile.empty())
3008    return;
3009
3010  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3011  if (Buf.getError())
3012    return;
3013
3014  llvm::LLVMContext C;
3015  auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
3016
3017  if (ME.getError())
3018    return;
3019
3020  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3021  if (!MD)
3022    return;
3023
3024  for (auto I : MD->operands()) {
3025    llvm::MDNode *MN = cast<llvm::MDNode>(I);
3026
3027    auto getMDInt = [&](unsigned Idx) {
3028      llvm::ConstantAsMetadata *V =
3029          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3030      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3031    };
3032
3033    auto getMDString = [&](unsigned Idx) {
3034      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3035      return V->getString();
3036    };
3037
3038    switch (getMDInt(0)) {
3039    default:
3040      llvm_unreachable("Unexpected metadata!");
3041      break;
3042    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3043        OFFLOAD_ENTRY_INFO_TARGET_REGION:
3044      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3045          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3046          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3047          /*Order=*/getMDInt(5));
3048      break;
3049    }
3050  }
3051}
3052
3053void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3054  if (!KmpRoutineEntryPtrTy) {
3055    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3056    auto &C = CGM.getContext();
3057    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3058    FunctionProtoType::ExtProtoInfo EPI;
3059    KmpRoutineEntryPtrQTy = C.getPointerType(
3060        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3061    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3062  }
3063}
3064
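/// Create an implicit public FieldDecl of the given type and add it to \a DC.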
3065static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3066                                       QualType FieldTy) {
3067  auto *Field = FieldDecl::Create(
3068      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3069      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
3070      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3071  Field->setAccess(AS_public);
3072  DC->addDecl(Field);
3073  return Field;
3074}
3075
3076QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3077
3078  // Make sure the type of the entry is already created. This is the type we
3079  // have to create:
3080  // struct __tgt_offload_entry{
3081  //   void      *addr;       // Pointer to the offload entry info.
3082  //                          // (function or global)
3083  //   char      *name;       // Name of the function or global.
3084  //   size_t     size;       // Size of the entry info (0 if it is a function).
3085  // };
3086  if (TgtOffloadEntryQTy.isNull()) {
3087    ASTContext &C = CGM.getContext();
3088    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3089    RD->startDefinition();
3090    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3091    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3092    addFieldToRecordDecl(C, RD, C.getSizeType());
3093    RD->completeDefinition();
3094    TgtOffloadEntryQTy = C.getRecordType(RD);
3095  }
3096  return TgtOffloadEntryQTy;
3097}
3098
3099QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3100  // These are the types we need to build:
3101  // struct __tgt_device_image{
3102  // void   *ImageStart;       // Pointer to the target code start.
3103  // void   *ImageEnd;         // Pointer to the target code end.
3104  // // We also add the host entries to the device image, as it may be useful
3105  // // for the target runtime to have access to that information.
3106  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
3107  //                                       // the entries.
3108  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3109  //                                       // entries (non inclusive).
3110  // };
3111  if (TgtDeviceImageQTy.isNull()) {
3112    ASTContext &C = CGM.getContext();
3113    auto *RD = C.buildImplicitRecord("__tgt_device_image");
3114    RD->startDefinition();
3115    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3116    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3117    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3118    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3119    RD->completeDefinition();
3120    TgtDeviceImageQTy = C.getRecordType(RD);
3121  }
3122  return TgtDeviceImageQTy;
3123}
3124
3125QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3126  // struct __tgt_bin_desc{
3127  //   int32_t              NumDevices;      // Number of devices supported.
3128  //   __tgt_device_image   *DeviceImages;   // Array of device images
3129  //                                         // (one per device).
3130  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
3131  //                                         // entries.
3132  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
3133  //                                         // entries (non inclusive).
3134  // };
3135  if (TgtBinaryDescriptorQTy.isNull()) {
3136    ASTContext &C = CGM.getContext();
3137    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3138    RD->startDefinition();
3139    addFieldToRecordDecl(
3140        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3141    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3142    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3143    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3144    RD->completeDefinition();
3145    TgtBinaryDescriptorQTy = C.getRecordType(RD);
3146  }
3147  return TgtBinaryDescriptorQTy;
3148}
3149
3150namespace {
3151struct PrivateHelpersTy {
3152  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3153                   const VarDecl *PrivateElemInit)
3154      : Original(Original), PrivateCopy(PrivateCopy),
3155        PrivateElemInit(PrivateElemInit) {}
3156  const VarDecl *Original;
3157  const VarDecl *PrivateCopy;
3158  const VarDecl *PrivateElemInit;
3159};
3160typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3161} // anonymous namespace
3162
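/// Build the implicit .kmp_privates.t record with one field per private copy
/// of the task, preserving any 'aligned' attributes of the original variables.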
3163static RecordDecl *
3164createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3165  if (!Privates.empty()) {
3166    auto &C = CGM.getContext();
3167    // Build struct .kmp_privates_t. {
3168    //         /*  private vars  */
3169    //       };
3170    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3171    RD->startDefinition();
3172    for (auto &&Pair : Privates) {
3173      auto *VD = Pair.second.Original;
3174      auto Type = VD->getType();
3175      Type = Type.getNonReferenceType();
3176      auto *FD = addFieldToRecordDecl(C, RD, Type);
3177      if (VD->hasAttrs()) {
3178        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3179             E(VD->getAttrs().end());
3180             I != E; ++I)
3181          FD->addAttr(*I);
3182      }
3183    }
3184    RD->completeDefinition();
3185    return RD;
3186  }
3187  return nullptr;
3188}
3189
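/// Build the kmp_task_t record expected by the runtime, including the extra
/// lower bound, upper bound, stride and last-iteration fields emitted for
/// taskloop directives.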
3190static RecordDecl *
3191createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3192                         QualType KmpInt32Ty,
3193                         QualType KmpRoutineEntryPointerQTy) {
3194  auto &C = CGM.getContext();
3195  // Build struct kmp_task_t {
3196  //         void *              shareds;
3197  //         kmp_routine_entry_t routine;
3198  //         kmp_int32           part_id;
3199  //         kmp_cmplrdata_t data1;
3200  //         kmp_cmplrdata_t data2;
3201  // For taskloops, additional fields:
3202  //         kmp_uint64          lb;
3203  //         kmp_uint64          ub;
3204  //         kmp_int64           st;
3205  //         kmp_int32           liter;
3206  //       };
3207  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3208  UD->startDefinition();
3209  addFieldToRecordDecl(C, UD, KmpInt32Ty);
3210  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3211  UD->completeDefinition();
3212  QualType KmpCmplrdataTy = C.getRecordType(UD);
3213  auto *RD = C.buildImplicitRecord("kmp_task_t");
3214  RD->startDefinition();
3215  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3216  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3217  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3218  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3219  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3220  if (isOpenMPTaskLoopDirective(Kind)) {
3221    QualType KmpUInt64Ty =
3222        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3223    QualType KmpInt64Ty =
3224        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3225    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3226    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3227    addFieldToRecordDecl(C, RD, KmpInt64Ty);
3228    addFieldToRecordDecl(C, RD, KmpInt32Ty);
3229  }
3230  RD->completeDefinition();
3231  return RD;
3232}
3233
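/// Wrap kmp_task_t and the optional .kmp_privates.t record into a single
/// kmp_task_t_with_privates record.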
3234static RecordDecl *
3235createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3236                                     ArrayRef<PrivateDataTy> Privates) {
3237  auto &C = CGM.getContext();
3238  // Build struct kmp_task_t_with_privates {
3239  //         kmp_task_t task_data;
3240  //         .kmp_privates_t. privates;
3241  //       };
3242  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3243  RD->startDefinition();
3244  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3245  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3246    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3247  }
3248  RD->completeDefinition();
3249  return RD;
3250}
3251
3252/// \brief Emit a proxy function which accepts kmp_task_t as the second
3253/// argument.
3254/// \code
3255/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3256///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3257///   For taskloops:
3258///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3259///   tt->shareds);
3260///   return 0;
3261/// }
3262/// \endcode
3263static llvm::Value *
3264emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3265                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3266                      QualType KmpTaskTWithPrivatesPtrQTy,
3267                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3268                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
3269                      llvm::Value *TaskPrivatesMap) {
3270  auto &C = CGM.getContext();
3271  FunctionArgList Args;
3272  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3273  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3274                                /*Id=*/nullptr,
3275                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
3276  Args.push_back(&GtidArg);
3277  Args.push_back(&TaskTypeArg);
3278  auto &TaskEntryFnInfo =
3279      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3280  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3281  auto *TaskEntry =
3282      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
3283                             ".omp_task_entry.", &CGM.getModule());
3284  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3285  CodeGenFunction CGF(CGM);
3286  CGF.disableDebugInfo();
3287  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3288
3289  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
3290  //              task_privates_map, tt,
3291  //              for taskloops: tt->task_data.lb, tt->task_data.ub,
3292  //              tt->task_data.st, tt->task_data.liter,
3293  //              tt->task_data.shareds);
3294  auto *GtidParam = CGF.EmitLoadOfScalar(
3295      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3296  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3297      CGF.GetAddrOfLocalVar(&TaskTypeArg),
3298      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3299  auto *KmpTaskTWithPrivatesQTyRD =
3300      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3301  LValue Base =
3302      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3303  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3304  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3305  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3306  auto *PartidParam = PartIdLVal.getPointer();
3307
3308  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3309  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3310  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3311      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3312      CGF.ConvertTypeForMem(SharedsPtrTy));
3313
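  // The privates record, if any, is the second field of
  // kmp_task_t_with_privates.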
3314  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3315  llvm::Value *PrivatesParam;
3316  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3317    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3318    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3319        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3320  } else
3321    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3322
3323  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3324                               TaskPrivatesMap,
3325                               CGF.Builder
3326                                   .CreatePointerBitCastOrAddrSpaceCast(
3327                                       TDBase.getAddress(), CGF.VoidPtrTy)
3328                                   .getPointer()};
3329  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3330                                          std::end(CommonArgs));
3331  if (isOpenMPTaskLoopDirective(Kind)) {
3332    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3333    auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3334    auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3335    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3336    auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3337    auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3338    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3339    auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3340    auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3341    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3342    auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3343    auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3344    CallArgs.push_back(LBParam);
3345    CallArgs.push_back(UBParam);
3346    CallArgs.push_back(StParam);
3347    CallArgs.push_back(LIParam);
3348  }
3349  CallArgs.push_back(SharedsParam);
3350
3351  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3352  CGF.EmitStoreThroughLValue(
3353      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3354      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3355  CGF.FinishFunction();
3356  return TaskEntry;
3357}
3358
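/// \brief Emit the task destructor function .omp_task_destructor. that runs
/// the destructors of all private copies stored in kmp_task_t_with_privates.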
3359static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3360                                            SourceLocation Loc,
3361                                            QualType KmpInt32Ty,
3362                                            QualType KmpTaskTWithPrivatesPtrQTy,
3363                                            QualType KmpTaskTWithPrivatesQTy) {
3364  auto &C = CGM.getContext();
3365  FunctionArgList Args;
3366  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
3367  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
3368                                /*Id=*/nullptr,
3369                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
3370  Args.push_back(&GtidArg);
3371  Args.push_back(&TaskTypeArg);
3373  auto &DestructorFnInfo =
3374      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3375  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3376  auto *DestructorFn =
3377      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3378                             ".omp_task_destructor.", &CGM.getModule());
3379  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3380                                    DestructorFnInfo);
3381  CodeGenFunction CGF(CGM);
3382  CGF.disableDebugInfo();
3383  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3384                    Args);
3385
3386  LValue Base = CGF.EmitLoadOfPointerLValue(
3387      CGF.GetAddrOfLocalVar(&TaskTypeArg),
3388      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3389  auto *KmpTaskTWithPrivatesQTyRD =
3390      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3391  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3392  Base = CGF.EmitLValueForField(Base, *FI);
3393  for (auto *Field :
3394       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3395    if (auto DtorKind = Field->getType().isDestructedType()) {
3396      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3397      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3398    }
3399  }
3400  CGF.FinishFunction();
3401  return DestructorFn;
3402}
3403
3404/// \brief Emit a privates mapping function for correct handling of private and
3405/// firstprivate variables.
3406/// \code
3407/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3408/// **noalias priv1,...,  <tyn> **noalias privn) {
3409///   *priv1 = &.privates.priv1;
3410///   ...;
3411///   *privn = &.privates.privn;
3412/// }
3413/// \endcode
3414static llvm::Value *
3415emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3416                               ArrayRef<const Expr *> PrivateVars,
3417                               ArrayRef<const Expr *> FirstprivateVars,
3418                               ArrayRef<const Expr *> LastprivateVars,
3419                               QualType PrivatesQTy,
3420                               ArrayRef<PrivateDataTy> Privates) {
3421  auto &C = CGM.getContext();
3422  FunctionArgList Args;
3423  ImplicitParamDecl TaskPrivatesArg(
3424      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3425      C.getPointerType(PrivatesQTy).withConst().withRestrict());
3426  Args.push_back(&TaskPrivatesArg);
3427  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3428  unsigned Counter = 1;
3429  for (auto *E : PrivateVars) {
3430    Args.push_back(ImplicitParamDecl::Create(
3431        C, /*DC=*/nullptr, Loc,
3432        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3433                            .withConst()
3434                            .withRestrict()));
3435    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3436    PrivateVarsPos[VD] = Counter;
3437    ++Counter;
3438  }
3439  for (auto *E : FirstprivateVars) {
3440    Args.push_back(ImplicitParamDecl::Create(
3441        C, /*DC=*/nullptr, Loc,
3442        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3443                            .withConst()
3444                            .withRestrict()));
3445    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3446    PrivateVarsPos[VD] = Counter;
3447    ++Counter;
3448  }
3449  for (auto *E : LastprivateVars) {
3450    Args.push_back(ImplicitParamDecl::Create(
3451        C, /*DC=*/nullptr, Loc,
3452        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
3453                            .withConst()
3454                            .withRestrict()));
3455    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3456    PrivateVarsPos[VD] = Counter;
3457    ++Counter;
3458  }
3459  auto &TaskPrivatesMapFnInfo =
3460      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3461  auto *TaskPrivatesMapTy =
3462      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3463  auto *TaskPrivatesMap = llvm::Function::Create(
3464      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3465      ".omp_task_privates_map.", &CGM.getModule());
3466  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3467                                    TaskPrivatesMapFnInfo);
3468  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3469  CodeGenFunction CGF(CGM);
3470  CGF.disableDebugInfo();
3471  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3472                    TaskPrivatesMapFnInfo, Args);
3473
3474  // *privi = &.privates.privi;
3475  LValue Base = CGF.EmitLoadOfPointerLValue(
3476      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3477      TaskPrivatesArg.getType()->castAs<PointerType>());
3478  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3479  Counter = 0;
3480  for (auto *Field : PrivatesQTyRD->fields()) {
3481    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
3482    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3483    auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3484    auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3485        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3486    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
3487    ++Counter;
3488  }
3489  CGF.FinishFunction();
3490  return TaskPrivatesMap;
3491}
3492
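/// Comparator (for llvm::array_pod_sort) that orders PrivateDataTy entries by
/// decreasing alignment, so the most strictly aligned private copies are laid
/// out first in the generated .kmp_privates.t record.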
3493static int array_pod_sort_comparator(const PrivateDataTy *P1,
3494                                     const PrivateDataTy *P2) {
3495  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
3496}
3497
3498/// Emit initialization for private variables in task-based directives.
3499static void emitPrivatesInit(CodeGenFunction &CGF,
3500                             const OMPExecutableDirective &D,
3501                             Address KmpTaskSharedsPtr, LValue TDBase,
3502                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3503                             QualType SharedsTy, QualType SharedsPtrTy,
3504                             const OMPTaskDataTy &Data,
3505                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3506  auto &C = CGF.getContext();
3507  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3508  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3509  LValue SrcBase;
3510  if (!Data.FirstprivateVars.empty()) {
3511    SrcBase = CGF.MakeAddrLValue(
3512        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3513            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3514        SharedsTy);
3515  }
3516  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
3517      cast<CapturedStmt>(*D.getAssociatedStmt()));
3518  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
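  // Walk the privates in the same order as the corresponding fields of
  // .kmp_privates.t and emit an initializer for every copy that needs one.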
3519  for (auto &&Pair : Privates) {
3520    auto *VD = Pair.second.PrivateCopy;
3521    auto *Init = VD->getAnyInitializer();
3522    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3523                             !CGF.isTrivialInitializer(Init)))) {
3524      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3525      if (auto *Elem = Pair.second.PrivateElemInit) {
3526        auto *OriginalVD = Pair.second.Original;
3527        auto *SharedField = CapturesInfo.lookup(OriginalVD);
3528        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3529        SharedRefLValue = CGF.MakeAddrLValue(
3530            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
3531            SharedRefLValue.getType(), AlignmentSource::Decl);
3532        QualType Type = OriginalVD->getType();
3533        if (Type->isArrayType()) {
3534          // Initialize firstprivate array.
3535          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3536            // Perform simple memcpy.
3537            CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
3538                                    SharedRefLValue.getAddress(), Type);
3539          } else {
3540            // Initialize firstprivate array using element-by-element
3541            // initialization.
3542            CGF.EmitOMPAggregateAssign(
3543                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3544                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3545                                                  Address SrcElement) {
3546                  // Clean up any temporaries needed by the initialization.
3547                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
3548                  InitScope.addPrivate(
3549                      Elem, [SrcElement]() -> Address { return SrcElement; });
3550                  (void)InitScope.Privatize();
3551                  // Emit initialization for single element.
3552                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3553                      CGF, &CapturesInfo);
3554                  CGF.EmitAnyExprToMem(Init, DestElement,
3555                                       Init->getType().getQualifiers(),
3556                                       /*IsInitializer=*/false);
3557                });
3558          }
3559        } else {
3560          CodeGenFunction::OMPPrivateScope InitScope(CGF);
3561          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
3562            return SharedRefLValue.getAddress();
3563          });
3564          (void)InitScope.Privatize();
3565          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3566          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3567                             /*capturedByInit=*/false);
3568        }
3569      } else
3570        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3571    }
3572    ++FI;
3573  }
3574}
3575
3576/// Check whether a task duplication function is required for taskloops, i.e.
/// whether any private copy has a non-trivial (constructor) initializer.
3577static bool checkInitIsRequired(CodeGenFunction &CGF,
3578                                ArrayRef<PrivateDataTy> Privates) {
3579  bool InitRequired = false;
3580  for (auto &&Pair : Privates) {
3581    auto *VD = Pair.second.PrivateCopy;
3582    auto *Init = VD->getAnyInitializer();
3583    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3584                                    !CGF.isTrivialInitializer(Init));
3585  }
3586  return InitRequired;
3587}
3588
3590/// Emit task_dup function (for initialization of
3591/// private/firstprivate/lastprivate vars and last_iter flag)
3592/// \code
3593/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3594/// lastpriv) {
3595/// // setup lastprivate flag
3596///    task_dst->last = lastpriv;
3597/// // could be constructor calls here...
3598/// }
3599/// \endcode
3600static llvm::Value *
3601emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3602                    const OMPExecutableDirective &D,
3603                    QualType KmpTaskTWithPrivatesPtrQTy,
3604                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3605                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3606                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3607                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3608  auto &C = CGM.getContext();
3609  FunctionArgList Args;
3610  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
3611                           /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3612  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
3613                           /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
3614  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
3615                                /*Id=*/nullptr, C.IntTy);
3616  Args.push_back(&DstArg);
3617  Args.push_back(&SrcArg);
3618  Args.push_back(&LastprivArg);
3619  auto &TaskDupFnInfo =
3620      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3621  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3622  auto *TaskDup =
3623      llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
3624                             ".omp_task_dup.", &CGM.getModule());
3625  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
3626  CodeGenFunction CGF(CGM);
3627  CGF.disableDebugInfo();
3628  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
3629
3630  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3631      CGF.GetAddrOfLocalVar(&DstArg),
3632      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3633  // task_dst->liter = lastpriv;
3634  if (WithLastIter) {
3635    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3636    LValue Base = CGF.EmitLValueForField(
3637        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3638    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3639    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3640        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3641    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3642  }
3643
3644  // Emit initial values for private copies (if any).
3645  assert(!Privates.empty());
3646  Address KmpTaskSharedsPtr = Address::invalid();
3647  if (!Data.FirstprivateVars.empty()) {
3648    LValue TDBase = CGF.EmitLoadOfPointerLValue(
3649        CGF.GetAddrOfLocalVar(&SrcArg),
3650        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3651    LValue Base = CGF.EmitLValueForField(
3652        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3653    KmpTaskSharedsPtr = Address(
3654        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3655                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3656                                                  KmpTaskTShareds)),
3657                             Loc),
3658        CGF.getNaturalTypeAlignment(SharedsTy));
3659  }
3660  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3661                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3662  CGF.FinishFunction();
3663  return TaskDup;
3664}
3665
3666/// Checks if destructor function is required to be generated.
3667/// \return true if cleanups are required, false otherwise.
3668static bool
3669checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3670  bool NeedsCleanup = false;
3671  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3672  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3673  for (auto *FD : PrivateRD->fields()) {
3674    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3675    if (NeedsCleanup)
3676      break;
3677  }
3678  return NeedsCleanup;
3679}
3680
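// Allocates and initializes the kmp_task_t object for a task/taskloop
// directive: aggregates and sorts the private copies, builds the
// kmp_task_t_with_privates record, emits the proxy task entry, the privates
// mapping function and (for taskloops) the task duplication function, copies
// the shareds and fills in destructors/priority. Returns the pieces callers
// need to actually enqueue the task.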
3681CGOpenMPRuntime::TaskResultTy
3682CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3683                              const OMPExecutableDirective &D,
3684                              llvm::Value *TaskFunction, QualType SharedsTy,
3685                              Address Shareds, const OMPTaskDataTy &Data) {
3686  auto &C = CGM.getContext();
3687  llvm::SmallVector<PrivateDataTy, 4> Privates;
3688  // Aggregate privates and sort them by the alignment.
3689  auto I = Data.PrivateCopies.begin();
3690  for (auto *E : Data.PrivateVars) {
3691    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3692    Privates.push_back(std::make_pair(
3693        C.getDeclAlign(VD),
3694        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3695                         /*PrivateElemInit=*/nullptr)));
3696    ++I;
3697  }
3698  I = Data.FirstprivateCopies.begin();
3699  auto IElemInitRef = Data.FirstprivateInits.begin();
3700  for (auto *E : Data.FirstprivateVars) {
3701    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3702    Privates.push_back(std::make_pair(
3703        C.getDeclAlign(VD),
3704        PrivateHelpersTy(
3705            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3706            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
3707    ++I;
3708    ++IElemInitRef;
3709  }
3710  I = Data.LastprivateCopies.begin();
3711  for (auto *E : Data.LastprivateVars) {
3712    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3713    Privates.push_back(std::make_pair(
3714        C.getDeclAlign(VD),
3715        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3716                         /*PrivateElemInit=*/nullptr)));
3717    ++I;
3718  }
3719  llvm::array_pod_sort(Privates.begin(), Privates.end(),
3720                       array_pod_sort_comparator);
3721  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3722  // Build type kmp_routine_entry_t (if not built yet).
3723  emitKmpRoutineEntryT(KmpInt32Ty);
3724  // Build type kmp_task_t (if not built yet).
3725  if (KmpTaskTQTy.isNull()) {
3726    KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3727        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3728  }
3729  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3730  // Build particular struct kmp_task_t for the given task.
3731  auto *KmpTaskTWithPrivatesQTyRD =
3732      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3733  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3734  QualType KmpTaskTWithPrivatesPtrQTy =
3735      C.getPointerType(KmpTaskTWithPrivatesQTy);
3736  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3737  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
3738  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3739  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3740
3741  // Build the mapping function for the task privates (null if there are none).
3742  llvm::Value *TaskPrivatesMap = nullptr;
3743  auto *TaskPrivatesMapTy =
3744      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
3745                3)
3746          ->getType();
3747  if (!Privates.empty()) {
3748    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3749    TaskPrivatesMap = emitTaskPrivateMappingFunction(
3750        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
3751        FI->getType(), Privates);
3752    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3753        TaskPrivatesMap, TaskPrivatesMapTy);
3754  } else {
3755    TaskPrivatesMap = llvm::ConstantPointerNull::get(
3756        cast<llvm::PointerType>(TaskPrivatesMapTy));
3757  }
3758  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3759  // kmp_task_t *tt);
3760  auto *TaskEntry = emitProxyTaskFunction(
3761      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3762      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3763      TaskPrivatesMap);
3764
3765  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3766  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3767  // kmp_routine_entry_t *task_entry);
3768  // Task flags. Format is taken from
3769  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
3770  // description of kmp_tasking_flags struct.
3771  enum {
3772    TiedFlag = 0x1,
3773    FinalFlag = 0x2,
3774    DestructorsFlag = 0x8,
3775    PriorityFlag = 0x20
3776  };
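  // For illustration only: a tied task with private destructors and an
  // unconditional 'final' clause ends up allocated roughly as
  //   __kmpc_omp_task_alloc(&loc, gtid, TiedFlag | FinalFlag | DestructorsFlag,
  //                         sizeof(kmp_task_t_with_privates), sizeof(shareds),
  //                         &.omp_task_entry.);
  // with the flag bits computed below.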
3777  unsigned Flags = Data.Tied ? TiedFlag : 0;
3778  bool NeedsCleanup = false;
3779  if (!Privates.empty()) {
3780    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
3781    if (NeedsCleanup)
3782      Flags = Flags | DestructorsFlag;
3783  }
3784  if (Data.Priority.getInt())
3785    Flags = Flags | PriorityFlag;
3786  auto *TaskFlags =
3787      Data.Final.getPointer()
3788          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3789                                     CGF.Builder.getInt32(FinalFlag),
3790                                     CGF.Builder.getInt32(/*C=*/0))
3791          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3792  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3793  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3794  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
3795                              getThreadID(CGF, Loc), TaskFlags,
3796                              KmpTaskTWithPrivatesTySize, SharedsSize,
3797                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3798                                  TaskEntry, KmpRoutineEntryPtrTy)};
3799  auto *NewTask = CGF.EmitRuntimeCall(
3800      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
3801  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3802      NewTask, KmpTaskTWithPrivatesPtrTy);
3803  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3804                                               KmpTaskTWithPrivatesQTy);
3805  LValue TDBase =
3806      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3807  // Fill the data in the resulting kmp_task_t record.
3808  // Copy shareds if there are any.
3809  Address KmpTaskSharedsPtr = Address::invalid();
3810  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3811    KmpTaskSharedsPtr =
3812        Address(CGF.EmitLoadOfScalar(
3813                    CGF.EmitLValueForField(
3814                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
3815                                           KmpTaskTShareds)),
3816                    Loc),
3817                CGF.getNaturalTypeAlignment(SharedsTy));
3818    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
3819  }
3820  // Emit initial values for private copies (if any).
3821  TaskResultTy Result;
3822  if (!Privates.empty()) {
3823    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3824                     SharedsTy, SharedsPtrTy, Data, Privates,
3825                     /*ForDup=*/false);
3826    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3827        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3828      Result.TaskDupFn = emitTaskDupFunction(
3829          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3830          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3831          /*WithLastIter=*/!Data.LastprivateVars.empty());
3832    }
3833  }
3834  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
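  // Per the description in kmp.h, the union is roughly:
  //   union kmp_cmplrdata { kmp_int32 priority; kmp_routine_entry_t destructors; };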
3835  enum { Priority = 0, Destructors = 1 };
3836  // Provide pointer to function with destructors for privates.
3837  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3838  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
3839  if (NeedsCleanup) {
3840    llvm::Value *DestructorFn = emitDestructorsFunction(
3841        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3842        KmpTaskTWithPrivatesQTy);
3843    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3844    LValue DestructorsLV = CGF.EmitLValueForField(
3845        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3846    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3847                              DestructorFn, KmpRoutineEntryPtrTy),
3848                          DestructorsLV);
3849  }
3850  // Set priority.
3851  if (Data.Priority.getInt()) {
3852    LValue Data2LV = CGF.EmitLValueForField(
3853        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3854    LValue PriorityLV = CGF.EmitLValueForField(
3855        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3856    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3857  }
3858  Result.NewTask = NewTask;
3859  Result.TaskEntry = TaskEntry;
3860  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3861  Result.TDBase = TDBase;
3862  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3863  return Result;
3864}
3865
3866void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
3867                                   const OMPExecutableDirective &D,
3868                                   llvm::Value *TaskFunction,
3869                                   QualType SharedsTy, Address Shareds,
3870                                   const Expr *IfCond,
3871                                   const OMPTaskDataTy &Data) {
3872  if (!CGF.HaveInsertPoint())
3873    return;
3874
3875  TaskResultTy Result =
3876      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
3877  llvm::Value *NewTask = Result.NewTask;
3878  llvm::Value *TaskEntry = Result.TaskEntry;
3879  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
3880  LValue TDBase = Result.TDBase;
3881  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
3882  auto &C = CGM.getContext();
3883  // Process list of dependences.
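  // For illustration only: with 'depend(in: a) depend(inout: b)' the code below
  // materializes roughly
  //   kmp_depend_info .dep.arr.addr[2];
  //   .dep.arr.addr[0] = { (intptr_t)&a, sizeof(a), DepIn };
  //   .dep.arr.addr[1] = { (intptr_t)&b, sizeof(b), DepInOut };
  // and passes its address to the tasking runtime calls.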
3884  Address DependenciesArray = Address::invalid();
3885  unsigned NumDependencies = Data.Dependences.size();
3886  if (NumDependencies) {
3887    // Dependence kind for RTL.
3888    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
3889    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
3890    RecordDecl *KmpDependInfoRD;
3891    QualType FlagsTy =
3892        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
3893    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
3894    if (KmpDependInfoTy.isNull()) {
3895      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
3896      KmpDependInfoRD->startDefinition();
3897      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
3898      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
3899      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
3900      KmpDependInfoRD->completeDefinition();
3901      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
3902    } else
3903      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
3904    CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
3905    // Define type kmp_depend_info[<Dependences.size()>];
3906    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
3907        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
3908        ArrayType::Normal, /*IndexTypeQuals=*/0);
3909    // kmp_depend_info[<Dependences.size()>] deps;
3910    DependenciesArray =
3911        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
3912    for (unsigned i = 0; i < NumDependencies; ++i) {
3913      const Expr *E = Data.Dependences[i].second;
3914      auto Addr = CGF.EmitLValue(E);
3915      llvm::Value *Size;
3916      QualType Ty = E->getType();
3917      if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3918        LValue UpAddrLVal =
3919            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3920        llvm::Value *UpAddr =
3921            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3922        llvm::Value *LowIntPtr =
3923            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3924        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3925        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3926      } else
3927        Size = CGF.getTypeSize(Ty);
3928      auto Base = CGF.MakeAddrLValue(
3929          CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3930          KmpDependInfoTy);
3931      // deps[i].base_addr = &<Dependences[i].second>;
3932      auto BaseAddrLVal = CGF.EmitLValueForField(
3933          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3934      CGF.EmitStoreOfScalar(
3935          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3936          BaseAddrLVal);
3937      // deps[i].len = sizeof(<Dependences[i].second>);
3938      auto LenLVal = CGF.EmitLValueForField(
3939          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3940      CGF.EmitStoreOfScalar(Size, LenLVal);
3941      // deps[i].flags = <Dependences[i].first>;
3942      RTLDependenceKindTy DepKind;
3943      switch (Data.Dependences[i].first) {
3944      case OMPC_DEPEND_in:
3945        DepKind = DepIn;
3946        break;
3947      // Out and InOut dependencies must use the same code.
3948      case OMPC_DEPEND_out:
3949      case OMPC_DEPEND_inout:
3950        DepKind = DepInOut;
3951        break;
3952      case OMPC_DEPEND_source:
3953      case OMPC_DEPEND_sink:
3954      case OMPC_DEPEND_unknown:
3955        llvm_unreachable("Unknown task dependence type");
3956      }
3957      auto FlagsLVal = CGF.EmitLValueForField(
3958          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3959      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3960                            FlagsLVal);
3961    }
3962    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3963        CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3964        CGF.VoidPtrTy);
3965  }
3966
3967  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
3968  // libcall.
3969  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3970  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3971  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3972  // list is not empty
3973  auto *ThreadID = getThreadID(CGF, Loc);
3974  auto *UpLoc = emitUpdateLocation(CGF, Loc);
3975  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3976  llvm::Value *DepTaskArgs[7];
3977  if (NumDependencies) {
3978    DepTaskArgs[0] = UpLoc;
3979    DepTaskArgs[1] = ThreadID;
3980    DepTaskArgs[2] = NewTask;
3981    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3982    DepTaskArgs[4] = DependenciesArray.getPointer();
3983    DepTaskArgs[5] = CGF.Builder.getInt32(0);
3984    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3985  }
3986  auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
3987                        NumDependencies, &TaskArgs,
3988                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
3989    if (!Data.Tied) {
3990      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3991      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
3992      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
3993    }
3994    if (NumDependencies) {
3995      CGF.EmitRuntimeCall(
3996          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
3997    } else {
3998      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
3999                          TaskArgs);
4000    }
4001    // Check if the parent region is untied and build a return for the untied task.
4002    if (auto *Region =
4003            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4004      Region->emitUntiedSwitch(CGF);
4005  };
4006
4007  llvm::Value *DepWaitTaskArgs[6];
4008  if (NumDependencies) {
4009    DepWaitTaskArgs[0] = UpLoc;
4010    DepWaitTaskArgs[1] = ThreadID;
4011    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4012    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4013    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4014    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4015  }
4016  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4017                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
4018                                                           PrePostActionTy &) {
4019    auto &RT = CGF.CGM.getOpenMPRuntime();
4020    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4021    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4022    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4023    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4024    // is specified.
4025    if (NumDependencies)
4026      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4027                          DepWaitTaskArgs);
4028    // Call proxy_task_entry(gtid, new_task);
4029    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
4030        CodeGenFunction &CGF, PrePostActionTy &Action) {
4031      Action.Enter(CGF);
4032      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4033      CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
4034    };
4035
4036    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4037    // kmp_task_t *new_task);
4038    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4039    // kmp_task_t *new_task);
4040    RegionCodeGenTy RCG(CodeGen);
4041    CommonActionTy Action(
4042        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4043        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4044    RCG.setAction(Action);
4045    RCG(CGF);
4046  };
4047
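  // Putting the two halves together, an 'if' clause on the task is lowered
  // roughly as (sketch):
  //   if (<cond>) {
  //     __kmpc_omp_task(loc, gtid, new_task);      // or __kmpc_omp_task_with_deps
  //   } else {
  //     [__kmpc_omp_wait_deps(loc, gtid, ndeps, dep_list, 0, nullptr);]
  //     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
  //     .omp_task_entry.(gtid, new_task);
  //     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
  //   }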
4048  if (IfCond)
4049    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4050  else {
4051    RegionCodeGenTy ThenRCG(ThenCodeGen);
4052    ThenRCG(CGF);
4053  }
4054}
4055
4056void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4057                                       const OMPLoopDirective &D,
4058                                       llvm::Value *TaskFunction,
4059                                       QualType SharedsTy, Address Shareds,
4060                                       const Expr *IfCond,
4061                                       const OMPTaskDataTy &Data) {
4062  if (!CGF.HaveInsertPoint())
4063    return;
4064  TaskResultTy Result =
4065      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4066  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4067  // libcall.
4068  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4069  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4070  // sched, kmp_uint64 grainsize, void *task_dup);
4071  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4072  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4073  llvm::Value *IfVal;
4074  if (IfCond) {
4075    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4076                                      /*isSigned=*/true);
4077  } else
4078    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4079
4080  LValue LBLVal = CGF.EmitLValueForField(
4081      Result.TDBase,
4082      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4083  auto *LBVar =
4084      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4085  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4086                       /*IsInitializer=*/true);
4087  LValue UBLVal = CGF.EmitLValueForField(
4088      Result.TDBase,
4089      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4090  auto *UBVar =
4091      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4092  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4093                       /*IsInitializer=*/true);
4094  LValue StLVal = CGF.EmitLValueForField(
4095      Result.TDBase,
4096      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4097  auto *StVar =
4098      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4099  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4100                       /*IsInitializer=*/true);
4101  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4102  llvm::Value *TaskArgs[] = {
4103      UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
4104      UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4105      llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
4106      llvm::ConstantInt::getSigned(
4107          CGF.IntTy, Data.Schedule.getPointer()
4108                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
4109                         : NoSchedule),
4110      Data.Schedule.getPointer()
4111          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4112                                      /*isSigned=*/false)
4113          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4114      Result.TaskDupFn
4115          ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
4116                                                            CGF.VoidPtrTy)
4117          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4118  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4119}
4120
4121/// \brief Emit reduction operation for each element of array (required for
4122/// array sections) LHS op = RHS.
4123/// \param Type Type of array.
4124/// \param LHSVar Variable on the left side of the reduction operation
4125/// (references element of array in original variable).
4126/// \param RHSVar Variable on the right side of the reduction operation
4127/// (references element of array in original variable).
4128/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4129/// RHSVar.
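/// A rough sketch of the emitted structure (names and types are illustrative):
/// \code
/// Ty *lhs = LHSBegin, *rhs = RHSBegin, *end = LHSBegin + NumElements;
/// if (lhs != end) {
///   do {
///     *lhs = RedOp(*lhs, *rhs); // body produced by RedOpGen
///     ++lhs; ++rhs;
///   } while (lhs != end);
/// }
/// \endcode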
4130static void EmitOMPAggregateReduction(
4131    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4132    const VarDecl *RHSVar,
4133    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4134                                  const Expr *, const Expr *)> &RedOpGen,
4135    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4136    const Expr *UpExpr = nullptr) {
4137  // Perform element-by-element initialization.
4138  QualType ElementTy;
4139  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4140  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4141
4142  // Drill down to the base element type on both arrays.
4143  auto ArrayTy = Type->getAsArrayTypeUnsafe();
4144  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4145
4146  auto RHSBegin = RHSAddr.getPointer();
4147  auto LHSBegin = LHSAddr.getPointer();
4148  // Compute the address one past the last element of the LHS array.
4149  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4150  // The basic structure here is a while-do loop.
4151  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4152  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4153  auto IsEmpty =
4154      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4155  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4156
4157  // Enter the loop body, making that address the current address.
4158  auto EntryBB = CGF.Builder.GetInsertBlock();
4159  CGF.EmitBlock(BodyBB);
4160
4161  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4162
4163  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4164      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4165  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4166  Address RHSElementCurrent =
4167      Address(RHSElementPHI,
4168              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4169
4170  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4171      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4172  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4173  Address LHSElementCurrent =
4174      Address(LHSElementPHI,
4175              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4176
4177  // Emit copy.
4178  CodeGenFunction::OMPPrivateScope Scope(CGF);
4179  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4180  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4181  Scope.Privatize();
4182  RedOpGen(CGF, XExpr, EExpr, UpExpr);
4183  Scope.ForceCleanup();
4184
4185  // Shift the address forward by one element.
4186  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4187      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4188  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4189      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4190  // Check whether we've reached the end.
4191  auto Done =
4192      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4193  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4194  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4195  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4196
4197  // Done.
4198  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4199}
4200
4201/// Emit the reduction combiner. If the combiner is a simple expression, emit it
4202/// as is; otherwise treat it as the combiner of a user-defined reduction (UDR)
4203/// declaration and emit it as a call to the UDR combiner function.
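/// For a UDR this amounts to invoking the user-provided combiner with the
/// current LHS/RHS elements bound to omp_out/omp_in (sketch only); the actual
/// call expression is the one built by Sema for the OMPDeclareReductionDecl,
/// and only its callee is remapped here to the emitted combiner function.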
4204static void emitReductionCombiner(CodeGenFunction &CGF,
4205                                  const Expr *ReductionOp) {
4206  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4207    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4208      if (auto *DRE =
4209              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4210        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4211          std::pair<llvm::Function *, llvm::Function *> Reduction =
4212              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4213          RValue Func = RValue::get(Reduction.first);
4214          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4215          CGF.EmitIgnoredExpr(ReductionOp);
4216          return;
4217        }
4218  CGF.EmitIgnoredExpr(ReductionOp);
4219}
4220
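/// Emits the helper
///   void .omp.reduction.reduction_func(void *lhs[<n>], void *rhs[<n>]);
/// which applies every reduction operation to the corresponding lhs[i]/rhs[i]
/// elements, recovering VLA sizes from the extra array-size slots in the lists.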
4221static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
4222                                          llvm::Type *ArgsType,
4223                                          ArrayRef<const Expr *> Privates,
4224                                          ArrayRef<const Expr *> LHSExprs,
4225                                          ArrayRef<const Expr *> RHSExprs,
4226                                          ArrayRef<const Expr *> ReductionOps) {
4227  auto &C = CGM.getContext();
4228
4229  // void reduction_func(void *LHSArg, void *RHSArg);
4230  FunctionArgList Args;
4231  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4232                           C.VoidPtrTy);
4233  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
4234                           C.VoidPtrTy);
4235  Args.push_back(&LHSArg);
4236  Args.push_back(&RHSArg);
4237  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4238  auto *Fn = llvm::Function::Create(
4239      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4240      ".omp.reduction.reduction_func", &CGM.getModule());
4241  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4242  CodeGenFunction CGF(CGM);
4243  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4244
4245  // Dst = (void*[n])(LHSArg);
4246  // Src = (void*[n])(RHSArg);
4247  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4248      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4249      ArgsType), CGF.getPointerAlign());
4250  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4251      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4252      ArgsType), CGF.getPointerAlign());
4253
4254  //  ...
4255  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4256  //  ...
4257  CodeGenFunction::OMPPrivateScope Scope(CGF);
4258  auto IPriv = Privates.begin();
4259  unsigned Idx = 0;
4260  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4261    auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4262    Scope.addPrivate(RHSVar, [&]() -> Address {
4263      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4264    });
4265    auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4266    Scope.addPrivate(LHSVar, [&]() -> Address {
4267      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4268    });
4269    QualType PrivTy = (*IPriv)->getType();
4270    if (PrivTy->isVariablyModifiedType()) {
4271      // Get array size and emit VLA type.
4272      ++Idx;
4273      Address Elem =
4274          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4275      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4276      auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4277      auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4278      CodeGenFunction::OpaqueValueMapping OpaqueMap(
4279          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4280      CGF.EmitVariablyModifiedType(PrivTy);
4281    }
4282  }
4283  Scope.Privatize();
4284  IPriv = Privates.begin();
4285  auto ILHS = LHSExprs.begin();
4286  auto IRHS = RHSExprs.begin();
4287  for (auto *E : ReductionOps) {
4288    if ((*IPriv)->getType()->isArrayType()) {
4289      // Emit reduction for array section.
4290      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4291      auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4292      EmitOMPAggregateReduction(
4293          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4294          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4295            emitReductionCombiner(CGF, E);
4296          });
4297    } else
4298      // Emit reduction for array subscript or single variable.
4299      emitReductionCombiner(CGF, E);
4300    ++IPriv;
4301    ++ILHS;
4302    ++IRHS;
4303  }
4304  Scope.ForceCleanup();
4305  CGF.FinishFunction();
4306  return Fn;
4307}
4308
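/// Emits the combiner for a single reduction item: array-typed privates go
/// through the element-wise EmitOMPAggregateReduction path, everything else
/// through emitReductionCombiner directly.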
4309static void emitSingleReductionCombiner(CodeGenFunction &CGF,
4310                                        const Expr *ReductionOp,
4311                                        const Expr *PrivateRef,
4312                                        const DeclRefExpr *LHS,
4313                                        const DeclRefExpr *RHS) {
4314  if (PrivateRef->getType()->isArrayType()) {
4315    // Emit reduction for array section.
4316    auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4317    auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4318    EmitOMPAggregateReduction(
4319        CGF, PrivateRef->getType(), LHSVar, RHSVar,
4320        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4321          emitReductionCombiner(CGF, ReductionOp);
4322        });
4323  } else
4324    // Emit reduction for array subscript or single variable.
4325    emitReductionCombiner(CGF, ReductionOp);
4326}
4327
4328void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4329                                    ArrayRef<const Expr *> Privates,
4330                                    ArrayRef<const Expr *> LHSExprs,
4331                                    ArrayRef<const Expr *> RHSExprs,
4332                                    ArrayRef<const Expr *> ReductionOps,
4333                                    bool WithNowait, bool SimpleReduction) {
4334  if (!CGF.HaveInsertPoint())
4335    return;
4336  // The following code should be emitted for the reduction:
4337  //
4338  // static kmp_critical_name lock = { 0 };
4339  //
4340  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4341  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4342  //  ...
4343  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4344  //  *(Type<n>-1*)rhs[<n>-1]);
4345  // }
4346  //
4347  // ...
4348  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4349  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4350  // RedList, reduce_func, &<lock>)) {
4351  // case 1:
4352  //  ...
4353  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4354  //  ...
4355  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4356  // break;
4357  // case 2:
4358  //  ...
4359  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4360  //  ...
4361  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4362  // break;
4363  // default:;
4364  // }
4365  //
4366  // If SimpleReduction is true, only the following code is generated:
4367  //  ...
4368  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4369  //  ...
4370
4371  auto &C = CGM.getContext();
4372
4373  if (SimpleReduction) {
4374    CodeGenFunction::RunCleanupsScope Scope(CGF);
4375    auto IPriv = Privates.begin();
4376    auto ILHS = LHSExprs.begin();
4377    auto IRHS = RHSExprs.begin();
4378    for (auto *E : ReductionOps) {
4379      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4380                                  cast<DeclRefExpr>(*IRHS));
4381      ++IPriv;
4382      ++ILHS;
4383      ++IRHS;
4384    }
4385    return;
4386  }
4387
4388  // 1. Build a list of reduction variables.
4389  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4390  auto Size = RHSExprs.size();
4391  for (auto *E : Privates) {
4392    if (E->getType()->isVariablyModifiedType())
4393      // Reserve place for array size.
4394      ++Size;
4395  }
4396  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4397  QualType ReductionArrayTy =
4398      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
4399                             /*IndexTypeQuals=*/0);
4400  Address ReductionList =
4401      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4402  auto IPriv = Privates.begin();
4403  unsigned Idx = 0;
4404  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4405    Address Elem =
4406      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
4407    CGF.Builder.CreateStore(
4408        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4409            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
4410        Elem);
4411    if ((*IPriv)->getType()->isVariablyModifiedType()) {
4412      // Store array size.
4413      ++Idx;
4414      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
4415                                             CGF.getPointerSize());
4416      llvm::Value *Size = CGF.Builder.CreateIntCast(
4417          CGF.getVLASize(
4418                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
4419              .first,
4420          CGF.SizeTy, /*isSigned=*/false);
4421      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
4422                              Elem);
4423    }
4424  }
4425
4426  // 2. Emit reduce_func().
4427  auto *ReductionFn = emitReductionFunction(
4428      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
4429      LHSExprs, RHSExprs, ReductionOps);
4430
4431  // 3. Create static kmp_critical_name lock = { 0 };
4432  auto *Lock = getCriticalRegionLock(".reduction");
4433
4434  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4435  // RedList, reduce_func, &<lock>);
4436  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4437  auto *ThreadId = getThreadID(CGF, Loc);
4438  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
4439  auto *RL =
4440    CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
4441                                                    CGF.VoidPtrTy);
4442  llvm::Value *Args[] = {
4443      IdentTLoc,                             // ident_t *<loc>
4444      ThreadId,                              // i32 <gtid>
4445      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
4446      ReductionArrayTySize,                  // size_type sizeof(RedList)
4447      RL,                                    // void *RedList
4448      ReductionFn, // void (*) (void *, void *) <reduce_func>
4449      Lock         // kmp_critical_name *&<lock>
4450  };
4451  auto Res = CGF.EmitRuntimeCall(
4452      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
4453                                       : OMPRTL__kmpc_reduce),
4454      Args);
4455
4456  // 5. Build switch(res)
4457  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
4458  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
4459
4460  // 6. Build case 1:
4461  //  ...
4462  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4463  //  ...
4464  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4465  // break;
4466  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
4467  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
4468  CGF.EmitBlock(Case1BB);
4469
4470  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4471  llvm::Value *EndArgs[] = {
4472      IdentTLoc, // ident_t *<loc>
4473      ThreadId,  // i32 <gtid>
4474      Lock       // kmp_critical_name *&<lock>
4475  };
4476  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4477      CodeGenFunction &CGF, PrePostActionTy &Action) {
4478    auto IPriv = Privates.begin();
4479    auto ILHS = LHSExprs.begin();
4480    auto IRHS = RHSExprs.begin();
4481    for (auto *E : ReductionOps) {
4482      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4483                                  cast<DeclRefExpr>(*IRHS));
4484      ++IPriv;
4485      ++ILHS;
4486      ++IRHS;
4487    }
4488  };
4489  RegionCodeGenTy RCG(CodeGen);
4490  CommonActionTy Action(
4491      nullptr, llvm::None,
4492      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
4493                                       : OMPRTL__kmpc_end_reduce),
4494      EndArgs);
4495  RCG.setAction(Action);
4496  RCG(CGF);
4497
4498  CGF.EmitBranch(DefaultBB);
4499
4500  // 7. Build case 2:
4501  //  ...
4502  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4503  //  ...
4504  // break;
4505  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
4506  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
4507  CGF.EmitBlock(Case2BB);
4508
4509  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
4510      CodeGenFunction &CGF, PrePostActionTy &Action) {
4511    auto ILHS = LHSExprs.begin();
4512    auto IRHS = RHSExprs.begin();
4513    auto IPriv = Privates.begin();
4514    for (auto *E : ReductionOps) {
4515      const Expr *XExpr = nullptr;
4516      const Expr *EExpr = nullptr;
4517      const Expr *UpExpr = nullptr;
4518      BinaryOperatorKind BO = BO_Comma;
4519      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
4520        if (BO->getOpcode() == BO_Assign) {
4521          XExpr = BO->getLHS();
4522          UpExpr = BO->getRHS();
4523        }
4524      }
4525      // Try to emit update expression as a simple atomic.
4526      auto *RHSExpr = UpExpr;
4527      if (RHSExpr) {
4528        // Analyze RHS part of the whole expression.
4529        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
4530                RHSExpr->IgnoreParenImpCasts())) {
4531          // If this is a conditional operator, analyze its condition for
4532          // min/max reduction operator.
4533          RHSExpr = ACO->getCond();
4534        }
4535        if (auto *BORHS =
4536                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
4537          EExpr = BORHS->getRHS();
4538          BO = BORHS->getOpcode();
4539        }
4540      }
4541      if (XExpr) {
4542        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4543        auto &&AtomicRedGen = [BO, VD, IPriv,
4544                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
4545                                    const Expr *EExpr, const Expr *UpExpr) {
4546          LValue X = CGF.EmitLValue(XExpr);
4547          RValue E;
4548          if (EExpr)
4549            E = CGF.EmitAnyExpr(EExpr);
4550          CGF.EmitOMPAtomicSimpleUpdateExpr(
4551              X, E, BO, /*IsXLHSInRHSPart=*/true,
4552              llvm::AtomicOrdering::Monotonic, Loc,
4553              [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
4554                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4555                PrivateScope.addPrivate(
4556                    VD, [&CGF, VD, XRValue, Loc]() -> Address {
4557                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
4558                      CGF.emitOMPSimpleStore(
4559                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
4560                          VD->getType().getNonReferenceType(), Loc);
4561                      return LHSTemp;
4562                    });
4563                (void)PrivateScope.Privatize();
4564                return CGF.EmitAnyExpr(UpExpr);
4565              });
4566        };
4567        if ((*IPriv)->getType()->isArrayType()) {
4568          // Emit atomic reduction for array section.
4569          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4570          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
4571                                    AtomicRedGen, XExpr, EExpr, UpExpr);
4572        } else
4573          // Emit atomic reduction for array subscript or single variable.
4574          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
4575      } else {
4576        // Emit as a critical region.
4577        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
4578                                     const Expr *, const Expr *) {
4579          auto &RT = CGF.CGM.getOpenMPRuntime();
4580          RT.emitCriticalRegion(
4581              CGF, ".atomic_reduction",
4582              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
4583                Action.Enter(CGF);
4584                emitReductionCombiner(CGF, E);
4585              },
4586              Loc);
4587        };
4588        if ((*IPriv)->getType()->isArrayType()) {
4589          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4590          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4591          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4592                                    CritRedGen);
4593        } else
4594          CritRedGen(CGF, nullptr, nullptr, nullptr);
4595      }
4596      ++ILHS;
4597      ++IRHS;
4598      ++IPriv;
4599    }
4600  };
4601  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
4602  if (!WithNowait) {
4603    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
4604    llvm::Value *EndArgs[] = {
4605        IdentTLoc, // ident_t *<loc>
4606        ThreadId,  // i32 <gtid>
4607        Lock       // kmp_critical_name *&<lock>
4608    };
4609    CommonActionTy Action(nullptr, llvm::None,
4610                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
4611                          EndArgs);
4612    AtomicRCG.setAction(Action);
4613    AtomicRCG(CGF);
4614  } else
4615    AtomicRCG(CGF);
4616
4617  CGF.EmitBranch(DefaultBB);
4618  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
4619}
4620
4621void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
4622                                       SourceLocation Loc) {
4623  if (!CGF.HaveInsertPoint())
4624    return;
4625  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
4626  // global_tid);
4627  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
4628  // Ignore return result until untied tasks are supported.
4629  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
4630  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4631    Region->emitUntiedSwitch(CGF);
4632}
4633
4634void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
4635                                           OpenMPDirectiveKind InnerKind,
4636                                           const RegionCodeGenTy &CodeGen,
4637                                           bool HasCancel) {
4638  if (!CGF.HaveInsertPoint())
4639    return;
4640  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
4641  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
4642}
4643
4644namespace {
4645enum RTCancelKind {
4646  CancelNoreq = 0,
4647  CancelParallel = 1,
4648  CancelLoop = 2,
4649  CancelSections = 3,
4650  CancelTaskgroup = 4
4651};
4652} // anonymous namespace
4653
4654static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
4655  RTCancelKind CancelKind = CancelNoreq;
4656  if (CancelRegion == OMPD_parallel)
4657    CancelKind = CancelParallel;
4658  else if (CancelRegion == OMPD_for)
4659    CancelKind = CancelLoop;
4660  else if (CancelRegion == OMPD_sections)
4661    CancelKind = CancelSections;
4662  else {
4663    assert(CancelRegion == OMPD_taskgroup);
4664    CancelKind = CancelTaskgroup;
4665  }
4666  return CancelKind;
4667}
4668
4669void CGOpenMPRuntime::emitCancellationPointCall(
4670    CodeGenFunction &CGF, SourceLocation Loc,
4671    OpenMPDirectiveKind CancelRegion) {
4672  if (!CGF.HaveInsertPoint())
4673    return;
4674  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
4675  // global_tid, kmp_int32 cncl_kind);
4676  if (auto *OMPRegionInfo =
4677          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4678    if (OMPRegionInfo->hasCancel()) {
4679      llvm::Value *Args[] = {
4680          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
4681          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4682      // Check whether cancellation has been activated.
4683      auto *Result = CGF.EmitRuntimeCall(
4684          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
4685      // if (__kmpc_cancellationpoint()) {
4686      //  __kmpc_cancel_barrier();
4687      //   exit from construct;
4688      // }
4689      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4690      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4691      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4692      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4693      CGF.EmitBlock(ExitBB);
4694      // __kmpc_cancel_barrier();
4695      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4696      // exit from construct;
4697      auto CancelDest =
4698          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4699      CGF.EmitBranchThroughCleanup(CancelDest);
4700      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4701    }
4702  }
4703}
4704
4705void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
4706                                     const Expr *IfCond,
4707                                     OpenMPDirectiveKind CancelRegion) {
4708  if (!CGF.HaveInsertPoint())
4709    return;
4710  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
4711  // kmp_int32 cncl_kind);
4712  if (auto *OMPRegionInfo =
4713          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
4714    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
4715                                                        PrePostActionTy &) {
4716      auto &RT = CGF.CGM.getOpenMPRuntime();
4717      llvm::Value *Args[] = {
4718          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
4719          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
4720      // Check whether cancellation has been activated.
4721      auto *Result = CGF.EmitRuntimeCall(
4722          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
4723      // if (__kmpc_cancel()) {
4724      //  __kmpc_cancel_barrier();
4725      //   exit from construct;
4726      // }
4727      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
4728      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
4729      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
4730      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
4731      CGF.EmitBlock(ExitBB);
4732      // __kmpc_cancel_barrier();
4733      RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
4734      // exit from construct;
4735      auto CancelDest =
4736          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
4737      CGF.EmitBranchThroughCleanup(CancelDest);
4738      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
4739    };
4740    if (IfCond)
4741      emitOMPIfClause(CGF, IfCond, ThenGen,
4742                      [](CodeGenFunction &, PrePostActionTy &) {});
4743    else {
4744      RegionCodeGenTy ThenRCG(ThenGen);
4745      ThenRCG(CGF);
4746    }
4747  }
4748}
4749
4750/// \brief Obtain information that uniquely identifies a target entry. This
4751/// consists of the file and device IDs as well as line number associated with
4752/// the relevant entry source location.
4753static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
4754                                     unsigned &DeviceID, unsigned &FileID,
4755                                     unsigned &LineNum) {
4756
4757  auto &SM = C.getSourceManager();
4758
4759  // The loc should always be valid and have a file ID (the user cannot use
4760  // #pragma directives in macros).
4761
4762  assert(Loc.isValid() && "Source location is expected to be always valid.");
4763  assert(Loc.isFileID() && "Source location is expected to refer to a file.");
4764
4765  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
4766  assert(PLoc.isValid() && "Source location is expected to be always valid.");
4767
4768  llvm::sys::fs::UniqueID ID;
4769  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
4770    llvm_unreachable("Source file with target region no longer exists!");
4771
4772  DeviceID = ID.getDevice();
4773  FileID = ID.getFile();
4774  LineNum = PLoc.getLine();
4775}
4776
4777void CGOpenMPRuntime::emitTargetOutlinedFunction(
4778    const OMPExecutableDirective &D, StringRef ParentName,
4779    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4780    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4781  assert(!ParentName.empty() && "Invalid target region parent name!");
4782
4783  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
4784                                   IsOffloadEntry, CodeGen);
4785}
4786
4787void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
4788    const OMPExecutableDirective &D, StringRef ParentName,
4789    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
4790    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
4791  // Create a unique name for the entry function using the source location
4792  // information of the current target region. The name will be something like:
4793  //
4794  // __omp_offloading_DD_FFFF_PP_lBB
4795  //
4796  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
4797  // mangled name of the function that encloses the target region and BB is the
4798  // line number of the target region.
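  //
  // For example (values purely illustrative): a target region at line 42 of a
  // file with device/file IDs 0x803/0x7b1c, enclosed in 'foo', would be named
  // __omp_offloading_803_7b1c_foo_l42.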
4799
4800  unsigned DeviceID;
4801  unsigned FileID;
4802  unsigned Line;
4803  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
4804                           Line);
4805  SmallString<64> EntryFnName;
4806  {
4807    llvm::raw_svector_ostream OS(EntryFnName);
4808    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
4809       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
4810  }
4811
4812  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4813
4814  CodeGenFunction CGF(CGM, true);
4815  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
4816  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4817
4818  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
4819
4820  // If this target outlined function is not an offload entry, we don't need to
4821  // register it.
4822  if (!IsOffloadEntry)
4823    return;
4824
4825  // The target region ID is used by the runtime library to identify the current
4826  // target region, so it only has to be unique and not necessarily point to
4827  // anything. It could be the pointer to the outlined function that implements
4828  // the target region, but we do not use that pointer, so the compiler is not
4829  // forced to keep it alive and can inline the host function if that proves
4830  // worthwhile during optimization. On the other hand, when emitting code for
4831  // the device, the ID has to be the function address so that it can be
4832  // retrieved from the offloading entry and launched by the runtime library. We
4833  // also give the outlined function external linkage when emitting code for the
4834  // device, because these functions will be entry points into the device.
4835
4836  if (CGM.getLangOpts().OpenMPIsDevice) {
4837    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
4838    OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
4839  } else
4840    OutlinedFnID = new llvm::GlobalVariable(
4841        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
4842        llvm::GlobalValue::PrivateLinkage,
4843        llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
4844
4845  // Register the information for the entry associated with this target region.
4846  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
4847      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
4848}
4849
4850/// Discard all CompoundStmts intervening between two constructs.
4851static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
4852  while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
4853    Body = CS->body_front();
4854
4855  return Body;
4856}
4857
4858/// \brief Emit the num_teams clause of an enclosed teams directive at the
4859/// target region scope. If there is no teams directive associated with the
4860/// target directive, or if there is no num_teams clause associated with the
4861/// enclosed teams directive, return nullptr.
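/// For example, for a '#pragma omp target' whose body starts with
/// '#pragma omp teams num_teams(n)', this emits 'n' cast to a 32-bit integer;
/// a teams directive with no num_teams clause yields the constant 0.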
4862static llvm::Value *
4863emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4864                                     CodeGenFunction &CGF,
4865                                     const OMPExecutableDirective &D) {
4866
4867  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4868                                              "teams directive expected to be "
4869                                              "emitted only for the host!");
4870
4871  // FIXME: For the moment we do not support combined directives with target and
4872  // teams, so we do not expect to get any num_teams clause in the provided
4873  // directive. Once we support that, this assertion can be replaced by the
4874  // actual emission of the clause expression.
4875  assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4876         "Not expecting clause in directive.");
4877
4878  // If the current target region has a teams region enclosed, we need to get
4879  // the number of teams to pass to the runtime function call. This is done
4880  // by generating the expression in an inlined region. This is required because
4881  // the expression is captured in the enclosing target environment when the
4882  // teams directive is not combined with target.
4883
4884  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4885
4886  // FIXME: Accommodate other combined directives with teams when they become
4887  // available.
4888  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4889          ignoreCompoundStmts(CS.getCapturedStmt()))) {
4890    if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4891      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4892      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4893      llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4894      return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4895                                       /*IsSigned=*/true);
4896    }
4897
4898    // If we have an enclosed teams directive but no num_teams clause we use
4899    // the default value 0.
4900    return CGF.Builder.getInt32(0);
4901  }
4902
4903  // No teams associated with the directive.
4904  return nullptr;
4905}
4906
4907/// \brief Emit the thread_limit clause of an enclosed teams directive at the
4908/// target region scope. If there is no teams directive associated with the
4909/// target directive, or if there is no thread_limit clause associated with the
4910/// enclosed teams directive, return nullptr.
4911static llvm::Value *
4912emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4913                                        CodeGenFunction &CGF,
4914                                        const OMPExecutableDirective &D) {
4915
4916  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4917                                              "teams directive expected to be "
4918                                              "emitted only for the host!");
4919
4920  // FIXME: For the moment we do not support combined directives with target and
4921  // teams, so we do not expect to get any thread_limit clause in the provided
4922  // directive. Once we support that, this assertion can be replaced by the
4923  // actual emission of the clause expression.
4924  assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4925         "Not expecting clause in directive.");
4926
4927  // If the current target region has a teams region enclosed, we need to get
4928  // the thread limit to pass to the runtime function call. This is done
4929  // by generating the expression in an inlined region. This is required because
4930  // the expression is captured in the enclosing target environment when the
4931  // teams directive is not combined with target.
4932
4933  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4934
4935  // FIXME: Accommodate other combined directives with teams when they become
4936  // available.
4937  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
4938          ignoreCompoundStmts(CS.getCapturedStmt()))) {
4939    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4940      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4941      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4942      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4943      return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4944                                       /*IsSigned=*/true);
4945    }
4946
4947    // If we have an enclosed teams directive but no thread_limit clause we use
4948    // the default value 0.
4949    return CGF.Builder.getInt32(0);
4950  }
4951
4952  // No teams associated with the directive.
4953  return nullptr;
4954}
4955
4956namespace {
4957/// \brief Utility to handle information from clauses associated with a given
4958/// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
4959/// It provides a convenient interface to obtain the information and generate
4960/// code for that information.
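/// Typical usage: construct the handler with the directive and the current
/// CodeGenFunction, then call generateAllInfo() or generateInfoForCapture()
/// to obtain the base pointer, pointer, size and map-type arrays that are
/// passed to the offloading runtime calls.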
4961class MappableExprsHandler {
4962public:
4963  /// \brief Values for bit flags used to specify the mapping type for
4964  /// offloading.
4965  enum OpenMPOffloadMappingFlags {
4966    /// \brief Allocate memory on the device and move data from host to device.
4967    OMP_MAP_TO = 0x01,
4968    /// \brief Allocate memory on the device and move data from device to host.
4969    OMP_MAP_FROM = 0x02,
4970    /// \brief Always perform the requested mapping action on the element, even
4971    /// if it was already mapped before.
4972    OMP_MAP_ALWAYS = 0x04,
4973    /// \brief Delete the element from the device environment, ignoring the
4974    /// current reference count associated with the element.
4975    OMP_MAP_DELETE = 0x08,
4976    /// \brief The element being mapped is a pointer, therefore the pointee
4977    /// should be mapped as well.
4978    OMP_MAP_IS_PTR = 0x10,
4979    /// \brief This flag signals that an argument is the first one relating to
4980    /// a map/private clause expression. In some cases a single
4981    /// map/privatization results in multiple arguments passed to the runtime
4982    /// library.
4983    OMP_MAP_FIRST_REF = 0x20,
4984    /// \brief This flag signals that the reference being passed is a pointer to
4985    /// private data.
4986    OMP_MAP_PRIVATE_PTR = 0x80,
4987    /// \brief Pass the element to the device by value.
4988    OMP_MAP_PRIVATE_VAL = 0x100,
4989  };
4990
4991  typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
4992  typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
4993
4994private:
4995  /// \brief Directive from where the map clauses were extracted.
4996  const OMPExecutableDirective &Directive;
4997
4998  /// \brief Function the directive is being generated for.
4999  CodeGenFunction &CGF;
5000
5001  /// \brief Set of all first private variables in the current directive.
5002  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
5003
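  /// \brief Return the size (in bytes) of the type of the given expression.
  /// For array sections the size is computed from the section length (or from
  /// the whole base when no length is provided) rather than from the type of
  /// the base expression.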
5004  llvm::Value *getExprTypeSize(const Expr *E) const {
5005    auto ExprTy = E->getType().getCanonicalType();
5006
5007    // Reference types are ignored for mapping purposes.
5008    if (auto *RefTy = ExprTy->getAs<ReferenceType>())
5009      ExprTy = RefTy->getPointeeType().getCanonicalType();
5010
5011    // Given that an array section is considered a built-in type, we need to
5012    // do the calculation based on the length of the section instead of relying
5013    // on CGF.getTypeSize(E->getType()).
5014    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
5015      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
5016                            OAE->getBase()->IgnoreParenImpCasts())
5017                            .getCanonicalType();
5018
5019      // If there is no length associated with the expression, that means we
5020      // are using the whole length of the base.
5021      if (!OAE->getLength() && OAE->getColonLoc().isValid())
5022        return CGF.getTypeSize(BaseTy);
5023
5024      llvm::Value *ElemSize;
5025      if (auto *PTy = BaseTy->getAs<PointerType>())
5026        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
5027      else {
5028        auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
5029        assert(ATy && "Expecting array type if not a pointer type.");
5030        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
5031      }
5032
5033      // If we don't have a length at this point, that is because we have an
5034      // array section with a single element.
5035      if (!OAE->getLength())
5036        return ElemSize;
5037
5038      auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
5039      LengthVal =
5040          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
5041      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
5042    }
5043    return CGF.getTypeSize(ExprTy);
5044  }
5045
5046  /// \brief Return the corresponding bits for a given map clause modifier. Add
5047  /// a flag marking the map as a pointer if requested. Add a flag marking the
5048  /// map as the first one of a series of maps that relate to the same map
5049  /// expression.
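  /// For example, 'map(always, tofrom: ...)' produces OMP_MAP_TO |
  /// OMP_MAP_FROM | OMP_MAP_ALWAYS, and OMP_MAP_IS_PTR / OMP_MAP_FIRST_REF are
  /// OR'd in when \a AddPtrFlag / \a AddIsFirstFlag are set.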
5050  unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
5051                          OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
5052                          bool AddIsFirstFlag) const {
5053    unsigned Bits = 0u;
5054    switch (MapType) {
5055    case OMPC_MAP_alloc:
5056    case OMPC_MAP_release:
5057      // alloc and release are the default behavior in the runtime library, i.e.
5058      // if we don't pass any bits for alloc/release, that is what the runtime is
5059      // going to do. Therefore, we don't need to signal anything for these two
5060      // map types.
5061      break;
5062    case OMPC_MAP_to:
5063      Bits = OMP_MAP_TO;
5064      break;
5065    case OMPC_MAP_from:
5066      Bits = OMP_MAP_FROM;
5067      break;
5068    case OMPC_MAP_tofrom:
5069      Bits = OMP_MAP_TO | OMP_MAP_FROM;
5070      break;
5071    case OMPC_MAP_delete:
5072      Bits = OMP_MAP_DELETE;
5073      break;
5074    default:
5075      llvm_unreachable("Unexpected map type!");
5076      break;
5077    }
5078    if (AddPtrFlag)
5079      Bits |= OMP_MAP_IS_PTR;
5080    if (AddIsFirstFlag)
5081      Bits |= OMP_MAP_FIRST_REF;
5082    if (MapTypeModifier == OMPC_MAP_always)
5083      Bits |= OMP_MAP_ALWAYS;
5084    return Bits;
5085  }
5086
5087  /// \brief Return true if the provided expression is a final array section. A
5088  /// final array section is one whose length can't be proved to be one.
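  /// For example, 'a[3:1]' (a section with a constant length of one) is not
  /// final, whereas 'a[3:n]' or a section whose length cannot be evaluated at
  /// compile time is.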
5089  bool isFinalArraySectionExpression(const Expr *E) const {
5090    auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
5091
5092    // It is not an array section and therefore not a unity-size one.
5093    if (!OASE)
5094      return false;
5095
5096    // An array section with no colon always refers to a single element.
5097    if (OASE->getColonLoc().isInvalid())
5098      return false;
5099
5100    auto *Length = OASE->getLength();
5101
5102    // If we don't have a length, we have to check whether the array has size 1
5103    // for this dimension. Also, we should always expect a length if the
5104    // base type is a pointer.
5105    if (!Length) {
5106      auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
5107                         OASE->getBase()->IgnoreParenImpCasts())
5108                         .getCanonicalType();
5109      if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
5110        return ATy->getSize().getSExtValue() != 1;
5111      // If we don't have a constant dimension length, we have to consider
5112      // the current section as having any size, so it is not necessarily
5113      // unitary. If it happens to be unity size, that's the user's fault.
5114      return true;
5115    }
5116
5117    // Check if the length evaluates to 1.
5118    llvm::APSInt ConstLength;
5119    if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
5120      return true; // Can have more than size 1.
5121
5122    return ConstLength.getSExtValue() != 1;
5123  }
5124
5125  /// \brief Generate the base pointers, section pointers, sizes and map type
5126  /// bits for the provided map type, map modifier, and expression components.
5127  /// \a IsFirstComponentList should be set to true if the provided set of
5128  /// components is the first associated with a capture.
5129  void generateInfoForComponentList(
5130      OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
5131      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
5132      MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
5133      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
5134      bool IsFirstComponentList) const {
5135
5136    // The following summarizes what has to be generated for each map and the
5137    // types below. The generated information is expressed in this order:
5138    // base pointer, section pointer, size, flags
5139    // (to add to the ones that come from the map type and modifier).
5140    //
5141    // double d;
5142    // int i[100];
5143    // float *p;
5144    //
5145    // struct S1 {
5146    //   int i;
5147    //   float f[50];
5148    // }
5149    // struct S2 {
5150    //   int i;
5151    //   float f[50];
5152    //   S1 s;
5153    //   double *p;
5154    //   struct S2 *ps;
5155    // }
5156    // S2 s;
5157    // S2 *ps;
5158    //
5159    // map(d)
5160    // &d, &d, sizeof(double), noflags
5161    //
5162    // map(i)
5163    // &i, &i, 100*sizeof(int), noflags
5164    //
5165    // map(i[1:23])
5166    // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
5167    //
5168    // map(p)
5169    // &p, &p, sizeof(float*), noflags
5170    //
5171    // map(p[1:24])
5172    // p, &p[1], 24*sizeof(float), noflags
5173    //
5174    // map(s)
5175    // &s, &s, sizeof(S2), noflags
5176    //
5177    // map(s.i)
5178    // &s, &(s.i), sizeof(int), noflags
5179    //
5180    // map(s.s.f)
5181    // &s, &(s.s.f[0]), 50*sizeof(float), noflags
5182    //
5183    // map(s.p)
5184    // &s, &(s.p), sizeof(double*), noflags
5185    //
5186    // map(s.p[:22], s.a, s.b)
5187    // &s, &(s.p), sizeof(double*), noflags
5188    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
5189    //
5190    // map(s.ps)
5191    // &s, &(s.ps), sizeof(S2*), noflags
5192    //
5193    // map(s.ps->s.i)
5194    // &s, &(s.ps), sizeof(S2*), noflags
5195    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
5196    //
5197    // map(s.ps->ps)
5198    // &s, &(s.ps), sizeof(S2*), noflags
5199    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5200    //
5201    // map(s.ps->ps->ps)
5202    // &s, &(s.ps), sizeof(S2*), noflags
5203    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5204    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5205    //
5206    // map(s.ps->ps->s.f[:22])
5207    // &s, &(s.ps), sizeof(S2*), noflags
5208    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
5209    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
5210    //
5211    // map(ps)
5212    // &ps, &ps, sizeof(S2*), noflags
5213    //
5214    // map(ps->i)
5215    // ps, &(ps->i), sizeof(int), noflags
5216    //
5217    // map(ps->s.f)
5218    // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
5219    //
5220    // map(ps->p)
5221    // ps, &(ps->p), sizeof(double*), noflags
5222    //
5223    // map(ps->p[:22])
5224    // ps, &(ps->p), sizeof(double*), noflags
5225    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
5226    //
5227    // map(ps->ps)
5228    // ps, &(ps->ps), sizeof(S2*), noflags
5229    //
5230    // map(ps->ps->s.i)
5231    // ps, &(ps->ps), sizeof(S2*), noflags
5232    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
5233    //
5234    // map(ps->ps->ps)
5235    // ps, &(ps->ps), sizeof(S2*), noflags
5236    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5237    //
5238    // map(ps->ps->ps->ps)
5239    // ps, &(ps->ps), sizeof(S2*), noflags
5240    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5241    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5242    //
5243    // map(ps->ps->ps->s.f[:22])
5244    // ps, &(ps->ps), sizeof(S2*), noflags
5245    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
5246    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
5247    // extra_flag
5248
5249    // Track if the map information being generated is the first for a capture.
5250    bool IsCaptureFirstInfo = IsFirstComponentList;
5251
5252    // Scan the components from the base to the complete expression.
5253    auto CI = Components.rbegin();
5254    auto CE = Components.rend();
5255    auto I = CI;
5256
5257    // Track if the map information being generated is the first for a list of
5258    // components.
5259    bool IsExpressionFirstInfo = true;
5260    llvm::Value *BP = nullptr;
5261
5262    if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
5263      // The base is the 'this' pointer. The content of the pointer is going
5264      // to be the base of the field being mapped.
5265      BP = CGF.EmitScalarExpr(ME->getBase());
5266    } else {
5267      // The base is the reference to the variable.
5268      // BP = &Var.
5269      BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
5270               .getPointer();
5271
5272      // If the variable is a pointer and is being dereferenced (i.e. is not
5273      // the last component), the base has to be the pointer itself, not its
5274      // reference.
5275      if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() &&
5276          std::next(I) != CE) {
5277        auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(
5278            BP, I->getAssociatedDeclaration()->getType());
5279        BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
5280                                         I->getAssociatedDeclaration()
5281                                             ->getType()
5282                                             ->getAs<PointerType>())
5283                 .getPointer();
5284
5285        // We do not need to generate individual map information for the
5286        // pointer; it can be associated with the combined storage.
5287        ++I;
5288      }
5289    }
5290
5291    for (; I != CE; ++I) {
5292      auto Next = std::next(I);
5293
5294      // We need to generate the addresses and sizes if this is the last
5295      // component, if the component is a pointer or if it is an array section
5296      // whose length can't be proved to be one. If this is a pointer, it
5297      // becomes the base address for the following components.
5298
5299      // A final array section is one whose length can't be proved to be one.
5300      bool IsFinalArraySection =
5301          isFinalArraySectionExpression(I->getAssociatedExpression());
5302
5303      // Get information on whether the element is a pointer. We have to treat
5304      // array sections specially given that they are built-in
5305      // types.
5306      const auto *OASE =
5307          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
5308      bool IsPointer =
5309          (OASE &&
5310           OMPArraySectionExpr::getBaseOriginalType(OASE)
5311               .getCanonicalType()
5312               ->isAnyPointerType()) ||
5313          I->getAssociatedExpression()->getType()->isAnyPointerType();
5314
5315      if (Next == CE || IsPointer || IsFinalArraySection) {
5316
5317        // If this is not the last component, we expect the pointer to be
5318        // associated with an array expression or member expression.
5319        assert((Next == CE ||
5320                isa<MemberExpr>(Next->getAssociatedExpression()) ||
5321                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
5322                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
5323               "Unexpected expression");
5324
5325        // Save the base we are currently using.
5326        BasePointers.push_back(BP);
5327
5328        auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
5329        auto *Size = getExprTypeSize(I->getAssociatedExpression());
5330
5331        Pointers.push_back(LB);
5332        Sizes.push_back(Size);
5333        // We need to add a pointer flag for each map that comes from the
5334        // same expression except for the first one. We also need to signal
5335        // this map is the first one that relates to the current capture
5336        // (there is a set of entries for each capture).
5337        Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
5338                                       !IsExpressionFirstInfo,
5339                                       IsCaptureFirstInfo));
5340
5341        // If we have a final array section, we are done with this expression.
5342        if (IsFinalArraySection)
5343          break;
5344
5345        // The pointer becomes the base for the next element.
5346        if (Next != CE)
5347          BP = LB;
5348
5349        IsExpressionFirstInfo = false;
5350        IsCaptureFirstInfo = false;
5351        continue;
5352      }
5353    }
5354  }
5355
5356  /// \brief Return the adjusted map modifiers if the declaration a capture
5357  /// refers to appears in a firstprivate clause. This is expected to be used
5358  /// only with directives that start with 'target'.
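  /// For example, a variable that is captured by reference and also appears in
  /// a firstprivate clause is mapped with OMP_MAP_PRIVATE_PTR | OMP_MAP_TO
  /// instead of the modifiers passed in.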
5359  unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
5360                                               unsigned CurrentModifiers) {
5361    assert(Cap.capturesVariable() && "Expected capture by reference only!");
5362
5363    // A firstprivate variable captured by reference will use only the
5364    // 'private ptr' and 'map to' flags. Return the right flags if the captured
5365    // declaration is known as firstprivate in this handler.
5366    if (FirstPrivateDecls.count(Cap.getCapturedVar()))
5367      return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
5368             MappableExprsHandler::OMP_MAP_TO;
5369
5370    // We didn't modify anything.
5371    return CurrentModifiers;
5372  }
5373
5374public:
5375  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
5376      : Directive(Dir), CGF(CGF) {
5377    // Extract firstprivate clause information.
5378    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
5379      for (const auto *D : C->varlists())
5380        FirstPrivateDecls.insert(
5381            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
5382  }
5383
5384  /// \brief Generate all the base pointers, section pointers, sizes and map
5385  /// types for the extracted mappable expressions.
5386  void generateAllInfo(MapValuesArrayTy &BasePointers,
5387                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
5388                       MapFlagsArrayTy &Types) const {
5389    BasePointers.clear();
5390    Pointers.clear();
5391    Sizes.clear();
5392    Types.clear();
5393
5394    struct MapInfo {
5395      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
5396      OpenMPMapClauseKind MapType;
5397      OpenMPMapClauseKind MapTypeModifier;
5398    };
5399
5400    // We have to process the component lists that relate to the same
5401    // declaration in a single chunk so that we can generate the map flags
5402    // correctly. Therefore, we organize all lists in a map.
5403    llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
5404
5405    // Helper function to fill the information map for the different supported
5406    // clauses.
5407    auto &&InfoGen =
5408        [&Info](const ValueDecl *D,
5409                OMPClauseMappableExprCommon::MappableExprComponentListRef L,
5410                OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) {
5411          const ValueDecl *VD =
5412              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
5413          Info[VD].push_back({L, MapType, MapModifier});
5414        };
5415
5416    for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5417      for (auto L : C->component_lists())
5418        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier());
5419    for (auto *C : Directive.getClausesOfKind<OMPToClause>())
5420      for (auto L : C->component_lists())
5421        InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown);
5422    for (auto *C : Directive.getClausesOfKind<OMPFromClause>())
5423      for (auto L : C->component_lists())
5424        InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown);
5425
5426    for (auto &M : Info) {
5427      // We need to know when we generate information for the first component
5428      // associated with a capture, because the mapping flags depend on it.
5429      bool IsFirstComponentList = true;
5430      for (MapInfo &L : M.second) {
5431        assert(!L.Components.empty() &&
5432               "Not expecting declaration with no component lists.");
5433        generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
5434                                     BasePointers, Pointers, Sizes, Types,
5435                                     IsFirstComponentList);
5436        IsFirstComponentList = false;
5437      }
5438    }
5439  }
5440
5441  /// \brief Generate the base pointers, section pointers, sizes and map types
5442  /// associated with a given capture.
5443  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
5444                              MapValuesArrayTy &BasePointers,
5445                              MapValuesArrayTy &Pointers,
5446                              MapValuesArrayTy &Sizes,
5447                              MapFlagsArrayTy &Types) const {
5448    assert(!Cap->capturesVariableArrayType() &&
5449           "Not expecting to generate map info for a variable array type!");
5450
5451    BasePointers.clear();
5452    Pointers.clear();
5453    Sizes.clear();
5454    Types.clear();
5455
5456    const ValueDecl *VD =
5457        Cap->capturesThis()
5458            ? nullptr
5459            : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
5460
5461    // We need to know when we are generating information for the first component
5462    // associated with a capture, because the mapping flags depend on it.
5463    bool IsFirstComponentList = true;
5464    for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
5465      for (auto L : C->decl_component_lists(VD)) {
5466        assert(L.first == VD &&
5467               "We got information for the wrong declaration??");
5468        assert(!L.second.empty() &&
5469               "Not expecting declaration with no component lists.");
5470        generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
5471                                     L.second, BasePointers, Pointers, Sizes,
5472                                     Types, IsFirstComponentList);
5473        IsFirstComponentList = false;
5474      }
5475
5476    return;
5477  }
5478
5479  /// \brief Generate the default map information for a given capture \a CI,
5480  /// record field declaration \a RI and captured value \a CV.
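  /// For example, a scalar captured by reference gets a 'to' map by default,
  /// while an aggregate captured by reference gets 'tofrom'; every default map
  /// entry is also marked with OMP_MAP_FIRST_REF.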
5481  void generateDefaultMapInfo(
5482      const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV,
5483      MappableExprsHandler::MapValuesArrayTy &CurBasePointers,
5484      MappableExprsHandler::MapValuesArrayTy &CurPointers,
5485      MappableExprsHandler::MapValuesArrayTy &CurSizes,
5486      MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) {
5487
5488    // Do the default mapping.
5489    if (CI.capturesThis()) {
5490      CurBasePointers.push_back(CV);
5491      CurPointers.push_back(CV);
5492      const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
5493      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
5494      // Default map type.
5495      CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO |
5496                            MappableExprsHandler::OMP_MAP_FROM);
5497    } else if (CI.capturesVariableByCopy()) {
5498      CurBasePointers.push_back(CV);
5499      CurPointers.push_back(CV);
5500      if (!RI.getType()->isAnyPointerType()) {
5501        // We have to signal to the runtime that this capture is passed by
5502        // value and is not a pointer.
5503        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL);
5504        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
5505      } else {
5506        // Pointers are implicitly mapped with a zero size and no flags
5507        // (other than the first-reference flag added to every default map).
5508        CurMapTypes.push_back(0u);
5509        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
5510      }
5511    } else {
5512      assert(CI.capturesVariable() && "Expected captured reference.");
5513      CurBasePointers.push_back(CV);
5514      CurPointers.push_back(CV);
5515
5516      const ReferenceType *PtrTy =
5517          cast<ReferenceType>(RI.getType().getTypePtr());
5518      QualType ElementType = PtrTy->getPointeeType();
5519      CurSizes.push_back(CGF.getTypeSize(ElementType));
5520      // The default map type for a scalar/complex type is 'to' because by
5521      // default the value doesn't have to be retrieved. For an aggregate
5522      // type, the default is 'tofrom'.
5523      CurMapTypes.push_back(ElementType->isAggregateType()
5524                                ? (MappableExprsHandler::OMP_MAP_TO |
5525                                   MappableExprsHandler::OMP_MAP_FROM)
5526                                : MappableExprsHandler::OMP_MAP_TO);
5527
5528      // If we have a capture by reference we may need to add the private
5529      // pointer flag if the base declaration shows up in some firstprivate
5530      // clause.
5531      CurMapTypes.back() =
5532          adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
5533    }
5534    // Every default map produces a single argument, so it is always the
5535    // first one.
5536    CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF;
5537  }
5538};
5539
5540enum OpenMPOffloadingReservedDeviceIDs {
5541  /// \brief Device ID used when the device was not specified; the runtime
5542  /// should get it from environment variables, as described in the spec.
5543  OMP_DEVICEID_UNDEF = -1,
5544};
5545} // anonymous namespace
5546
5547/// \brief Emit the arrays used to pass the captures and map information to the
5548/// offloading runtime library. If there is no map or capture information,
5549/// return nullptr by reference.
5550static void
5551emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
5552                     llvm::Value *&PointersArray, llvm::Value *&SizesArray,
5553                     llvm::Value *&MapTypesArray,
5554                     MappableExprsHandler::MapValuesArrayTy &BasePointers,
5555                     MappableExprsHandler::MapValuesArrayTy &Pointers,
5556                     MappableExprsHandler::MapValuesArrayTy &Sizes,
5557                     MappableExprsHandler::MapFlagsArrayTy &MapTypes) {
5558  auto &CGM = CGF.CGM;
5559  auto &Ctx = CGF.getContext();
5560
5561  BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr;
5562
5563  if (unsigned PointerNumVal = BasePointers.size()) {
5564    // Detect if we have any capture size requiring runtime evaluation of the
5565    // size so that a constant array can eventually be used.
5566    bool hasRuntimeEvaluationCaptureSize = false;
5567    for (auto *S : Sizes)
5568      if (!isa<llvm::Constant>(S)) {
5569        hasRuntimeEvaluationCaptureSize = true;
5570        break;
5571      }
5572
5573    llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
5574    QualType PointerArrayType =
5575        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
5576                                 /*IndexTypeQuals=*/0);
5577
5578    BasePointersArray =
5579        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
5580    PointersArray =
5581        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
5582
5583    // If we don't have any VLA types or other types that require runtime
5584    // evaluation, we can use a constant array for the map sizes, otherwise we
5585    // need to fill up the arrays as we do for the pointers.
5586    if (hasRuntimeEvaluationCaptureSize) {
5587      QualType SizeArrayType = Ctx.getConstantArrayType(
5588          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
5589          /*IndexTypeQuals=*/0);
5590      SizesArray =
5591          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
5592    } else {
5593      // We expect all the sizes to be constant, so we collect them to create
5594      // a constant array.
5595      SmallVector<llvm::Constant *, 16> ConstSizes;
5596      for (auto S : Sizes)
5597        ConstSizes.push_back(cast<llvm::Constant>(S));
5598
5599      auto *SizesArrayInit = llvm::ConstantArray::get(
5600          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
5601      auto *SizesArrayGbl = new llvm::GlobalVariable(
5602          CGM.getModule(), SizesArrayInit->getType(),
5603          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5604          SizesArrayInit, ".offload_sizes");
5605      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5606      SizesArray = SizesArrayGbl;
5607    }
5608
5609    // The map types are always constant so we don't need to generate code to
5610    // fill arrays. Instead, we create an array constant.
5611    llvm::Constant *MapTypesArrayInit =
5612        llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
5613    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
5614        CGM.getModule(), MapTypesArrayInit->getType(),
5615        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
5616        MapTypesArrayInit, ".offload_maptypes");
5617    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5618    MapTypesArray = MapTypesArrayGbl;
5619
5620    for (unsigned i = 0; i < PointerNumVal; ++i) {
5621      llvm::Value *BPVal = BasePointers[i];
5622      if (BPVal->getType()->isPointerTy())
5623        BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
5624      else {
5625        assert(BPVal->getType()->isIntegerTy() &&
5626               "If not a pointer, the value type must be an integer.");
5627        BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
5628      }
5629      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
5630          llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
5631          0, i);
5632      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5633      CGF.Builder.CreateStore(BPVal, BPAddr);
5634
5635      llvm::Value *PVal = Pointers[i];
5636      if (PVal->getType()->isPointerTy())
5637        PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
5638      else {
5639        assert(PVal->getType()->isIntegerTy() &&
5640               "If not a pointer, the value type must be an integer.");
5641        PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
5642      }
5643      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
5644          llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
5645          i);
5646      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
5647      CGF.Builder.CreateStore(PVal, PAddr);
5648
5649      if (hasRuntimeEvaluationCaptureSize) {
5650        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
5651            llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
5652            /*Idx0=*/0,
5653            /*Idx1=*/i);
5654        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
5655        CGF.Builder.CreateStore(
5656            CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
5657            SAddr);
5658      }
5659    }
5660  }
5661}

5662/// \brief Emit the arguments to be passed to the runtime library based on the
5663/// arrays of pointers, sizes and map types.
5664static void emitOffloadingArraysArgument(
5665    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
5666    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
5667    llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray,
5668    llvm::Value *PointersArray, llvm::Value *SizesArray,
5669    llvm::Value *MapTypesArray, unsigned NumElems) {
5670  auto &CGM = CGF.CGM;
5671  if (NumElems) {
5672    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5673        llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray,
5674        /*Idx0=*/0, /*Idx1=*/0);
5675    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5676        llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray,
5677        /*Idx0=*/0,
5678        /*Idx1=*/0);
5679    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5680        llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray,
5681        /*Idx0=*/0, /*Idx1=*/0);
5682    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
5683        llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray,
5684        /*Idx0=*/0,
5685        /*Idx1=*/0);
5686  } else {
5687    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5688    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
5689    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
5690    MapTypesArrayArg =
5691        llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
5692  }
5693}
5694
5695void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
5696                                     const OMPExecutableDirective &D,
5697                                     llvm::Value *OutlinedFn,
5698                                     llvm::Value *OutlinedFnID,
5699                                     const Expr *IfCond, const Expr *Device,
5700                                     ArrayRef<llvm::Value *> CapturedVars) {
5701  if (!CGF.HaveInsertPoint())
5702    return;
5703
5704  assert(OutlinedFn && "Invalid outlined function!");
5705
5706  auto &Ctx = CGF.getContext();
5707
5708  // Fill up the arrays with all the captured variables.
5709  MappableExprsHandler::MapValuesArrayTy KernelArgs;
5710  MappableExprsHandler::MapValuesArrayTy BasePointers;
5711  MappableExprsHandler::MapValuesArrayTy Pointers;
5712  MappableExprsHandler::MapValuesArrayTy Sizes;
5713  MappableExprsHandler::MapFlagsArrayTy MapTypes;
5714
5715  MappableExprsHandler::MapValuesArrayTy CurBasePointers;
5716  MappableExprsHandler::MapValuesArrayTy CurPointers;
5717  MappableExprsHandler::MapValuesArrayTy CurSizes;
5718  MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
5719
5720  // Get mappable expression information.
5721  MappableExprsHandler MEHandler(D, CGF);
5722
5723  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5724  auto RI = CS.getCapturedRecordDecl()->field_begin();
5725  auto CV = CapturedVars.begin();
5726  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
5727                                            CE = CS.capture_end();
5728       CI != CE; ++CI, ++RI, ++CV) {
5729    StringRef Name;
5730    QualType Ty;
5731
5732    CurBasePointers.clear();
5733    CurPointers.clear();
5734    CurSizes.clear();
5735    CurMapTypes.clear();
5736
5737    // VLA sizes are passed to the outlined region by copy and do not have map
5738    // information associated.
5739    if (CI->capturesVariableArrayType()) {
5740      CurBasePointers.push_back(*CV);
5741      CurPointers.push_back(*CV);
5742      CurSizes.push_back(CGF.getTypeSize(RI->getType()));
5743      // Copy to the device as an argument. No need to retrieve it.
5744      CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
5745                            MappableExprsHandler::OMP_MAP_FIRST_REF);
5746    } else {
5747      // If we have any information in the map clause, we use it; otherwise we
5748      // just do a default mapping.
5749      MEHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers,
5750                                       CurSizes, CurMapTypes);
5751      if (CurBasePointers.empty())
5752        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
5753                                         CurPointers, CurSizes, CurMapTypes);
5754    }
5755    // We expect to have at least an element of information for this capture.
5756    assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
5757    assert(CurBasePointers.size() == CurPointers.size() &&
5758           CurBasePointers.size() == CurSizes.size() &&
5759           CurBasePointers.size() == CurMapTypes.size() &&
5760           "Inconsistent map information sizes!");
5761
5762    // The kernel args are always the first elements of the base pointers
5763    // associated with a capture.
5764    KernelArgs.push_back(CurBasePointers.front());
5765    // We need to append the results of this capture to what we already have.
5766    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
5767    Pointers.append(CurPointers.begin(), CurPointers.end());
5768    Sizes.append(CurSizes.begin(), CurSizes.end());
5769    MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
5770  }
5771
5772  // Keep track of whether the host function has to be executed.
5773  auto OffloadErrorQType =
5774      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
5775  auto OffloadError = CGF.MakeAddrLValue(
5776      CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
5777      OffloadErrorQType);
5778  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
5779                        OffloadError);
5780
5781  // Fill up the pointer arrays and transfer execution to the device.
5782  auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
5783                    OutlinedFnID, OffloadError, OffloadErrorQType,
5784                    &D](CodeGenFunction &CGF, PrePostActionTy &) {
5785    auto &RT = CGF.CGM.getOpenMPRuntime();
5786    // Emit the offloading arrays.
5787    llvm::Value *BasePointersArray;
5788    llvm::Value *PointersArray;
5789    llvm::Value *SizesArray;
5790    llvm::Value *MapTypesArray;
5791    emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
5792                         MapTypesArray, BasePointers, Pointers, Sizes,
5793                         MapTypes);
5794    emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
5795                                 SizesArray, MapTypesArray, BasePointersArray,
5796                                 PointersArray, SizesArray, MapTypesArray,
5797                                 BasePointers.size());
5798
5799    // On top of the arrays that were filled up, the target offloading call
5800    // takes as arguments the device id as well as the host pointer. The host
5801    // pointer is used by the runtime library to identify the current target
5802    // region, so it only has to be unique and not necessarily point to
5803    // anything. It could be the pointer to the outlined function that
5804    // implements the target region, but we aren't using that so that the
5805    // compiler doesn't need to keep that, and could therefore inline the host
5806    // function if proven worthwhile during optimization.
5807
5808    // From this point on, we need to have an ID of the target region defined.
5809    assert(OutlinedFnID && "Invalid outlined function ID!");
5810
5811    // Emit device ID if any.
5812    llvm::Value *DeviceID;
5813    if (Device)
5814      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5815                                           CGF.Int32Ty, /*isSigned=*/true);
5816    else
5817      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
5818
5819    // Emit the number of elements in the offloading arrays.
5820    llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
5821
5822    // Return value of the runtime offloading call.
5823    llvm::Value *Return;
5824
5825    auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
5826    auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
5827
5828    // If we have NumTeams defined, this means that we have an enclosed teams
5829    // region. Therefore, we also expect to have ThreadLimit defined. These two
5830    // values should be defined in the presence of a teams directive, regardless
5831    // of having any clauses associated. If the user is using teams but no
5832    // clauses, these two values will be the default that should be passed to
5833    // the runtime library - a 32-bit integer with the value zero.
5834    if (NumTeams) {
5835      assert(ThreadLimit && "Thread limit expression should be available along "
5836                            "with number of teams.");
5837      llvm::Value *OffloadingArgs[] = {
5838          DeviceID,          OutlinedFnID,  PointerNum,
5839          BasePointersArray, PointersArray, SizesArray,
5840          MapTypesArray,     NumTeams,      ThreadLimit};
5841      Return = CGF.EmitRuntimeCall(
5842          RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
5843    } else {
5844      llvm::Value *OffloadingArgs[] = {
5845          DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
5846          PointersArray, SizesArray,   MapTypesArray};
5847      Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
5848                                   OffloadingArgs);
5849    }
5850
5851    CGF.EmitStoreOfScalar(Return, OffloadError);
5852  };
5853
5854  // Notify that the host version must be executed.
5855  auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
5856    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
5857                          OffloadError);
5858  };
5859
5860  // If we have a target function ID, it means that we need to support
5861  // offloading; otherwise, just execute on the host. We need to execute on the
5862  // host regardless of the conditional in the if clause if, e.g., the user does
5863  // not specify any target triples.
5864  if (OutlinedFnID) {
5865    if (IfCond)
5866      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
5867    else {
5868      RegionCodeGenTy ThenRCG(ThenGen);
5869      ThenRCG(CGF);
5870    }
5871  } else {
5872    RegionCodeGenTy ElseRCG(ElseGen);
5873    ElseRCG(CGF);
5874  }
5875
5876  // Check the error code and execute the host version if required.
5877  auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
5878  auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
5879  auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
5880  auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
5881  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
5882
5883  CGF.EmitBlock(OffloadFailedBlock);
5884  CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
5885  CGF.EmitBranch(OffloadContBlock);
5886
5887  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
5888}
5889
5890void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
5891                                                    StringRef ParentName) {
5892  if (!S)
5893    return;
5894
5895  // If we find an OMP target directive, codegen the outlined function and
5896  // register the result.
5897  // FIXME: Add other directives with target when they become supported.
5898  bool isTargetDirective = isa<OMPTargetDirective>(S);
5899
5900  if (isTargetDirective) {
5901    auto *E = cast<OMPExecutableDirective>(S);
5902    unsigned DeviceID;
5903    unsigned FileID;
5904    unsigned Line;
5905    getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
5906                             FileID, Line);
5907
5908    // Is this a target region that should not be emitted as an entry point? If
5909    // so, just signal that we are done with this target region.
5910    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
5911                                                            ParentName, Line))
5912      return;
5913
5914    llvm::Function *Fn;
5915    llvm::Constant *Addr;
5916    std::tie(Fn, Addr) =
5917        CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
5918            CGM, cast<OMPTargetDirective>(*E), ParentName,
5919            /*isOffloadEntry=*/true);
5920    assert(Fn && Addr && "Target region emission failed.");
5921    return;
5922  }
5923
5924  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
5925    if (!E->hasAssociatedStmt())
5926      return;
5927
5928    scanForTargetRegionsFunctions(
5929        cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
5930        ParentName);
5931    return;
5932  }
5933
5934  // If this is a lambda function, look into its body.
5935  if (auto *L = dyn_cast<LambdaExpr>(S))
5936    S = L->getBody();
5937
5938  // Keep looking for target regions recursively.
5939  for (auto *II : S->children())
5940    scanForTargetRegionsFunctions(II, ParentName);
5941}
5942
5943bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
5944  auto &FD = *cast<FunctionDecl>(GD.getDecl());
5945
5946  // If emitting code for the host, we do not process FD here. Instead we do
5947  // the normal code generation.
5948  if (!CGM.getLangOpts().OpenMPIsDevice)
5949    return false;
5950
5951  // Try to detect target regions in the function.
5952  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
5953
5954  // We should not emit any function other than the ones created during the
5955  // scanning. Therefore, we signal that this function is completely dealt
5956  // with.
5957  return true;
5958}
5959
5960bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
5961  if (!CGM.getLangOpts().OpenMPIsDevice)
5962    return false;
5963
5964  // Check if there are Ctors/Dtors in this declaration and look for target
5965  // regions in it. We use the complete variant to produce the kernel name
5966  // mangling.
5967  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
5968  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
5969    for (auto *Ctor : RD->ctors()) {
5970      StringRef ParentName =
5971          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
5972      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
5973    }
5974    auto *Dtor = RD->getDestructor();
5975    if (Dtor) {
5976      StringRef ParentName =
5977          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
5978      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
5979    }
5980  }
5981
5982  // If we are in target mode we do not emit any global (declare target is not
5983  // implemented yet). Therefore we signal that GD was processed in this case.
5984  return true;
5985}
5986
5987bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
5988  auto *VD = GD.getDecl();
5989  if (isa<FunctionDecl>(VD))
5990    return emitTargetFunctions(GD);
5991
5992  return emitTargetGlobalVariable(GD);
5993}
5994
5995llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
5996  // If we have offloading in the current module, we need to emit the entries
5997  // now and register the offloading descriptor.
5998  createOffloadEntriesAndInfoMetadata();
5999
6000  // Create and register the offloading binary descriptors. This is the main
6001  // entity that captures all the information about offloading in the current
6002  // compilation unit.
6003  return createOffloadingBinaryDescriptorRegistration();
6004}
6005
6006void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
6007                                    const OMPExecutableDirective &D,
6008                                    SourceLocation Loc,
6009                                    llvm::Value *OutlinedFn,
6010                                    ArrayRef<llvm::Value *> CapturedVars) {
6011  if (!CGF.HaveInsertPoint())
6012    return;
6013
6014  auto *RTLoc = emitUpdateLocation(CGF, Loc);
6015  CodeGenFunction::RunCleanupsScope Scope(CGF);
6016
6017  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
6018  llvm::Value *Args[] = {
6019      RTLoc,
6020      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
6021      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
6022  llvm::SmallVector<llvm::Value *, 16> RealArgs;
6023  RealArgs.append(std::begin(Args), std::end(Args));
6024  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
6025
6026  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
6027  CGF.EmitRuntimeCall(RTLFn, RealArgs);
6028}
6029
6030void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
6031                                         const Expr *NumTeams,
6032                                         const Expr *ThreadLimit,
6033                                         SourceLocation Loc) {
6034  if (!CGF.HaveInsertPoint())
6035    return;
6036
6037  auto *RTLoc = emitUpdateLocation(CGF, Loc);
6038
6039  llvm::Value *NumTeamsVal =
6040      (NumTeams)
6041          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
6042                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
6043          : CGF.Builder.getInt32(0);
6044
6045  llvm::Value *ThreadLimitVal =
6046      (ThreadLimit)
6047          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
6048                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
6049          : CGF.Builder.getInt32(0);
6050
6051  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
6052  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
6053                                     ThreadLimitVal};
6054  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
6055                      PushNumTeamsArgs);
6056}
6057
6058void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
6059                                          const OMPExecutableDirective &D,
6060                                          const Expr *IfCond,
6061                                          const Expr *Device,
6062                                          const RegionCodeGenTy &CodeGen) {
6063
6064  if (!CGF.HaveInsertPoint())
6065    return;
6066
6067  llvm::Value *BasePointersArray = nullptr;
6068  llvm::Value *PointersArray = nullptr;
6069  llvm::Value *SizesArray = nullptr;
6070  llvm::Value *MapTypesArray = nullptr;
6071  unsigned NumOfPtrs = 0;
6072
6073  // Generate the code for the opening of the data environment. Capture all the
6074  // arguments of the runtime call by reference because they are used in the
6075  // closing of the region.
6076  auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray,
6077                         &SizesArray, &MapTypesArray, Device,
6078                         &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
6079    // Fill up the arrays with all the mapped variables.
6080    MappableExprsHandler::MapValuesArrayTy BasePointers;
6081    MappableExprsHandler::MapValuesArrayTy Pointers;
6082    MappableExprsHandler::MapValuesArrayTy Sizes;
6083    MappableExprsHandler::MapFlagsArrayTy MapTypes;
6084
6085    // Get map clause information.
6086    MappableExprsHandler MCHandler(D, CGF);
6087    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6088    NumOfPtrs = BasePointers.size();
6089
6090    // Fill up the arrays and create the arguments.
6091    emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
6092                         MapTypesArray, BasePointers, Pointers, Sizes,
6093                         MapTypes);
6094
6095    llvm::Value *BasePointersArrayArg = nullptr;
6096    llvm::Value *PointersArrayArg = nullptr;
6097    llvm::Value *SizesArrayArg = nullptr;
6098    llvm::Value *MapTypesArrayArg = nullptr;
6099    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6100                                 SizesArrayArg, MapTypesArrayArg,
6101                                 BasePointersArray, PointersArray, SizesArray,
6102                                 MapTypesArray, NumOfPtrs);
6103
6104    // Emit device ID if any.
6105    llvm::Value *DeviceID = nullptr;
6106    if (Device)
6107      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6108                                           CGF.Int32Ty, /*isSigned=*/true);
6109    else
6110      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6111
6112    // Emit the number of elements in the offloading arrays.
6113    auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
6114
6115    llvm::Value *OffloadingArgs[] = {
6116        DeviceID,         PointerNum,    BasePointersArrayArg,
6117        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6118    auto &RT = CGF.CGM.getOpenMPRuntime();
6119    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
6120                        OffloadingArgs);
6121  };
6122
6123  // Generate code for the closing of the data region.
6124  auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray,
6125                       &MapTypesArray, Device,
6126                       &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
6127    assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
6128           NumOfPtrs && "Invalid data environment closing arguments.");
6129
6130    llvm::Value *BasePointersArrayArg = nullptr;
6131    llvm::Value *PointersArrayArg = nullptr;
6132    llvm::Value *SizesArrayArg = nullptr;
6133    llvm::Value *MapTypesArrayArg = nullptr;
6134    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
6135                                 SizesArrayArg, MapTypesArrayArg,
6136                                 BasePointersArray, PointersArray, SizesArray,
6137                                 MapTypesArray, NumOfPtrs);
6138
6139    // Emit device ID if any.
6140    llvm::Value *DeviceID = nullptr;
6141    if (Device)
6142      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6143                                           CGF.Int32Ty, /*isSigned=*/true);
6144    else
6145      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6146
6147    // Emit the number of elements in the offloading arrays.
6148    auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
6149
6150    llvm::Value *OffloadingArgs[] = {
6151        DeviceID,         PointerNum,    BasePointersArrayArg,
6152        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6153    auto &RT = CGF.CGM.getOpenMPRuntime();
6154    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
6155                        OffloadingArgs);
6156  };
6157
6158  // In the event we get an if clause, we don't have to take any action on the
6159  // else side.
6160  auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6161
6162  if (IfCond) {
6163    emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen);
6164  } else {
6165    RegionCodeGenTy BeginThenRCG(BeginThenGen);
6166    BeginThenRCG(CGF);
6167  }
6168
6169  CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen);
6170
6171  if (IfCond) {
6172    emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen);
6173  } else {
6174    RegionCodeGenTy EndThenRCG(EndThenGen);
6175    EndThenRCG(CGF);
6176  }
6177}
6178
6179void CGOpenMPRuntime::emitTargetDataStandAloneCall(
6180    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
6181    const Expr *Device) {
6182  if (!CGF.HaveInsertPoint())
6183    return;
6184
6185  assert((isa<OMPTargetEnterDataDirective>(D) ||
6186          isa<OMPTargetExitDataDirective>(D) ||
6187          isa<OMPTargetUpdateDirective>(D)) &&
6188         "Expecting either target enter, exit data, or update directives.");
6189
6190  // Generate the code for the standalone target data directive.
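  // For example, '#pragma omp target enter data map(to: a)' becomes, roughly,
  // a call to __tgt_target_data_begin with a device ID, the number of mapped
  // pointers (1), and the offloading arrays describing 'a'.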
6191  auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
6192    // Fill up the arrays with all the mapped variables.
6193    MappableExprsHandler::MapValuesArrayTy BasePointers;
6194    MappableExprsHandler::MapValuesArrayTy Pointers;
6195    MappableExprsHandler::MapValuesArrayTy Sizes;
6196    MappableExprsHandler::MapFlagsArrayTy MapTypes;
6197
6198    // Get map clause information.
6199    MappableExprsHandler MEHandler(D, CGF);
6200    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
6201
6202    llvm::Value *BasePointersArrayArg = nullptr;
6203    llvm::Value *PointersArrayArg = nullptr;
6204    llvm::Value *SizesArrayArg = nullptr;
6205    llvm::Value *MapTypesArrayArg = nullptr;
6206
6207    // Fill up the arrays and create the arguments.
6208    emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg,
6209                         SizesArrayArg, MapTypesArrayArg, BasePointers,
6210                         Pointers, Sizes, MapTypes);
6211    emitOffloadingArraysArgument(
6212        CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6213        MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
6214        MapTypesArrayArg, BasePointers.size());
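    // Note that the same variables receive first the offloading arrays and,
    // immediately afterwards, their argument form; unlike the 'target data'
    // construct there is no matching closing call that would need to reuse the
    // original arrays.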
6215
6216    // Emit device ID if any.
6217    llvm::Value *DeviceID = nullptr;
6218    if (Device)
6219      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6220                                           CGF.Int32Ty, /*isSigned=*/true);
6221    else
6222      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6223
6224    // Emit the number of elements in the offloading arrays.
6225    auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6226
6227    llvm::Value *OffloadingArgs[] = {
6228        DeviceID,         PointerNum,    BasePointersArrayArg,
6229        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
6230
6231    auto &RT = CGF.CGM.getOpenMPRuntime();
6232    // Select the right runtime function call for each expected standalone
6233    // directive.
6234    OpenMPRTLFunction RTLFn;
6235    switch (D.getDirectiveKind()) {
6236    default:
6237      llvm_unreachable("Unexpected standalone target data directive.");
6238      break;
6239    case OMPD_target_enter_data:
6240      RTLFn = OMPRTL__tgt_target_data_begin;
6241      break;
6242    case OMPD_target_exit_data:
6243      RTLFn = OMPRTL__tgt_target_data_end;
6244      break;
6245    case OMPD_target_update:
6246      RTLFn = OMPRTL__tgt_target_data_update;
6247      break;
6248    }
6249    CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
6250  };
6251
6252  // In the event we get an if clause, we don't have to take any action on the
6253  // else side.
6254  auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
6255
6256  if (IfCond) {
6257    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
6258  } else {
6259    RegionCodeGenTy ThenGenRCG(ThenGen);
6260    ThenGenRCG(CGF);
6261  }
6262}
6263
6264namespace {
6265  /// Kind of parameter in a function with 'declare simd' directive.
6266  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
6267  /// Attribute set of the parameter.
6268  struct ParamAttrTy {
6269    ParamKindTy Kind = Vector;
6270    llvm::APSInt StrideOrArg;
6271    llvm::APSInt Alignment;
6272  };
6273} // namespace
6274
6275static unsigned evaluateCDTSize(const FunctionDecl *FD,
6276                                ArrayRef<ParamAttrTy> ParamAttrs) {
6277  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
6278  // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
6279  // argument of that clause. The VLEN value must be a power of 2.
6280  // Otherwise the notion of the function's "characteristic data type" (CDT)
6281  // is used to compute the vector length.
6282  // The CDT is determined in the following order:
6283  //   a) For a non-void function, the CDT is the return type.
6284  //   b) If the function has any non-uniform, non-linear parameters, then the
6285  //   CDT is the type of the first such parameter.
6286  //   c) If the CDT determined by a) or b) above is a struct, union, or class
6287  //   type which is passed by value (except for a type that maps to a
6288  //   built-in complex data type), the CDT is int.
6289  //   d) If none of the above three cases is applicable, the CDT is int.
6290  // The VLEN is then determined based on the CDT and the size of the vector
6291  // register of the ISA for which the current vector version is generated. The
6292  // VLEN is computed using the formula below:
6293  //   VLEN = sizeof(vector_register) / sizeof(CDT),
6294  // where the vector register size is specified in section 3.2.1 "Registers
6295  // and the Stack Frame" of the original AMD64 ABI document.
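  // For example, for 'float foo(float x)' vectorized for AVX (256-bit vector
  // registers) the CDT is float, so VLEN = 256 / 32 = 8.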
6296  QualType RetType = FD->getReturnType();
6297  if (RetType.isNull())
6298    return 0;
6299  ASTContext &C = FD->getASTContext();
6300  QualType CDT;
6301  if (!RetType.isNull() && !RetType->isVoidType())
6302    CDT = RetType;
6303  else {
6304    unsigned Offset = 0;
6305    if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
6306      if (ParamAttrs[Offset].Kind == Vector)
6307        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
6308      ++Offset;
6309    }
6310    if (CDT.isNull()) {
6311      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
6312        if (ParamAttrs[I + Offset].Kind == Vector) {
6313          CDT = FD->getParamDecl(I)->getType();
6314          break;
6315        }
6316      }
6317    }
6318  }
6319  if (CDT.isNull())
6320    CDT = C.IntTy;
6321  CDT = CDT->getCanonicalTypeUnqualified();
6322  if (CDT->isRecordType() || CDT->isUnionType())
6323    CDT = C.IntTy;
6324  return C.getTypeSize(CDT);
6325}
6326
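// Emit the x86 vector-variant mangled names for Fn as function attributes.
// Each name has the form _ZGV<isa><mask><vlen><parameter codes>_<name>, e.g.
// (roughly) "_ZGVbN4v_foo" for an unmasked SSE variant of 'foo' with a vector
// length of 4 and a single vector parameter.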
6327static void
6328emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
6329                           llvm::APSInt VLENVal,
6330                           ArrayRef<ParamAttrTy> ParamAttrs,
6331                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
6332  struct ISADataTy {
6333    char ISA;
6334    unsigned VecRegSize;
6335  };
6336  ISADataTy ISAData[] = {
6337      {'b', 128}, // SSE
6338      {'c', 256}, // AVX
6339      {'d', 256}, // AVX2
6340      {'e', 512}, // AVX512
6341  };
6350  llvm::SmallVector<char, 2> Masked;
6351  switch (State) {
6352  case OMPDeclareSimdDeclAttr::BS_Undefined:
6353    Masked.push_back('N');
6354    Masked.push_back('M');
6355    break;
6356  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
6357    Masked.push_back('N');
6358    break;
6359  case OMPDeclareSimdDeclAttr::BS_Inbranch:
6360    Masked.push_back('M');
6361    break;
6362  }
6363  for (auto Mask : Masked) {
6364    for (auto &Data : ISAData) {
6365      SmallString<256> Buffer;
6366      llvm::raw_svector_ostream Out(Buffer);
6367      Out << "_ZGV" << Data.ISA << Mask;
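      // If no 'simdlen' was specified, derive the VLEN from the vector
      // register width and the size of the characteristic data type (both are
      // in bits).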
6368      if (!VLENVal) {
6369        Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
6370                                         evaluateCDTSize(FD, ParamAttrs));
6371      } else
6372        Out << VLENVal;
6373      for (auto &ParamAttr : ParamAttrs) {
6374        switch (ParamAttr.Kind) {
6375        case LinearWithVarStride:
6376          Out << 's' << ParamAttr.StrideOrArg;
6377          break;
6378        case Linear:
6379          Out << 'l';
6380          if (!!ParamAttr.StrideOrArg)
6381            Out << ParamAttr.StrideOrArg;
6382          break;
6383        case Uniform:
6384          Out << 'u';
6385          break;
6386        case Vector:
6387          Out << 'v';
6388          break;
6389        }
6390        if (!!ParamAttr.Alignment)
6391          Out << 'a' << ParamAttr.Alignment;
6392      }
6393      Out << '_' << Fn->getName();
6394      Fn->addFnAttr(Out.str());
6395    }
6396  }
6397}
6398
6399void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
6400                                              llvm::Function *Fn) {
6401  ASTContext &C = CGM.getContext();
6402  FD = FD->getCanonicalDecl();
6403  // Map params to their positions in function decl.
6404  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
6405  if (isa<CXXMethodDecl>(FD))
6406    ParamPositions.insert({FD, 0});
6407  unsigned ParamPos = ParamPositions.size();
6408  for (auto *P : FD->parameters()) {
6409    ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
6410    ++ParamPos;
6411  }
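  // For each 'declare simd' attribute on FD, e.g.
  //   #pragma omp declare simd simdlen(8) uniform(a) linear(b : 2)
  //   void foo(int *a, int b, float c);
  // collect the per-parameter attributes and (on x86/x86_64) emit the
  // corresponding vector-variant mangled names.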
6412  for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
6413    llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
6414    // Mark uniform parameters.
6415    for (auto *E : Attr->uniforms()) {
6416      E = E->IgnoreParenImpCasts();
6417      unsigned Pos;
6418      if (isa<CXXThisExpr>(E))
6419        Pos = ParamPositions[FD];
6420      else {
6421        auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6422                        ->getCanonicalDecl();
6423        Pos = ParamPositions[PVD];
6424      }
6425      ParamAttrs[Pos].Kind = Uniform;
6426    }
6427    // Get alignment info.
6428    auto NI = Attr->alignments_begin();
6429    for (auto *E : Attr->aligneds()) {
6430      E = E->IgnoreParenImpCasts();
6431      unsigned Pos;
6432      QualType ParmTy;
6433      if (isa<CXXThisExpr>(E)) {
6434        Pos = ParamPositions[FD];
6435        ParmTy = E->getType();
6436      } else {
6437        auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6438                        ->getCanonicalDecl();
6439        Pos = ParamPositions[PVD];
6440        ParmTy = PVD->getType();
6441      }
6442      ParamAttrs[Pos].Alignment =
6443          (*NI) ? (*NI)->EvaluateKnownConstInt(C)
6444                : llvm::APSInt::getUnsigned(
6445                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
6446                          .getQuantity());
6447      ++NI;
6448    }
6449    // Mark linear parameters.
6450    auto SI = Attr->steps_begin();
6451    auto MI = Attr->modifiers_begin();
6452    for (auto *E : Attr->linears()) {
6453      E = E->IgnoreParenImpCasts();
6454      unsigned Pos;
6455      if (isa<CXXThisExpr>(E))
6456        Pos = ParamPositions[FD];
6457      else {
6458        auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
6459                        ->getCanonicalDecl();
6460        Pos = ParamPositions[PVD];
6461      }
6462      auto &ParamAttr = ParamAttrs[Pos];
6463      ParamAttr.Kind = Linear;
6464      if (*SI) {
6465        if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
6466                                  Expr::SE_AllowSideEffects)) {
6467          if (auto *DRE = dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
6468            if (auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
6469              ParamAttr.Kind = LinearWithVarStride;
6470              ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
6471                  ParamPositions[StridePVD->getCanonicalDecl()]);
6472            }
6473          }
6474        }
6475      }
6476      ++SI;
6477      ++MI;
6478    }
6479    llvm::APSInt VLENVal;
6480    if (const Expr *VLEN = Attr->getSimdlen())
6481      VLENVal = VLEN->EvaluateKnownConstInt(C);
6482    OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
6483    if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
6484        CGM.getTriple().getArch() == llvm::Triple::x86_64)
6485      emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
6486  }
6487}
6488
6489namespace {
6490/// Cleanup action for doacross support.
6491class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
6492public:
6493  static const int DoacrossFinArgs = 2;
6494
6495private:
6496  llvm::Value *RTLFn;
6497  llvm::Value *Args[DoacrossFinArgs];
6498
6499public:
6500  DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
6501      : RTLFn(RTLFn) {
6502    assert(CallArgs.size() == DoacrossFinArgs);
6503    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
6504  }
6505  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
6506    if (!CGF.HaveInsertPoint())
6507      return;
6508    CGF.EmitRuntimeCall(RTLFn, Args);
6509  }
6510};
6511} // namespace
6512
6513void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
6514                                       const OMPLoopDirective &D) {
6515  if (!CGF.HaveInsertPoint())
6516    return;
6517
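  // For a loop with '#pragma omp for ordered(1)' this emits (schematically):
  //   kmp_dim dims = {/*lo=*/0, /*up=*/<num iterations>, /*st=*/1};
  //   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, &dims);
  // and registers a cleanup that emits
  //   __kmpc_doacross_fini(&loc, gtid);
  // at the end of the region.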
6518  ASTContext &C = CGM.getContext();
6519  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
6520  RecordDecl *RD;
6521  if (KmpDimTy.isNull()) {
6522    // Build struct kmp_dim {  // loop bounds info cast to kmp_int64
6523    //  kmp_int64 lo; // lower
6524    //  kmp_int64 up; // upper
6525    //  kmp_int64 st; // stride
6526    // };
6527    RD = C.buildImplicitRecord("kmp_dim");
6528    RD->startDefinition();
6529    addFieldToRecordDecl(C, RD, Int64Ty);
6530    addFieldToRecordDecl(C, RD, Int64Ty);
6531    addFieldToRecordDecl(C, RD, Int64Ty);
6532    RD->completeDefinition();
6533    KmpDimTy = C.getRecordType(RD);
6534  } else
6535    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
6536
6537  Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
6538  CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
6539  enum { LowerFD = 0, UpperFD, StrideFD };
6540  // Fill dims with data.
6541  LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
6542  // dims.upper = num_iterations;
6543  LValue UpperLVal =
6544      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
6545  llvm::Value *NumIterVal = CGF.EmitScalarConversion(
6546      CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
6547      Int64Ty, D.getNumIterations()->getExprLoc());
6548  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
6549  // dims.stride = 1;
6550  LValue StrideLVal =
6551      CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
6552  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
6553                        StrideLVal);
6554
6555  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
6556  // kmp_int32 num_dims, struct kmp_dim * dims);
6557  llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
6558                         getThreadID(CGF, D.getLocStart()),
6559                         llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
6560                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6561                             DimsAddr.getPointer(), CGM.VoidPtrTy)};
6562
6563  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
6564  CGF.EmitRuntimeCall(RTLFn, Args);
6565  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
6566      emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
6567  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
6568  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
6569                                             llvm::makeArrayRef(FiniArgs));
6570}
6571
6572void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
6573                                          const OMPDependClause *C) {
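  // 'ordered depend(source)' lowers to __kmpc_doacross_post(&loc, gtid, &vec)
  // and 'ordered depend(sink : ...)' to __kmpc_doacross_wait(&loc, gtid, &vec),
  // where vec holds the dependence vector value converted to kmp_int64.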
6574  QualType Int64Ty =
6575      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
6576  const Expr *CounterVal = C->getCounterValue();
6577  assert(CounterVal);
6578  llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
6579                                                 CounterVal->getType(), Int64Ty,
6580                                                 CounterVal->getExprLoc());
6581  Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
6582  CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
6583  llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
6584                         getThreadID(CGF, C->getLocStart()),
6585                         CntAddr.getPointer()};
6586  llvm::Value *RTLFn;
6587  if (C->getDependencyKind() == OMPC_DEPEND_source)
6588    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
6589  else {
6590    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
6591    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
6592  }
6593  CGF.EmitRuntimeCall(RTLFn, Args);
6594}
6595
6596