//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// \brief Emits code for the OpenMP 'if' clause using the specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name=*/"omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name=*/"omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name=*/"omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock=*/true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock=*/false);
  {
    // There is no need to emit a line number for the unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

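/// \brief Emits an element-by-element copy of the array at \a SrcAddr into
/// the array at \a DestAddr, generating a while-do loop and calling
/// \a CopyGen to emit the copy of each element.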
void CodeGenFunction::EmitOMPAggregateAssign(
    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  auto SrcBegin = SrcAddr;
  auto DestBegin = DestAddr;
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
  // Cast from pointer to array type to pointer to single element.
  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
                                                         DestBegin->getType());
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);
  auto SrcElementCurrent =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
                                              "omp.arraycpy.destElementPast");
  DestElementCurrent->addIncoming(DestBegin, EntryBB);

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

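/// \brief Emits a copy of a variable of type \a OriginalType from \a SrcAddr
/// to \a DestAddr using the copy expression \a Copy. Arrays with a plain
/// assignment as the copy expression are copied with a single aggregate
/// assignment; otherwise the copy is emitted element by element with
/// \a DestVD and \a SrcVD remapped to the current elements.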
void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF,
                                  QualType OriginalType, llvm::Value *DestAddr,
                                  llvm::Value *SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      CGF.EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement,
                                      llvm::Value *SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(CGF);
            Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; });
            (void)Remap.Privatize();
            CGF.EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(CGF);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    CGF.EmitIgnoredExpr(Copy);
  }
}

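/// \brief Emits initialization of the private copies for the 'firstprivate'
/// clauses of the directive \a D and registers them in \a PrivateScope.
/// Returns true if at least one firstprivate variable was emitted.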
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  auto FirstprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
       I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        auto *OriginalAddr = EmitLValue(&DRE).getAddress();
        if (OrigVD->getType()->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  (*IRef)->getType());
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr,
                  (*IRef)->getType(),
                  [this, VDInit, Init](llvm::Value *DestElement,
                                       llvm::Value *SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    LocalDeclMap[VDInit] = SrcElement;
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer=*/false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            LocalDeclMap[VDInit] = OriginalAddr;
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

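/// \brief Emits private copies for the 'private' clauses of the directive
/// \a D and registers them in \a PrivateScope.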
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter);
       I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

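/// \brief Emits copying of the values of threadprivate variables from the
/// master thread to the corresponding copies in all other implicit threads,
/// as required by the 'copyin' clauses of the directive \a D. Returns true if
/// any copying was emitted, so the caller can emit the required barrier.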
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  auto CopyinFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyin;
  };
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(CopyinFilter)>
           I(D.clauses(), CopyinFilter);
       I; ++I) {
    auto *C = cast<OMPCopyinClause>(*I);
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable.
        auto *MasterAddr = VD->isStaticLocal()
                               ? CGM.getStaticLocalDeclAddress(VD)
                               : CGM.GetAddrOfGlobal(VD);
        // Get the address of the threadprivate variable.
        auto *PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
                    SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

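/// \brief Emits private copies for the 'lastprivate' clauses of the directive
/// \a D, remembering the addresses of the original variables for the final
/// update. Returns true if at least one lastprivate copy was emitted.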
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  auto LastprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_lastprivate;
  };
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           LastprivateFilter)> I(D.clauses(), LastprivateFilter);
       I; ++I) {
    auto *C = cast<OMPLastprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for the 'firstprivate' clause.
        if (!IInit)
          continue;
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "lastprivate var already registered as private");
        HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

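/// \brief Emits the final copying of lastprivate values back to the original
/// variables, guarded by \a IsLastIterCond so that only the thread that
/// executed the last iteration performs the update.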
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
  auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
  Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
  EmitBlock(ThenBB);
  {
    auto LastprivateFilter = [](const OMPClause *C) -> bool {
      return C->getClauseKind() == OMPC_lastprivate;
    };
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    for (OMPExecutableDirective::filtered_clause_iterator<decltype(
             LastprivateFilter)> I(D.clauses(), LastprivateFilter);
         I; ++I) {
      auto *C = cast<OMPLastprivateClause>(*I);
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
                      DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

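/// \brief Emits private copies with reduction init for the 'reduction'
/// clauses of the directive \a D and maps the LHS helper variables to the
/// addresses of the original variables.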
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    auto *C = cast<OMPReductionClause>(*I);
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
            // Emit private VarDecl with reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}

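/// \brief Collects the variables and reduction operations of all 'reduction'
/// clauses of the directive \a D and emits the final reduction through the
/// OpenMP runtime.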
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  bool HasAtLeastOneReduction = false;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    HasAtLeastOneReduction = true;
    auto *C = cast<OMPReductionClause>(*I);
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause(OMPC_nowait) ||
            isOpenMPParallelDirective(D.getDirectiveKind()));
  }
}

/// \brief Emits code for an OpenMP parallel directive in the parallel region.
static void emitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  if (auto C = S.getSingleClause(/*K=*/OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

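/// \brief Outlines the parallel region of the directive \a S using \a CodeGen
/// and emits the runtime call, choosing between a parallel and a serial call
/// when an 'if' clause is present.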
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
  if (auto C = S.getSingleClause(/*K=*/OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
      else
        CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
                                                  OutlinedFn, CapturedStruct);
    });
  } else
    emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables or propagation of the master
      // thread's values of threadprivate variables to local instances of those
      // variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                 OMPD_unknown);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_unknown);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // the result should still be correct without it, as we do not make these
    // variables private yet.
  }
}

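/// \brief Emits the inner loop "while (LoopCond) { BodyGen; IncExpr; }",
/// handling 'break'/'continue' and staging a separate exit block when
/// cleanups are required.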
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

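/// \brief Emits uninitialized private copies of the loop counters
/// \a Counters and registers them in \a LoopScope.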
static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

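/// \brief Emits uninitialized private copies of the variables of all 'linear'
/// clauses of the directive \a D and registers them in \a PrivateScope.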
static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    for (auto *E : Clause->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * {
        // Emit var without initialization.
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Codegen for pragma 'simd' depends on the presence of 'lastprivate'.
    // If present, we have to separate the last iteration of the loop:
    //
    // if (LastIteration != 0) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    //

    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In presence of finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        SeparateIter = true;
        break;
      default:
        // Not handled yet.
        break;
      }
    }

    // Emit inits for the linear variables.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    if (SeparateIter) {
      // Emit: if (LastIteration > 0) - begin.
      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
      auto ContBlock = CGF.createBasicBlock("simd.if.end");
      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
                               Cnt.getCount());
      CGF.EmitBlock(ThenBlock);
      Cnt.beginRegion(CGF.Builder);
      // Emit 'then' code.
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/true), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
        CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
      }
      CGF.EmitOMPSimdFinal(S);
      // Emit: if (LastIteration != 0) - end.
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    } else {
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/false), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
      }
      CGF.EmitOMPSimdFinal(S);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

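/// \brief Emits the outer loop for worksharing constructs with a dynamic or
/// static chunked schedule: each iteration requests the next chunk [LB, UB]
/// from the runtime and runs the inner loop over it.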
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(),
                   [&S](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S);
                     CGF.EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

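/// \brief Emits the worksharing loop of the directive \a S: helper variables,
/// privatization, and either a static non-chunked loop or the outer dispatch
/// loop, depending on the detected schedule. Returns true if the directive
/// has at least one 'lastprivate' clause.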
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Initialize to false in case the precondition does not hold and the loop is
  // skipped entirely, so that the value returned below is always defined.
  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                               OMPD_unknown);
      }
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

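/// \brief Emits code for 'sections'-based directives: a static non-chunked
/// worksharing loop that switches over the section statements, or a 'single'
/// region if there is only one section. Returns the directive kind that was
/// actually emitted (OMPD_sections or OMPD_single) so the caller can emit the
/// matching implicit barrier.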
1110static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
1111                                        const OMPExecutableDirective &S) {
1112  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
1113  auto *CS = dyn_cast<CompoundStmt>(Stmt);
1114  if (CS && CS->size() > 1) {
1115    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
1116      auto &C = CGF.CGM.getContext();
1117      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1118      // Emit helper vars inits.
1119      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
1120                                    CGF.Builder.getInt32(0));
1121      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
1122      LValue UB =
1123          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
1124      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
1125                                    CGF.Builder.getInt32(1));
1126      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
1127                                    CGF.Builder.getInt32(0));
1128      // Loop counter.
1129      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
1130      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
1131      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
1132      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
1133      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
1134      // Generate condition for loop.
1135      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
1136                          OK_Ordinary, S.getLocStart(),
1137                          /*fpContractable=*/false);
1138      // Increment for loop counter.
1139      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
1140                        OK_Ordinary, S.getLocStart());
1141      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
1142        // Iterate through all sections and emit a switch construct:
1143        // switch (IV) {
1144        //   case 0:
1145        //     <SectionStmt[0]>;
1146        //     break;
1147        // ...
1148        //   case <NumSection> - 1:
1149        //     <SectionStmt[<NumSection> - 1]>;
1150        //     break;
1151        // }
1152        // .omp.sections.exit:
1153        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
1154        auto *SwitchStmt = CGF.Builder.CreateSwitch(
1155            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
1156            CS->size());
1157        unsigned CaseNumber = 0;
1158        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
1159          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
1160          CGF.EmitBlock(CaseBB);
1161          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
1162          CGF.EmitStmt(*C);
1163          CGF.EmitBranch(ExitBB);
1164        }
1165        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
1166      };
1167      // Emit static non-chunked loop.
1168      CGF.CGM.getOpenMPRuntime().emitForInit(
1169          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
1170          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
1171          ST.getAddress());
1172      // UB = min(UB, GlobalUB);
1173      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
1174      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
1175          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
1176      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
1177      // IV = LB;
1178      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
1179      // while (idx <= UB) { BODY; ++idx; }
1180      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
1181      // Tell the runtime we are done.
1182      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
1183                                               OMPC_SCHEDULE_static);
1184    };
1185
1186    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
1187    return OMPD_sections;
1188  }
1189  // If only one section is found - no need to generate loop, emit as a single
1190  // region.
1191  auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
1192    CGF.EmitStmt(Stmt);
1193    CGF.EnsureInsertPoint();
1194  };
1195  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
1196                                              llvm::None, llvm::None,
1197                                              llvm::None, llvm::None);
1198  return OMPD_single;
1199}
1200
1201void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
1202  LexicalScope Scope(*this, S.getSourceRange());
1203  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
1204  // Emit an implicit barrier at the end.
1205  if (!S.getSingleClause(OMPC_nowait)) {
1206    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
1207  }
1208}
1209
1210void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
1211  LexicalScope Scope(*this, S.getSourceRange());
1212  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1213    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1214    CGF.EnsureInsertPoint();
1215  };
1216  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
1217}
1218
1219void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
1220  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
1221  llvm::SmallVector<const Expr *, 8> DestExprs;
1222  llvm::SmallVector<const Expr *, 8> SrcExprs;
1223  llvm::SmallVector<const Expr *, 8> AssignmentOps;
1224  // Check if there are any 'copyprivate' clauses associated with this
1225  // 'single'
1226  // construct.
  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyprivate;
  };
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
      CopyprivateFilter)> CopyprivateIter;
  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
    auto *C = cast<OMPCopyprivateClause>(*I);
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          CopyprivateVars, DestExprs, SrcExprs,
                                          AssignmentOps);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit the directive as a combined construct made up of two implicit
  // directives: 'parallel' and 'for'.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit the implicit barrier at the end of the parallel region. This
    // barrier coincides with the end of the 'for' directive, so it also
    // serves as that directive's implicit barrier.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit the directive as a combined construct made up of two implicit
  // directives: 'parallel' and 'sections'.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    (void)emitSections(CGF, S);
    // Emit the implicit barrier at the end of the parallel region.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit an outlined function for the task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied tasks).
  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
  // Check if we should emit a tied or an untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final: the result is either a compile-time constant
  // (if the condition folds) or a boolean value evaluated at run time.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

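// Emit a call to the runtime flush routine. If the directive has a 'flush'
// pseudo-clause, the listed variables are passed along; otherwise the flush
// applies to all thread-visible data, so an empty list is passed.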
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K=*/OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

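/// \brief Converts an RValue of scalar or complex evaluation kind to a scalar
/// value of type \a DestType.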
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

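/// \brief Converts an RValue of scalar or complex evaluation kind to a complex
/// value of type \a DestType. A scalar input becomes the real part and the
/// imaginary part is zero-initialized.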
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}

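/// \brief Emits code for the 'read' form of the atomic construct, e.g.:
/// \code
///   #pragma omp atomic read
///   v = x;
/// \endcode
/// Only 'x' is loaded atomically; the store to 'v' need not be atomic.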
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

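/// \brief Emits code for the 'write' form of the atomic construct, e.g.:
/// \code
///   #pragma omp atomic write
///   x = expr;
/// \endcode
/// 'expr' is evaluated non-atomically; only the store to 'x' is atomic.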
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  if (XLValue.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
  else
    CGF.EmitAtomicStore(ExprRValue, XLValue,
                        IsSeqCst ? llvm::SequentiallyConsistent
                                 : llvm::Monotonic,
                        XLValue.isVolatile(), /*IsInit=*/false);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

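/// \brief Tries to emit the atomic update as a single 'atomicrmw' instruction.
/// \returns true on success; false if no suitable 'atomicrmw' operation exists
/// for the given operator and operand types, in which case the caller must
/// fall back to a compare-and-swap loop.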
static bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
                             BinaryOperatorKind BO, llvm::AtomicOrdering AO,
                             bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic operations are supported for
  // the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress()->getType()->getPointerElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return false;

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return false;
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
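  // No 'atomicrmw' instruction exists for these operations; fall back to the
  // compare-and-swap sequence.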
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return false;
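  // The remaining operators cannot appear in an atomic update expression;
  // Sema is expected to have rejected them already.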
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Assign:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
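  // A constant update value may have been accepted above even though its
  // width differs from that of 'x'; truncate or extend it to match before
  // forming the instruction.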
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress()->getType()->getPointerElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return true;
}

void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr binop x; -> expr binop xrval;
  if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
}

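/// \brief Emits code for the 'update' form of the atomic construct, e.g.:
/// \code
///   #pragma omp atomic update
///   x = x binop expr;
/// \endcode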
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr binop x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
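  // The update expression refers to 'x' and 'expr' through opaque value
  // expressions; bind them to the values computed here so that emitting the
  // update expression below yields the combined result.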
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
                                    IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

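/// \brief Dispatches code generation for the atomic construct based on the
/// clause kind ('read', 'write', 'update', or 'capture').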
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
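  // If no clause is specified (OMPC_unknown), the construct behaves as
  // 'update' per the OpenMP spec.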
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic capture' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause that is not 'seq_cst'; it determines the form of
    // the atomic operation.
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

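  // The associated statement may be wrapped in an ExprWithCleanups (e.g. when
  // temporaries are created while evaluating the expression); enter the
  // full-expression so that its cleanups are emitted correctly.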
  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
    enterFullExpression(EWC);

  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                      S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}