[OPENMP] CodeGen for "omp atomic read [seq_cst]" directive.

"omp atomic read [seq_cst]" accepts expressions "v=x;". In this patch we perform an atomic load of "x" (using builtin atomic loading instructions or a call to "atomic_load()" for simple lvalues and "kmpc_atomic_start();load <x>;kmpc_atomic_end();" for other lvalues), convert the result of loading to type of "v" (using EmitScalarConversion() for simple types and EmitComplexToScalarConversion() for conversions from complex to scalar) and then store the result in "v". Differential Revision: http://reviews.llvm.org/D6431 llvm-svn: 226784
2024-12-03 19:32:35 +00:00 · 2015-01-22 05:29:28 +00:00 · 2015-01-22 05:29:28 +00:00 · 13c7c4930c
commit 13c7c4930c
parent a917458203
4 changed files with 596 additions and 68 deletions
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@ -13,6 +13,7 @@

 #include "CodeGenFunction.h"
 #include "CGCall.h"
+#include "CGRecordLayout.h"
 #include "CodeGenModule.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
@ -36,34 +37,69 @@ namespace {
    CharUnits LValueAlign;
    TypeEvaluationKind EvaluationKind;
    bool UseLibcall;
+    LValue LVal;
+    CGBitFieldInfo BFI;
  public:
-    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
-      assert(lvalue.isSimple());
-
-      AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
-      EvaluationKind = CGF.getEvaluationKind(ValueTy);
-
+    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
+        : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0), UseLibcall(true) {
+      assert(!lvalue.isGlobalReg());
      ASTContext &C = CGF.getContext();
+      if (lvalue.isSimple()) {
+        AtomicTy = lvalue.getType();
+        if (auto *ATy = AtomicTy->getAs<AtomicType>())
+          ValueTy = ATy->getValueType();
+        else
+          ValueTy = AtomicTy;
+        EvaluationKind = CGF.getEvaluationKind(ValueTy);

-      uint64_t ValueAlignInBits;
-      uint64_t AtomicAlignInBits;
-      TypeInfo ValueTI = C.getTypeInfo(ValueTy);
-      ValueSizeInBits = ValueTI.Width;
-      ValueAlignInBits = ValueTI.Align;
+        uint64_t ValueAlignInBits;
+        uint64_t AtomicAlignInBits;
+        TypeInfo ValueTI = C.getTypeInfo(ValueTy);
+        ValueSizeInBits = ValueTI.Width;
+        ValueAlignInBits = ValueTI.Align;

-      TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
-      AtomicSizeInBits = AtomicTI.Width;
-      AtomicAlignInBits = AtomicTI.Align;
+        TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
+        AtomicSizeInBits = AtomicTI.Width;
+        AtomicAlignInBits = AtomicTI.Align;

-      assert(ValueSizeInBits <= AtomicSizeInBits);
-      assert(ValueAlignInBits <= AtomicAlignInBits);
+        assert(ValueSizeInBits <= AtomicSizeInBits);
+        assert(ValueAlignInBits <= AtomicAlignInBits);

-      AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
-      ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
-      if (lvalue.getAlignment().isZero())
-        lvalue.setAlignment(AtomicAlign);
+        AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
+        ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
+        if (lvalue.getAlignment().isZero())
+          lvalue.setAlignment(AtomicAlign);

+        LVal = lvalue;
+      } else if (lvalue.isBitField()) {
+        auto &OrigBFI = lvalue.getBitFieldInfo();
+        auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
+        AtomicSizeInBits = C.toBits(
+            C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1)
+                .RoundUpToAlignment(lvalue.getAlignment()));
+        auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldAddr());
+        auto OffsetInChars =
+            (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) *
+            lvalue.getAlignment();
+        VoidPtrAddr = CGF.Builder.CreateConstGEP1_64(
+            VoidPtrAddr, OffsetInChars.getQuantity());
+        auto Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            VoidPtrAddr,
+            CGF.Builder.getIntNTy(AtomicSizeInBits)->getPointerTo(),
+            "atomic_bitfield_base");
+        BFI = OrigBFI;
+        BFI.Offset = Offset;
+        BFI.StorageSize = AtomicSizeInBits;
+        LVal = LValue::MakeBitfield(Addr, BFI, lvalue.getType(),
+                                    lvalue.getAlignment());
+      } else if (lvalue.isVectorElt()) {
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      } else {
+        assert(lvalue.isExtVectorElt());
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      }
      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
    }
@ -76,6 +112,7 @@ namespace {
    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
    TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
    bool shouldUseLibcall() const { return UseLibcall; }
+    const LValue &getAtomicLValue() const { return LVal; }

    /// Is the atomic size larger than the underlying value type?
    ///
@ -87,7 +124,7 @@ namespace {
      return (ValueSizeInBits != AtomicSizeInBits);
    }

-    bool emitMemSetZeroIfNecessary(LValue dest) const;
+    bool emitMemSetZeroIfNecessary() const;

    llvm::Value *getAtomicSizeValue() const {
      CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
@ -110,16 +147,17 @@ namespace {
                             SourceLocation Loc) const;

    /// Copy an atomic r-value into atomic-layout memory.
-    void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const;
+    void emitCopyIntoMemory(RValue rvalue) const;

    /// Project an l-value down to the value field.
-    LValue projectValue(LValue lvalue) const {
-      llvm::Value *addr = lvalue.getAddress();
+    LValue projectValue() const {
+      assert(LVal.isSimple());
+      llvm::Value *addr = LVal.getAddress();
      if (hasPadding())
        addr = CGF.Builder.CreateStructGEP(addr, 0);

-      return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(),
-                              CGF.getContext(), lvalue.getTBAAInfo());
+      return LValue::MakeAddr(addr, getValueType(), LVal.getAlignment(),
+                              CGF.getContext(), LVal.getTBAAInfo());
    }

    /// Materialize an atomic r-value in atomic-layout memory.
@ -172,14 +210,15 @@ bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
  llvm_unreachable("bad evaluation kind");
 }

-bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
-  llvm::Value *addr = dest.getAddress();
+bool AtomicInfo::emitMemSetZeroIfNecessary() const {
+  assert(LVal.isSimple());
+  llvm::Value *addr = LVal.getAddress();
  if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
    return false;

  CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
                           AtomicSizeInBits / 8,
-                           dest.getAlignment().getQuantity());
+                           LVal.getAlignment().getQuantity());
  return true;
 }

@ -902,21 +941,34 @@ llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
 RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                       AggValueSlot resultSlot,
                                       SourceLocation loc) const {
-  if (EvaluationKind == TEK_Aggregate)
-    return resultSlot.asRValue();
+  if (LVal.isSimple()) {
+    if (EvaluationKind == TEK_Aggregate)
+      return resultSlot.asRValue();

-  // Drill into the padding structure if we have one.
-  if (hasPadding())
-    addr = CGF.Builder.CreateStructGEP(addr, 0);
+    // Drill into the padding structure if we have one.
+    if (hasPadding())
+      addr = CGF.Builder.CreateStructGEP(addr, 0);

-  // Otherwise, just convert the temporary to an r-value using the
-  // normal conversion routine.
-  return CGF.convertTempToRValue(addr, getValueType(), loc);
+    // Otherwise, just convert the temporary to an r-value using the
+    // normal conversion routine.
+    return CGF.convertTempToRValue(addr, getValueType(), loc);
+  } else if (LVal.isBitField())
+    return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
+        addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment()));
+  else if (LVal.isVectorElt())
+    return CGF.EmitLoadOfLValue(LValue::MakeVectorElt(addr, LVal.getVectorIdx(),
+                                                      LVal.getType(),
+                                                      LVal.getAlignment()),
+                                loc);
+  assert(LVal.isExtVectorElt());
+  return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
+      addr, LVal.getExtVectorElts(), LVal.getType(), LVal.getAlignment()));
 }

 RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal,
                                     AggValueSlot ResultSlot,
                                     SourceLocation Loc) const {
+  assert(LVal.isSimple());
  // Try not to in some easy cases.
  assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
  if (getEvaluationKind() == TEK_Scalar && !hasPadding()) {
@ -958,25 +1010,43 @@ RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal,
 RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
                                       AggValueSlot resultSlot) {
  AtomicInfo atomics(*this, src);
+  LValue LVal = atomics.getAtomicLValue();
+  llvm::Value *SrcAddr = nullptr;
+  llvm::AllocaInst *NonSimpleTempAlloca = nullptr;
+  if (LVal.isSimple())
+    SrcAddr = LVal.getAddress();
+  else {
+    if (LVal.isBitField())
+      SrcAddr = LVal.getBitFieldAddr();
+    else if (LVal.isVectorElt())
+      SrcAddr = LVal.getVectorAddr();
+    else {
+      assert(LVal.isExtVectorElt());
+      SrcAddr = LVal.getExtVectorAddr();
+    }
+    NonSimpleTempAlloca = CreateTempAlloca(
+        SrcAddr->getType()->getPointerElementType(), "atomic-load-temp");
+    NonSimpleTempAlloca->setAlignment(getContext().toBits(src.getAlignment()));
+  }

  // Check whether we should use a library call.
  if (atomics.shouldUseLibcall()) {
    llvm::Value *tempAddr;
-    if (!resultSlot.isIgnored()) {
-      assert(atomics.getEvaluationKind() == TEK_Aggregate);
-      tempAddr = resultSlot.getAddr();
-    } else {
-      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
-    }
+    if (LVal.isSimple()) {
+      if (!resultSlot.isIgnored()) {
+        assert(atomics.getEvaluationKind() == TEK_Aggregate);
+        tempAddr = resultSlot.getAddr();
+      } else
+        tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+    } else
+      tempAddr = NonSimpleTempAlloca;

    // void __atomic_load(size_t size, void *mem, void *return, int order);
    CallArgList args;
    args.add(RValue::get(atomics.getAtomicSizeValue()),
             getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
-             getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(SrcAddr)), getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy);
    args.add(RValue::get(llvm::ConstantInt::get(
                 IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
             getContext().IntTy);
@ -987,7 +1057,7 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
  }

  // Okay, we're doing this natively.
-  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress());
+  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(SrcAddr);
  llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
  load->setAtomic(llvm::SequentiallyConsistent);

@ -1003,40 +1073,46 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
    return RValue::getAggregate(nullptr, false);

  // Okay, turn that back into the original value type.
-  return atomics.convertIntToValue(load, resultSlot, loc);
+  if (src.isSimple())
+    return atomics.convertIntToValue(load, resultSlot, loc);
+
+  auto *IntAddr = atomics.emitCastToAtomicIntPointer(NonSimpleTempAlloca);
+  Builder.CreateAlignedStore(load, IntAddr, src.getAlignment().getQuantity());
+  return atomics.convertTempToRValue(NonSimpleTempAlloca, resultSlot, loc);
 }



 /// Copy an r-value into memory as part of storing to an atomic type.
 /// This needs to create a bit-pattern suitable for atomic operations.
-void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const {
+void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
+  assert(LVal.isSimple());
  // If we have an r-value, the rvalue should be of the atomic type,
  // which means that the caller is responsible for having zeroed
  // any padding.  Just do an aggregate copy of that type.
  if (rvalue.isAggregate()) {
-    CGF.EmitAggregateCopy(dest.getAddress(),
+    CGF.EmitAggregateCopy(LVal.getAddress(),
                          rvalue.getAggregateAddr(),
                          getAtomicType(),
                          (rvalue.isVolatileQualified()
-                           || dest.isVolatileQualified()),
-                          dest.getAlignment());
+                           || LVal.isVolatileQualified()),
+                          LVal.getAlignment());
    return;
  }

  // Okay, otherwise we're copying stuff.

  // Zero out the buffer if necessary.
-  emitMemSetZeroIfNecessary(dest);
+  emitMemSetZeroIfNecessary();

  // Drill past the padding if present.
-  dest = projectValue(dest);
+  LValue TempLVal = projectValue();

  // Okay, store the rvalue in.
  if (rvalue.isScalar()) {
-    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true);
+    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), TempLVal, /*init*/ true);
  } else {
-    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true);
+    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), TempLVal, /*init*/ true);
  }
 }

@ -1051,8 +1127,10 @@ llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const {

  // Otherwise, make a temporary and materialize into it.
  llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
-  LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
-  emitCopyIntoMemory(rvalue, tempLV);
+  LValue tempLV =
+      CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
+  AtomicInfo Atomics(CGF, tempLV);
+  Atomics.emitCopyIntoMemory(rvalue);
  return temp;
 }

@ -1098,7 +1176,7 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, bool isInit) {

  // If this is an initialization, just put the value there normally.
  if (isInit) {
-    atomics.emitCopyIntoMemory(rvalue, dest);
+    atomics.emitCopyIntoMemory(rvalue);
    return;
  }

@ -1214,13 +1292,13 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
  switch (atomics.getEvaluationKind()) {
  case TEK_Scalar: {
    llvm::Value *value = EmitScalarExpr(init);
-    atomics.emitCopyIntoMemory(RValue::get(value), dest);
+    atomics.emitCopyIntoMemory(RValue::get(value));
    return;
  }

  case TEK_Complex: {
    ComplexPairTy value = EmitComplexExpr(init);
-    atomics.emitCopyIntoMemory(RValue::getComplex(value), dest);
+    atomics.emitCopyIntoMemory(RValue::getComplex(value));
    return;
  }

@ -1229,8 +1307,8 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
    // of atomic type.
    bool Zeroed = false;
    if (!init->getType()->isAtomicType()) {
-      Zeroed = atomics.emitMemSetZeroIfNecessary(dest);
-      dest = atomics.projectValue(dest);
+      Zeroed = atomics.emitMemSetZeroIfNecessary();
+      dest = atomics.projectValue();
    }

    // Evaluate the expression directly into the destination.
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@ -691,8 +691,125 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
 }

-void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
-  llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
+static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
+                                         QualType SrcType, QualType DestType) {
+  assert(CGF.hasScalarEvaluationKind(DestType) &&
+         "DestType must have scalar evaluation kind.");
+  assert(!Val.isAggregate() && "Must be a scalar or complex.");
+  return Val.isScalar()
+             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                  const Expr *X, const Expr *V,
+                                  SourceLocation Loc) {
+  // v = x;
+  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  LValue VLValue = CGF.EmitLValue(V);
+  RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc)
+                                     : CGF.EmitAtomicLoad(XLValue, Loc);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
+  switch (CGF.getEvaluationKind(V->getType())) {
+  case TEK_Scalar:
+    CGF.EmitStoreOfScalar(
+        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
+    break;
+  case TEK_Complex:
+    CGF.EmitStoreOfComplex(
+        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
+        /*isInit=*/false);
+    break;
+  case TEK_Aggregate:
+    llvm_unreachable("Must be a scalar or complex.");
+  }
+}
+
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, const Expr *X, const Expr *V,
+                              const Expr *, SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read:
+    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+    break;
+  case OMPC_write:
+  case OMPC_update:
+  case OMPC_capture:
+    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+  case OMPC_unknown:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find first clause (skip seq_cst clause, if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                    S.getLocStart());
 }

 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@ -2716,7 +2716,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
        // and not, for instance, a pointer to a function.
        if (D.getDeclSpec().containsPlaceholderType() &&
            !FTI.hasTrailingReturnType() && chunkIndex == 0 &&
-            !S.getLangOpts().CPlusPlus14) {
+            !S.getLangOpts().CPlusPlus14 && !S.getLangOpts().MSVCCompat) {
          S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
                 D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto
                     ? diag::err_auto_missing_trailing_return
--- a/clang/test/OpenMP/atomic_read_codegen.c
+++ b/clang/test/OpenMP/atomic_read_codegen.c
@ -0,0 +1,333 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+_Bool bv, bx;
+char cv, cx;
+unsigned char ucv, ucx;
+short sv, sx;
+unsigned short usv, usx;
+int iv, ix;
+unsigned int uiv, uix;
+long lv, lx;
+unsigned long ulv, ulx;
+long long llv, llx;
+unsigned long long ullv, ullx;
+float fv, fx;
+double dv, dx;
+long double ldv, ldx;
+_Complex int civ, cix;
+_Complex float cfv, cfx;
+_Complex double cdv, cdx;
+
+typedef int int4 __attribute__((__vector_size__(16)));
+int4 int4x;
+
+struct BitFields {
+  int : 32;
+  int a : 31;
+} bfx;
+
+struct BitFields_packed {
+  int : 32;
+  int a : 31;
+} __attribute__ ((__packed__)) bfx_packed;
+
+struct BitFields2 {
+  int : 31;
+  int a : 1;
+} bfx2;
+
+struct BitFields2_packed {
+  int : 31;
+  int a : 1;
+} __attribute__ ((__packed__)) bfx2_packed;
+
+struct BitFields3 {
+  int : 11;
+  int a : 14;
+} bfx3;
+
+struct BitFields3_packed {
+  int : 11;
+  int a : 14;
+} __attribute__ ((__packed__)) bfx3_packed;
+
+struct BitFields4 {
+  short : 16;
+  int a: 1;
+  long b : 7;
+} bfx4;
+
+struct BitFields4_packed {
+  short : 16;
+  int a: 1;
+  long b : 7;
+} __attribute__ ((__packed__)) bfx4_packed;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+float2 float2x;
+
+register int rix __asm__("0");
+
+int main() {
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = cx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  ucv = ucx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = sx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = usx;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  iv = ix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = uix;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  lv = lx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = llx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ullx;
+// CHECK: load atomic i32* bitcast (float*
+// CHECK: bitcast i32 {{.*}} to float
+// CHECK: store float
+#pragma omp atomic read
+  fv = fx;
+// CHECK: load atomic i64* bitcast (double*
+// CHECK: bitcast i64 {{.*}} to double
+// CHECK: store double
+#pragma omp atomic read
+  dv = dx;
+// CHECK: [[LD:%.+]] = load atomic i128* bitcast (x86_fp80*
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
+// CHECK: store i128 [[LD]], i128* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load x86_fp80* [[LDTEMP]]
+// CHECK: store x86_fp80 [[LD]]
+#pragma omp atomic read
+  ldv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = cix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = cfx;
+// CHECK: call{{.*}} void @__atomic_load(i64 16,
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic seq_cst read
+  cdv = cdx;
+// CHECK: load atomic i64*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = ulx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = bx;
+// CHECK: load atomic i8*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i8
+#pragma omp atomic read, seq_cst
+  ucv = cx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = lx;
+// CHECK: load atomic i32*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i32
+#pragma omp atomic seq_cst, read
+  iv = uix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = ix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i64
+#pragma omp atomic read
+  lv = cix;
+// CHECK: load atomic i32*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = fx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = dx;
+// CHECK: load atomic i128*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+#pragma omp atomic read
+  fv = cix;
+// CHECK: load atomic i16*
+// CHECK: store double
+#pragma omp atomic read
+  dv = sx;
+// CHECK: load atomic i8*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = bx;
+// CHECK: load atomic i16*
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = usx;
+// CHECK: load atomic i64*
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic read
+  cdv = llx;
+// CHECK: [[I128VAL:%.+]] = load atomic i128* bitcast (<4 x i32>* @{{.+}} to i128*) seq_cst
+// CHECK: [[I128PTR:%.+]] = bitcast <4 x i32>* [[LDTEMP:%.+]] to i128*
+// CHECK: store i128 [[I128VAL]], i128* [[I128PTR]]
+// CHECK: [[LD:%.+]] = load <4 x i32>* [[LDTEMP]]
+// CHECK: extractelement <4 x i32> [[LD]]
+// CHECK: store i8
+#pragma omp atomic read
+  bv = int4x[0];
+// CHECK: [[LD:%.+]] = load atomic i32* bitcast (i8* getelementptr (i8* bitcast (%{{.+}}* @{{.+}} to i8*), i64 4) to i32*) seq_cst
+// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1
+// CHECK: ashr i32 [[SHL]], 1
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx.a;
+// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8*
+// CHECK: call void @__atomic_load(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @bfx_packed to i8*), i64 4), i8* [[LDTEMP_VOID_PTR]], i32 5)
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1
+// CHECK: ashr i32 [[SHL]], 1
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields2* @bfx2, i32 0, i32 0) seq_cst
+// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: ashr i32 [[LD]], 31
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx2.a;
+// CHECK: [[LD:%.+]] = load atomic i8* getelementptr (i8* bitcast (%struct.BitFields2_packed* @bfx2_packed to i8*), i64 3) seq_cst
+// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]]
+// CHECK: ashr i8 [[LD]], 7
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx2_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields3* @bfx3, i32 0, i32 0) seq_cst
+// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 7
+// CHECK: ashr i32 [[SHL]], 18
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx3.a;
+// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i24* [[LDTEMP:%.+]] to i8*
+// CHECK: call void @__atomic_load(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @bfx3_packed to i8*), i64 1), i8* [[LDTEMP_VOID_PTR]], i32 5)
+// CHECK: [[LD:%.+]] = load i24* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i24 [[LD]], 7
+// CHECK: [[ASHR:%.+]] = ashr i24 [[SHL]], 10
+// CHECK: sext i24 [[ASHR]] to i32
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx3_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @bfx4 to i64*) seq_cst
+// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i64* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 47
+// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 63
+// CHECK: trunc i64 [[ASHR]] to i32
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4.a;
+// CHECK: [[LD:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) seq_cst
+// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i8 [[LD]], 7
+// CHECK: [[ASHR:%.+]] = ashr i8 [[SHL]], 7
+// CHECK: sext i8 [[ASHR]] to i32
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @bfx4 to i64*) seq_cst
+// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i64* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 40
+// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 57
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4.b;
+// CHECK: [[LD:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) seq_cst
+// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]]
+// CHECK: [[ASHR:%.+]] = ashr i8 [[LD]], 1
+// CHECK: sext i8 [[ASHR]] to i64
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4_packed.b;
+// CHECK: [[LD:%.+]] = load atomic i32* bitcast (<2 x float>* @{{.+}} to i32*) seq_cst
+// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i32*
+// CHECK: store i32 [[LD]], i32* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load <2 x float>* [[LDTEMP]]
+// CHECK: extractelement <2 x float> [[LD]]
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = float2x.x;
+// CHECK: call{{.*}} i{{[0-9]+}} @llvm.read_register
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+#pragma omp atomic read seq_cst
+  dv = rix;
+  return 0;
+}
+
+#endif