diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index daac174c8e0c..361682f8b797 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -13,6 +13,7 @@
 #include "CodeGenFunction.h"
 #include "CGCall.h"
+#include "CGRecordLayout.h"
 #include "CodeGenModule.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
@@ -36,34 +37,69 @@ namespace {
     CharUnits LValueAlign;
     TypeEvaluationKind EvaluationKind;
     bool UseLibcall;
+    LValue LVal;
+    CGBitFieldInfo BFI;
   public:
-    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
-      assert(lvalue.isSimple());
-
-      AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
-      EvaluationKind = CGF.getEvaluationKind(ValueTy);
-
+    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
+        : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0), UseLibcall(true) {
+      assert(!lvalue.isGlobalReg());
       ASTContext &C = CGF.getContext();
+      if (lvalue.isSimple()) {
+        AtomicTy = lvalue.getType();
+        if (auto *ATy = AtomicTy->getAs<AtomicType>())
+          ValueTy = ATy->getValueType();
+        else
+          ValueTy = AtomicTy;
+        EvaluationKind = CGF.getEvaluationKind(ValueTy);
 
-      uint64_t ValueAlignInBits;
-      uint64_t AtomicAlignInBits;
-      TypeInfo ValueTI = C.getTypeInfo(ValueTy);
-      ValueSizeInBits = ValueTI.Width;
-      ValueAlignInBits = ValueTI.Align;
+        uint64_t ValueAlignInBits;
+        uint64_t AtomicAlignInBits;
+        TypeInfo ValueTI = C.getTypeInfo(ValueTy);
+        ValueSizeInBits = ValueTI.Width;
+        ValueAlignInBits = ValueTI.Align;
 
-      TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
-      AtomicSizeInBits = AtomicTI.Width;
-      AtomicAlignInBits = AtomicTI.Align;
+        TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
+        AtomicSizeInBits = AtomicTI.Width;
+        AtomicAlignInBits = AtomicTI.Align;
 
-      assert(ValueSizeInBits <= AtomicSizeInBits);
-      assert(ValueAlignInBits <= AtomicAlignInBits);
+        assert(ValueSizeInBits <= AtomicSizeInBits);
+        assert(ValueAlignInBits <= AtomicAlignInBits);
 
-      AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
-      ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
-      if (lvalue.getAlignment().isZero())
-        lvalue.setAlignment(AtomicAlign);
+        AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
+        ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
+        if (lvalue.getAlignment().isZero())
+          lvalue.setAlignment(AtomicAlign);
+        LVal = lvalue;
+      } else if (lvalue.isBitField()) {
+        auto &OrigBFI = lvalue.getBitFieldInfo();
+        auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
+        AtomicSizeInBits = C.toBits(
+            C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1)
+                .RoundUpToAlignment(lvalue.getAlignment()));
+        auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldAddr());
+        auto OffsetInChars =
+            (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) *
+            lvalue.getAlignment();
+        VoidPtrAddr = CGF.Builder.CreateConstGEP1_64(
+            VoidPtrAddr, OffsetInChars.getQuantity());
+        auto Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            VoidPtrAddr,
+            CGF.Builder.getIntNTy(AtomicSizeInBits)->getPointerTo(),
+            "atomic_bitfield_base");
+        BFI = OrigBFI;
+        BFI.Offset = Offset;
+        BFI.StorageSize = AtomicSizeInBits;
+        LVal = LValue::MakeBitfield(Addr, BFI, lvalue.getType(),
+                                    lvalue.getAlignment());
+      } else if (lvalue.isVectorElt()) {
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      } else {
+        assert(lvalue.isExtVectorElt());
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      }
       UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
           AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
     }
 
@@ -76,6 +112,7 @@ namespace {
     uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
+    const LValue &getAtomicLValue() const { return LVal; }
 
     /// Is the atomic size larger than the underlying value type?
    ///
@@ -87,7 +124,7 @@ namespace {
      return (ValueSizeInBits != AtomicSizeInBits);
     }
 
-    bool emitMemSetZeroIfNecessary(LValue dest) const;
+    bool emitMemSetZeroIfNecessary() const;
 
     llvm::Value *getAtomicSizeValue() const {
       CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
@@ -110,16 +147,17 @@ namespace {
                              SourceLocation Loc) const;
 
     /// Copy an atomic r-value into atomic-layout memory.
-    void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const;
+    void emitCopyIntoMemory(RValue rvalue) const;
 
     /// Project an l-value down to the value field.
-    LValue projectValue(LValue lvalue) const {
-      llvm::Value *addr = lvalue.getAddress();
+    LValue projectValue() const {
+      assert(LVal.isSimple());
+      llvm::Value *addr = LVal.getAddress();
       if (hasPadding())
         addr = CGF.Builder.CreateStructGEP(addr, 0);
 
-      return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(),
-                              CGF.getContext(), lvalue.getTBAAInfo());
+      return LValue::MakeAddr(addr, getValueType(), LVal.getAlignment(),
+                              CGF.getContext(), LVal.getTBAAInfo());
     }
 
     /// Materialize an atomic r-value in atomic-layout memory.
@@ -172,14 +210,15 @@ bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
   llvm_unreachable("bad evaluation kind");
 }
 
-bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
-  llvm::Value *addr = dest.getAddress();
+bool AtomicInfo::emitMemSetZeroIfNecessary() const {
+  assert(LVal.isSimple());
+  llvm::Value *addr = LVal.getAddress();
   if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
     return false;
 
   CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
                            AtomicSizeInBits / 8,
-                           dest.getAlignment().getQuantity());
+                           LVal.getAlignment().getQuantity());
   return true;
 }
 
@@ -902,21 +941,34 @@ llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
 RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                        AggValueSlot resultSlot,
                                        SourceLocation loc) const {
-  if (EvaluationKind == TEK_Aggregate)
-    return resultSlot.asRValue();
+  if (LVal.isSimple()) {
+    if (EvaluationKind == TEK_Aggregate)
+      return resultSlot.asRValue();
 
-  // Drill into the padding structure if we have one.
-  if (hasPadding())
-    addr = CGF.Builder.CreateStructGEP(addr, 0);
+    // Drill into the padding structure if we have one.
+    if (hasPadding())
+      addr = CGF.Builder.CreateStructGEP(addr, 0);
 
-  // Otherwise, just convert the temporary to an r-value using the
-  // normal conversion routine.
-  return CGF.convertTempToRValue(addr, getValueType(), loc);
+    // Otherwise, just convert the temporary to an r-value using the
+    // normal conversion routine.
+    return CGF.convertTempToRValue(addr, getValueType(), loc);
+  } else if (LVal.isBitField())
+    return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
+        addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment()));
+  else if (LVal.isVectorElt())
+    return CGF.EmitLoadOfLValue(LValue::MakeVectorElt(addr, LVal.getVectorIdx(),
+                                                      LVal.getType(),
+                                                      LVal.getAlignment()),
+                                loc);
+  assert(LVal.isExtVectorElt());
+  return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
+      addr, LVal.getExtVectorElts(), LVal.getType(), LVal.getAlignment()));
 }
 
 RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal,
                                      AggValueSlot ResultSlot,
                                      SourceLocation Loc) const {
+  assert(LVal.isSimple());
   // Try not to in some easy cases.
   assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
   if (getEvaluationKind() == TEK_Scalar && !hasPadding()) {
@@ -958,25 +1010,43 @@ RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal,
 RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
                                        AggValueSlot resultSlot) {
   AtomicInfo atomics(*this, src);
+  LValue LVal = atomics.getAtomicLValue();
+  llvm::Value *SrcAddr = nullptr;
+  llvm::AllocaInst *NonSimpleTempAlloca = nullptr;
+  if (LVal.isSimple())
+    SrcAddr = LVal.getAddress();
+  else {
+    if (LVal.isBitField())
+      SrcAddr = LVal.getBitFieldAddr();
+    else if (LVal.isVectorElt())
+      SrcAddr = LVal.getVectorAddr();
+    else {
+      assert(LVal.isExtVectorElt());
+      SrcAddr = LVal.getExtVectorAddr();
+    }
+    NonSimpleTempAlloca = CreateTempAlloca(
+        SrcAddr->getType()->getPointerElementType(), "atomic-load-temp");
+    NonSimpleTempAlloca->setAlignment(getContext().toBits(src.getAlignment()));
+  }
 
   // Check whether we should use a library call.
   if (atomics.shouldUseLibcall()) {
     llvm::Value *tempAddr;
-    if (!resultSlot.isIgnored()) {
-      assert(atomics.getEvaluationKind() == TEK_Aggregate);
-      tempAddr = resultSlot.getAddr();
-    } else {
-      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
-    }
+    if (LVal.isSimple()) {
+      if (!resultSlot.isIgnored()) {
+        assert(atomics.getEvaluationKind() == TEK_Aggregate);
+        tempAddr = resultSlot.getAddr();
+      } else
+        tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+    } else
+      tempAddr = NonSimpleTempAlloca;
 
     // void __atomic_load(size_t size, void *mem, void *return, int order);
     CallArgList args;
     args.add(RValue::get(atomics.getAtomicSizeValue()),
              getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
-             getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(SrcAddr)), getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy);
     args.add(RValue::get(llvm::ConstantInt::get(
                  IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
              getContext().IntTy);
@@ -987,7 +1057,7 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
   }
 
   // Okay, we're doing this natively.
-  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress());
+  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(SrcAddr);
   llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
   load->setAtomic(llvm::SequentiallyConsistent);
 
@@ -1003,40 +1073,46 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
     return RValue::getAggregate(nullptr, false);
 
   // Okay, turn that back into the original value type.
-  return atomics.convertIntToValue(load, resultSlot, loc);
+  if (src.isSimple())
+    return atomics.convertIntToValue(load, resultSlot, loc);
+
+  auto *IntAddr = atomics.emitCastToAtomicIntPointer(NonSimpleTempAlloca);
+  Builder.CreateAlignedStore(load, IntAddr, src.getAlignment().getQuantity());
+  return atomics.convertTempToRValue(NonSimpleTempAlloca, resultSlot, loc);
 }
 
 /// Copy an r-value into memory as part of storing to an atomic type.
 /// This needs to create a bit-pattern suitable for atomic operations.
-void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const {
+void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
+  assert(LVal.isSimple());
   // If we have an r-value, the rvalue should be of the atomic type,
   // which means that the caller is responsible for having zeroed
   // any padding. Just do an aggregate copy of that type.
   if (rvalue.isAggregate()) {
-    CGF.EmitAggregateCopy(dest.getAddress(),
+    CGF.EmitAggregateCopy(LVal.getAddress(),
                           rvalue.getAggregateAddr(),
                           getAtomicType(),
                           (rvalue.isVolatileQualified()
-                           || dest.isVolatileQualified()),
-                          dest.getAlignment());
+                           || LVal.isVolatileQualified()),
+                          LVal.getAlignment());
     return;
   }
 
   // Okay, otherwise we're copying stuff.
 
   // Zero out the buffer if necessary.
-  emitMemSetZeroIfNecessary(dest);
+  emitMemSetZeroIfNecessary();
 
   // Drill past the padding if present.
-  dest = projectValue(dest);
+  LValue TempLVal = projectValue();
 
   // Okay, store the rvalue in.
   if (rvalue.isScalar()) {
-    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true);
+    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), TempLVal, /*init*/ true);
   } else {
-    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true);
+    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), TempLVal, /*init*/ true);
   }
 }
 
@@ -1051,8 +1127,10 @@ llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const {
 
   // Otherwise, make a temporary and materialize into it.
   llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
-  LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
-  emitCopyIntoMemory(rvalue, tempLV);
+  LValue tempLV =
+      CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
+  AtomicInfo Atomics(CGF, tempLV);
+  Atomics.emitCopyIntoMemory(rvalue);
   return temp;
 }
 
@@ -1098,7 +1176,7 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, bool isInit) {
 
   // If this is an initialization, just put the value there normally.
   if (isInit) {
-    atomics.emitCopyIntoMemory(rvalue, dest);
+    atomics.emitCopyIntoMemory(rvalue);
     return;
   }
 
@@ -1214,13 +1292,13 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
   switch (atomics.getEvaluationKind()) {
   case TEK_Scalar: {
     llvm::Value *value = EmitScalarExpr(init);
-    atomics.emitCopyIntoMemory(RValue::get(value), dest);
+    atomics.emitCopyIntoMemory(RValue::get(value));
     return;
   }
 
   case TEK_Complex: {
     ComplexPairTy value = EmitComplexExpr(init);
-    atomics.emitCopyIntoMemory(RValue::getComplex(value), dest);
+    atomics.emitCopyIntoMemory(RValue::getComplex(value));
     return;
   }
 
@@ -1229,8 +1307,8 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
     // of atomic type.
     bool Zeroed = false;
     if (!init->getType()->isAtomicType()) {
-      Zeroed = atomics.emitMemSetZeroIfNecessary(dest);
-      dest = atomics.projectValue(dest);
+      Zeroed = atomics.emitMemSetZeroIfNecessary();
+      dest = atomics.projectValue();
     }
 
     // Evaluate the expression directly into the destination.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 78fd37ce6562..101c3e717e8a 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -691,8 +691,125 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
   llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
-  llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
+static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
+                                         QualType SrcType, QualType DestType) {
+  assert(CGF.hasScalarEvaluationKind(DestType) &&
+         "DestType must have scalar evaluation kind.");
+  assert(!Val.isAggregate() && "Must be a scalar or complex.");
+  return Val.isScalar()
+             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                  const Expr *X, const Expr *V,
+                                  SourceLocation Loc) {
+  // v = x;
+  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  LValue VLValue = CGF.EmitLValue(V);
+  RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc)
+                                     : CGF.EmitAtomicLoad(XLValue, Loc);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
+  switch (CGF.getEvaluationKind(V->getType())) {
+  case TEK_Scalar:
+    CGF.EmitStoreOfScalar(
+        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
+    break;
+  case TEK_Complex:
+    CGF.EmitStoreOfComplex(
+        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
+        /*isInit=*/false);
+    break;
+  case TEK_Aggregate:
+    llvm_unreachable("Must be a scalar or complex.");
+  }
+}
+
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, const Expr *X, const Expr *V,
+                              const Expr *, SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read:
+    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+    break;
+  case OMPC_write:
+  case OMPC_update:
+  case OMPC_capture:
+    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+  case OMPC_unknown:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find first clause (skip seq_cst clause, if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                    S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 5624f709c9aa..d1ab98510f78 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -2716,7 +2716,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
       // and not, for instance, a pointer to a function.
       if (D.getDeclSpec().containsPlaceholderType() &&
           !FTI.hasTrailingReturnType() && chunkIndex == 0 &&
-          !S.getLangOpts().CPlusPlus14) {
+          !S.getLangOpts().CPlusPlus14 && !S.getLangOpts().MSVCCompat) {
        S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
               D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto ?
               diag::err_auto_missing_trailing_return
diff --git a/clang/test/OpenMP/atomic_read_codegen.c b/clang/test/OpenMP/atomic_read_codegen.c
new file mode 100644
index 000000000000..af3d3920b5e1
--- /dev/null
+++ b/clang/test/OpenMP/atomic_read_codegen.c
@@ -0,0 +1,333 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+_Bool bv, bx;
+char cv, cx;
+unsigned char ucv, ucx;
+short sv, sx;
+unsigned short usv, usx;
+int iv, ix;
+unsigned int uiv, uix;
+long lv, lx;
+unsigned long ulv, ulx;
+long long llv, llx;
+unsigned long long ullv, ullx;
+float fv, fx;
+double dv, dx;
+long double ldv, ldx;
+_Complex int civ, cix;
+_Complex float cfv, cfx;
+_Complex double cdv, cdx;
+
+typedef int int4 __attribute__((__vector_size__(16)));
+int4 int4x;
+
+struct BitFields {
+  int : 32;
+  int a : 31;
+} bfx;
+
+struct BitFields_packed {
+  int : 32;
+  int a : 31;
+} __attribute__ ((__packed__)) bfx_packed;
+
+struct BitFields2 {
+  int : 31;
+  int a : 1;
+} bfx2;
+
+struct BitFields2_packed {
+  int : 31;
+  int a : 1;
+} __attribute__ ((__packed__)) bfx2_packed;
+
+struct BitFields3 {
+  int : 11;
+  int a : 14;
+} bfx3;
+
+struct BitFields3_packed {
+  int : 11;
+  int a : 14;
+} __attribute__ ((__packed__)) bfx3_packed;
+
+struct BitFields4 {
+  short : 16;
+  int a: 1;
+  long b : 7;
+} bfx4;
+
+struct BitFields4_packed {
+  short : 16;
+  int a: 1;
+  long b : 7;
+} __attribute__ ((__packed__)) bfx4_packed;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+float2 float2x;
+
+register int rix __asm__("0");
+
+int main() {
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = cx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  ucv = ucx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = sx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = usx;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  iv = ix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = uix;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  lv = lx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = llx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ullx;
+// CHECK: load atomic i32* bitcast (float*
+// CHECK: bitcast i32 {{.*}} to float
+// CHECK: store float
+#pragma omp atomic read
+  fv = fx;
+// CHECK: load atomic i64* bitcast (double*
+// CHECK: bitcast i64 {{.*}} to double
+// CHECK: store double
+#pragma omp atomic read
+  dv = dx;
+// CHECK: [[LD:%.+]] = load atomic i128* bitcast (x86_fp80*
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
+// CHECK: store i128 [[LD]], i128* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load x86_fp80* [[LDTEMP]]
+// CHECK: store x86_fp80 [[LD]]
+#pragma omp atomic read
+  ldv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = cix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = cfx;
+// CHECK: call{{.*}} void @__atomic_load(i64 16,
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic seq_cst read
+  cdv = cdx;
+// CHECK: load atomic i64*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = ulx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = bx;
+// CHECK: load atomic i8*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i8
+#pragma omp atomic read, seq_cst
+  ucv = cx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = lx;
+// CHECK: load atomic i32*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i32
+#pragma omp atomic seq_cst, read
+  iv = uix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = ix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i64
+#pragma omp atomic read
+  lv = cix;
+// CHECK: load atomic i32*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = fx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = dx;
+// CHECK: load atomic i128*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+#pragma omp atomic read
+  fv = cix;
+// CHECK: load atomic i16*
+// CHECK: store double
+#pragma omp atomic read
+  dv = sx;
+// CHECK: load atomic i8*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = bx;
+// CHECK: load atomic i16*
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = usx;
+// CHECK: load atomic i64*
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic read
+  cdv = llx;
+// CHECK: [[I128VAL:%.+]] = load atomic i128* bitcast (<4 x i32>* @{{.+}} to i128*) seq_cst
+// CHECK: [[I128PTR:%.+]] = bitcast <4 x i32>* [[LDTEMP:%.+]] to i128*
+// CHECK: store i128 [[I128VAL]], i128* [[I128PTR]]
+// CHECK: [[LD:%.+]] = load <4 x i32>* [[LDTEMP]]
+// CHECK: extractelement <4 x i32> [[LD]]
+// CHECK: store i8
+#pragma omp atomic read
+  bv = int4x[0];
+// CHECK: [[LD:%.+]] = load atomic i32* bitcast (i8* getelementptr (i8* bitcast (%{{.+}}* @{{.+}} to i8*), i64 4) to i32*) seq_cst
+// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1
+// CHECK: ashr i32 [[SHL]], 1
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx.a;
+// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8*
+// CHECK: call void @__atomic_load(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @bfx_packed to i8*), i64 4), i8* [[LDTEMP_VOID_PTR]], i32 5)
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1
+// CHECK: ashr i32 [[SHL]], 1
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields2* @bfx2, i32 0, i32 0) seq_cst
+// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: ashr i32 [[LD]], 31
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx2.a;
+// CHECK: [[LD:%.+]] = load atomic i8* getelementptr (i8* bitcast (%struct.BitFields2_packed* @bfx2_packed to i8*), i64 3) seq_cst
+// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]]
+// CHECK: ashr i8 [[LD]], 7
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx2_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields3* @bfx3, i32 0, i32 0) seq_cst
+// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 7
+// CHECK: ashr i32 [[SHL]], 18
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx3.a;
+// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i24* [[LDTEMP:%.+]] to i8*
+// CHECK: call void @__atomic_load(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @bfx3_packed to i8*), i64 1), i8* [[LDTEMP_VOID_PTR]], i32 5)
+// CHECK: [[LD:%.+]] = load i24* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i24 [[LD]], 7
+// CHECK: [[ASHR:%.+]] = ashr i24 [[SHL]], 10
+// CHECK: sext i24 [[ASHR]] to i32
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx3_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @bfx4 to i64*) seq_cst
+// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i64* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 47
+// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 63
+// CHECK: trunc i64 [[ASHR]] to i32
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4.a;
+// CHECK: [[LD:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) seq_cst
+// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i8 [[LD]], 7
+// CHECK: [[ASHR:%.+]] = ashr i8 [[SHL]], 7
+// CHECK: sext i8 [[ASHR]] to i32
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4_packed.a;
+// CHECK: [[LD:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @bfx4 to i64*) seq_cst
+// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i64* [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 40
+// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 57
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4.b;
+// CHECK: [[LD:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) seq_cst
+// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]]
+// CHECK: [[ASHR:%.+]] = ashr i8 [[LD]], 1
+// CHECK: sext i8 [[ASHR]] to i64
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx4_packed.b;
+// CHECK: [[LD:%.+]] = load atomic i32* bitcast (<2 x float>* @{{.+}} to i32*) seq_cst
+// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i32*
+// CHECK: store i32 [[LD]], i32* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load <2 x float>* [[LDTEMP]]
+// CHECK: extractelement <2 x float> [[LD]]
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = float2x.x;
+// CHECK: call{{.*}} i{{[0-9]+}} @llvm.read_register
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+#pragma omp atomic read seq_cst
+  dv = rix;
+  return 0;
+}
+
+#endif
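
For context, a minimal standalone sketch (illustrative only, not part of the patch) of the construct this change enables: an atomic read of a bit-field member, mirroring the BitFields4 case in the test above. It assumes a clang built with this patch and is compiled with -fopenmp.

// Illustrative example; layout taken from the BitFields4 struct in the test.
#include <stdio.h>

struct BitFields4 {
  short : 16;
  int a : 1;
  long b : 7;
} bfx4 = {0, 3};  /* unnamed bit-field is skipped: a = 0, b = 3 */

int main(void) {
  long double ldv;
  /* Read of bfx4.b goes through AtomicInfo's new non-simple-LValue path:
     the enclosing aligned storage unit is loaded atomically, then the
     field is extracted from a temporary. */
#pragma omp atomic read
  ldv = bfx4.b;
  printf("%Lf\n", ldv);
  return 0;
}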