AMDGPU: Remove denormal subtarget features

Switch to using the denormal-fp-math/denormal-fp-math-f32 attributes.
2024-12-03 08:51:43 +00:00 · 2019-11-18 16:48:07 +05:30 · 2019-11-18 16:48:07 +05:30 · 40d3e7765c
commit 40d3e7765c
parent 8bdcb1c2a9
71 changed files with 447 additions and 421 deletions
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@ -494,30 +494,6 @@ def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//

-// Denormal handling for fp64 and fp16 is controlled by the same
-// config register when fp16 supported.
-// TODO: Do we need a separate f16 setting when not legal?
-def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
-  "FP64FP16Denormals",
-  "true",
-  "Enable double and half precision denormal handling",
-  [FeatureFP64]
->;
-
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
-  "FP64FP16Denormals",
-  "true",
-  "Enable double and half precision denormal handling",
-  [FeatureFP64, FeatureFP64FP16Denormals]
->;
-
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
-  "FP64FP16Denormals",
-  "true",
-  "Enable half precision denormal handling",
-  [FeatureFP64FP16Denormals]
->;
-
 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
  "FPExceptions",
  "true",
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@ -15,6 +15,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
+#include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ConstantFolding.h"
@ -1387,7 +1388,9 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  DT = DTWP ? &DTWP->getDomTree() : nullptr;

  HasUnsafeFPMath = hasUnsafeFPMath(F);
-  HasFP32Denormals = ST->hasFP32Denormals(F);
+
+  AMDGPU::SIModeRegisterDefaults Mode(F);
+  HasFP32Denormals = Mode.allFP32Denormals();

  bool MadeChange = false;

--- a/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/lib/Target/AMDGPU/AMDGPUFeatures.td
@ -18,15 +18,6 @@ def FeatureFMA : SubtargetFeature<"fmaf",
  "Enable single precision FMA (not as fast as mul+add, but fused)"
 >;

-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
-  "FP32Denormals",
-  "true",
-  "Enable single precision denormal handling"
->;
-
 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
  "localmemorysize"#Value,
  "LocalMemorySize",
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@ -402,7 +402,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  }
 #endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
-  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
+  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
  return SelectionDAGISel::runOnMachineFunction(MF);
 }

--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@ -1644,9 +1644,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
  const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();

  // float fr = mad(fqneg, fb, fa);
-  unsigned OpCode = MFI->getMode().allFP32Denormals() ?
-                    (unsigned)AMDGPUISD::FMAD_FTZ :
-                    (unsigned)ISD::FMAD;
+  unsigned OpCode = !MFI->getMode().allFP32Denormals() ?
+                    (unsigned)ISD::FMAD :
+                    (unsigned)AMDGPUISD::FMAD_FTZ;
+
  SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);

  // int iq = (int)fq;
@ -1729,9 +1730,10 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

    // Compute denominator reciprocal.
-    unsigned FMAD = MFI->getMode().allFP32Denormals() ?
-                    (unsigned)AMDGPUISD::FMAD_FTZ :
-                    (unsigned)ISD::FMAD;
+    unsigned FMAD = !MFI->getMode().allFP32Denormals() ?
+                    (unsigned)ISD::FMAD :
+                    (unsigned)AMDGPUISD::FMAD_FTZ;
+

    SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
    SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@ -807,3 +807,8 @@ def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
 >;
+
+def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+  [(fmad node:$src0, node:$src1, node:$src2),
+   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
+>;
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@ -18,7 +18,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
  LocalMemoryObjects(),
  ExplicitKernArgSize(0),
  LDSSize(0),
-  Mode(MF.getFunction(), MF.getSubtarget<GCNSubtarget>()),
+  Mode(MF.getFunction()),
  IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
  NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
  MemoryBound(false),
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@ -59,13 +59,6 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
  FullFS += FS;
  ParseSubtargetFeatures(GPU, FullFS);

-  // FIXME: I don't think think Evergreen has any useful support for
-  // denormals, but should be checked. Should we issue a warning somewhere
-  // if someone tries to enable these?
-  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    FP32Denormals = false;
-  }
-
  HasMulU24 = getGeneration() >= EVERGREEN;
  HasMulI24 = hasCaymanISA();

@ -76,9 +69,6 @@ GCNSubtarget &
 GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                              StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics
-  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
-  // enabled, but some instructions do not respect them and they run at the
-  // double precision rate, so don't enable by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
@ -93,15 +83,6 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";

-  // FIXME: I don't think think Evergreen has any useful support for
-  // denormals, but should be checked. Should we issue a warning somewhere
-  // if someone tries to enable these?
-  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    FullFS += "+fp64-fp16-denormals,+fp32-denormals,";
-  } else {
-    FullFS += "-fp32-denormals,";
-  }
-
  FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS

  // Disable mutually exclusive bits.
@ -172,7 +153,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
  TargetTriple(TT),
  Has16BitInsts(false),
  HasMadMixInsts(false),
-  FP32Denormals(false),
  FPExceptions(false),
  HasSDWA(false),
  HasVOP3PInsts(false),
@ -200,7 +180,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
    FastFMAF32(false),
    HalfRate64Ops(false),

-    FP64FP16Denormals(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
    CodeObjectV3(false),
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@ -66,7 +66,6 @@ private:
 protected:
  bool Has16BitInsts;
  bool HasMadMixInsts;
-  bool FP32Denormals;
  bool FPExceptions;
  bool HasSDWA;
  bool HasVOP3PInsts;
@ -149,15 +148,6 @@ public:
    return HasMadMixInsts;
  }

-  bool hasFP32Denormals(const Function &F) const {
-    // FIXME: This should not be a property of the subtarget. This should be a
-    // property with a default set by the calling convention which can be
-    // overridden by attributes. For now, use the subtarget feature as a
-    // placeholder attribute. The function arguments only purpose is to
-    // discourage use without a function context until this is removed.
-    return FP32Denormals;
-  }
-
  bool hasFPExceptions() const {
    return FPExceptions;
  }
@ -304,7 +294,6 @@ protected:
  bool HalfRate64Ops;

  // Dynamially set bits that enable features.
-  bool FP64FP16Denormals;
  bool FlatForGlobal;
  bool AutoWaitcntBeforeBarrier;
  bool CodeObjectV3;
@ -636,20 +625,6 @@ public:
  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                           const Function &) const;

-  /// Alias for hasFP64FP16Denormals
-  bool hasFP16Denormals(const Function &F) const {
-    return FP64FP16Denormals;
-  }
-
-  /// Alias for hasFP64FP16Denormals
-  bool hasFP64Denormals(const Function &F) const {
-    return FP64FP16Denormals;
-  }
-
-  bool hasFP64FP16Denormals(const Function &F) const {
-    return FP64FP16Denormals;
-  }
-
  bool supportsMinMaxDenormModes() const {
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@ -941,8 +941,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,

  // FIXME: dx10_clamp can just take the caller setting, but there seems to be
  // no way to support merge for backend defined attributes.
-  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
-  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
+  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
  return CallerMode.isInlineCompatible(CalleeMode);
 }

--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@ -133,7 +133,7 @@ public:
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
-      HasFP32Denormals(ST->hasFP32Denormals(F)) { }
+      HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()) {}

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@ -1006,7 +1006,7 @@ class MULADD_Common <bits<5> inst> : R600_3OP <

 class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
  inst, "MULADD_IEEE",
-  [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
+  [(set f32:$dst, (any_fmad f32:$src0, f32:$src1, f32:$src2))]
 >;

 class FMA_Common <bits<5> inst> : R600_3OP <
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@ -1333,8 +1333,7 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
  return true;
 }

-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
-                                               const GCNSubtarget &ST) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@ -1346,11 +1345,25 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

-  // FIXME: Split this when denormal-fp-math is used
-  FP32InputDenormals = ST.hasFP32Denormals(F);
-  FP32OutputDenormals = FP32InputDenormals;
-  FP64FP16InputDenormals = ST.hasFP64FP16Denormals(F);
-  FP64FP16OutputDenormals = FP64FP16InputDenormals;
+  StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+  if (!DenormF32Attr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
+    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
+
+  StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
+  if (!DenormAttr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+
+    if (DenormF32Attr.empty()) {
+      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+    }
+
+    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
 }

 namespace {
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@ -695,19 +695,13 @@ struct SIModeRegisterDefaults {
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

-  // FIXME: Should not depend on the subtarget
-  SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
+  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    const bool IsCompute = AMDGPU::isCompute(CC);

    SIModeRegisterDefaults Mode;
-    Mode.DX10Clamp = true;
    Mode.IEEE = IsCompute;
-    Mode.FP32InputDenormals = true;
-    Mode.FP32OutputDenormals = true;
-    Mode.FP64FP16InputDenormals = true;
-    Mode.FP64FP16OutputDenormals = true;
    return Mode;
  }

--- a/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/test/Analysis/CostModel/AMDGPU/fdiv.ll
@ -278,5 +278,5 @@ define amdgpu_kernel void @rcp_v2f16_f32_ftzdaz(<2 x half> addrspace(1)* %out, <
  ret void
 }

-attributes #0 = { nounwind "target-features"="+fp32-denormals" }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti  -denormal-fp-math-f32=preserve-sign -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s

 ---
 name: fmad_ftz_s32_vvvv
--- a/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s

 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.

--- a/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s

 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.

--- a/test/CodeGen/AMDGPU/amdgcn-ieee.ll
+++ b/test/CodeGen/AMDGPU/amdgcn-ieee.ll
@ -150,7 +150,6 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
  ret void
 }

-; FIXME: Should have denormals off by default.
 ; GCN-LABEL: {{^}}ps_ieee_mode_on:
 ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
 ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
--- a/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@ -338,8 +338,8 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, f
 }

 attributes #0 = { nounwind optnone noinline }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }

 !0 = !{float 2.500000e+00}
 !1 = !{float 5.000000e-01}
--- a/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/test/CodeGen/AMDGPU/clamp-modifier.ll
@ -389,10 +389,10 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1

 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1

-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" }
-attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee.ieee" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }

 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2, !3}
--- a/test/CodeGen/AMDGPU/clamp.ll
+++ b/test/CodeGen/AMDGPU/clamp.ll
@ -767,8 +767,8 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1

-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,-fp-exceptions" "no-nans-fp-math"="false" }
-attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
-attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
+attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
+attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
--- a/test/CodeGen/AMDGPU/debug-value.ll
+++ b/test/CodeGen/AMDGPU/debug-value.ll
@ -89,7 +89,7 @@ declare float @barney() #2
 declare void @eggs(float) #2
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1

-attributes #0 = { convergent nounwind "target-cpu"="gfx900" "target-features"="+fp32-denormals" }
+attributes #0 = { convergent nounwind "target-cpu"="gfx900" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { nounwind }

--- a/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/test/CodeGen/AMDGPU/default-fp-mode.ll
@ -82,7 +82,15 @@ define amdgpu_kernel void @test_f32_f16_f64_denormals(half addrspace(1)* %out0,
  ret void
 }

-; FIXME: Denormals should be off by default
+; GCN-LABEL: {{^}}test_just_f32_attr_flush
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 1
+define amdgpu_kernel void @test_just_f32_attr_flush(float addrspace(1)* %out0, double addrspace(1)* %out1) #9 {
+  store float 0.0, float addrspace(1)* %out0
+  store double 0.0, double addrspace(1)* %out1
+  ret void
+}
+
 ; GCN-LABEL: {{^}}kill_gs_const:
 ; GCN: FloatMode: 240
 ; GCN: IeeeMode: 0
@ -110,10 +118,11 @@ declare void @llvm.amdgcn.kill(i1)

 attributes #0 = { nounwind "target-cpu"="tahiti" }
 attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-features"="+fp64-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" }
-attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" }
-attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
+attributes #2 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #4 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #6 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #8 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #9 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
+++ b/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=+fast-fmaf -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mattr=-fast-fmaf -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s

-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s
+; RUN: llc -march=amdgcn -mattr=+fast-fmaf -denormal-fp-math-f32=ieee  -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s
+; RUN: llc -march=amdgcn -mattr=-fast-fmaf -denormal-fp-math-f32=ieee -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s

 ; FIXME: This should also fold when fma is actually fast if an FMA
 ; exists in the original program.
--- a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp-exceptions -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s

 ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
 ; GCN-FLUSH:   v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
@ -907,4 +907,4 @@ declare float @llvm.amdgcn.frexp.mant.f32(float) #0

 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
-attributes #2 = { "target-features"="-fp64-fp16-denormals" }
+attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }
--- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@ -739,6 +739,6 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
 }

 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.ll
@ -625,9 +625,9 @@ define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
 }

 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
-attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }
-attributes #5 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" "target-cpu"="gfx900" }
-attributes #6 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="gfx900" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
+attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="tonga" }
+attributes #5 = { nounwind "denormal-fp-math"="ieee,ieee" "target-cpu"="gfx900" }
+attributes #6 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "target-cpu"="gfx900" }
--- a/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/test/CodeGen/AMDGPU/fdiv.f16.ll
@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s

 ; Make sure fdiv is promoted to f32.

@ -263,9 +263,9 @@ define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(half addrspace(1)* %out) #0 {
  ret void
 }

-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.sqrt.f16(half) #1
-declare half @llvm.fabs.f16(half) #1
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+declare half @llvm.sqrt.f16(half) #2
+declare half @llvm.fabs.f16(half) #2

 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/fdiv.ll
+++ b/test/CodeGen/AMDGPU/fdiv.ll
@ -345,9 +345,8 @@ entry:
  ret void
 }

-
-attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals,+fp64-fp16-denormals,-flat-for-global" }
-attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals,-flat-for-global" }
-attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals,-flat-for-global" }
+attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
+attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
+attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" "target-features"="-flat-for-global" }

 !0 = !{float 2.500000e+00}
--- a/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s

 ; GCN-LABEL: {{^}}div_1_by_x_25ulp:
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
--- a/test/CodeGen/AMDGPU/fdot2.ll
+++ b/test/CodeGen/AMDGPU/fdot2.ll
@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx1012 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
 ; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)

 ; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
--- a/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/test/CodeGen/AMDGPU/fma-combine.ll
@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s

 ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs

--- a/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/test/CodeGen/AMDGPU/fmaxnum.ll
@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

 ; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
 ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
@ -205,7 +205,7 @@ define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) #0 {
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
 ; GCN-NOT: v_max_f32
-define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
+define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) #0 {
  %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
  ret <3 x float> %val
 }
@ -218,5 +218,5 @@ declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
 declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
 declare double @llvm.maxnum.f64(double, double)

-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/fminnum.ll
+++ b/test/CodeGen/AMDGPU/fminnum.ll
@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

 ; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on:
 ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
@ -225,5 +225,5 @@ declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
 declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
 declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1

-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@ -1,8 +1,8 @@
-; XUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
+; XUN: llc -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s

 ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
 ; make add an instruction if the fadd has more than one use.
--- a/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/test/CodeGen/AMDGPU/fmuladd.f16.ll
@ -1,13 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s

-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s

-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GFX10-FLUSH,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GFX10-FLUSH,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GFX10-FLUSH,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GFX10-FLUSH,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-STRICT,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-CONTRACT,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s

 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare half @llvm.fmuladd.f16(half, half, half) #1
--- a/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/test/CodeGen/AMDGPU/fmuladd.f32.ll
@ -1,21 +1,21 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s

-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s


-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX900 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX900 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX900 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX900 %s

-; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX906 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX906 %s

 ; FIXME: Should probably test this, but sometimes selecting fmac is painful to match.
-; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s
+; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s


 ; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow.
--- a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@ -1,12 +1,12 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s

-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s

 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
--- a/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/test/CodeGen/AMDGPU/fneg-combines.ll
@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=-fp32-denormals,+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=-fp32-denormals,+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s

-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s

 ; --------------------------------------------------------------------------------
 ; fadd tests
@ -2562,6 +2562,6 @@ declare float @llvm.amdgcn.fmul.legacy(float, float) #1
 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
--- a/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/test/CodeGen/AMDGPU/fpext-free.ll
@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
-; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
-; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
+; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
+; RUN: llc -march=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s

 ;  fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)

--- a/test/CodeGen/AMDGPU/frem.ll
+++ b/test/CodeGen/AMDGPU/frem.ll
@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs  < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs  < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}frem_f32:
 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
@ -109,5 +109,5 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
   ret void
 }

-attributes #0 = { nounwind "unsafe-fp-math"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ b/test/CodeGen/AMDGPU/hsa-fp-mode.ll
@ -92,10 +92,10 @@ define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)

 attributes #0 = { nounwind "target-cpu"="kaveri" "target-features"="-code-object-v3" }
 attributes #1 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" }
-attributes #2 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,+fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,-fp64-fp16-denormals" }
-attributes #4 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,+fp64-fp16-denormals" }
-attributes #5 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,-fp64-fp16-denormals" }
+attributes #2 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #4 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math"="ieee,ieee" }
+attributes #5 = { nounwind "target-features"="-code-object-v3" "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
 attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
 attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" }
--- a/test/CodeGen/AMDGPU/known-never-snan.ll
+++ b/test/CodeGen/AMDGPU/known-never-snan.ll
@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

 ; Mostly overlaps with fmed3.ll to stress specific cases of
 ; isKnownNeverSNaN.
@ -667,7 +667,7 @@ declare float @llvm.amdgcn.rsq.f32(float) #1
 declare float @llvm.amdgcn.fract.f32(float) #1
 declare float @llvm.amdgcn.cubeid(float, float, float) #0

-attributes #0 = { nounwind }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }

 !0 = !{float 2.500000e+00}
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

 declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)

--- a/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

 declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c)

--- a/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@ -139,9 +139,9 @@ define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, dou
 }

 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
-attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
+attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
+attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }

 !0 = !{float 2.500000e+00}
--- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@ -1,9 +1,9 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s

 declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
--- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s

 declare half @llvm.maxnum.f16(half %a, half %b)
 declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
@ -88,7 +88,7 @@ define amdgpu_kernel void @maxnum_f16(
 ; GFX9-NEXT:    s_endpgm
    half addrspace(1)* %r,
    half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
@ -157,7 +157,7 @@ define amdgpu_kernel void @maxnum_f16_imm_a(
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    half addrspace(1)* %r,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
  %b.val = load half, half addrspace(1)* %b
  %r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val)
@ -225,7 +225,7 @@ define amdgpu_kernel void @maxnum_f16_imm_b(
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    half addrspace(1)* %a) #0 {
 entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0)
@ -308,7 +308,7 @@ define amdgpu_kernel void @maxnum_v2f16(
 ; GFX9-NEXT:    s_endpgm
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@ -376,7 +376,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a(
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@ -443,7 +443,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b(
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    <2 x half> addrspace(1)* %a) #0 {
 entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@ -542,7 +542,7 @@ define amdgpu_kernel void @maxnum_v3f16(
 ; GFX9-NEXT:    s_endpgm
    <3 x half> addrspace(1)* %r,
    <3 x half> addrspace(1)* %a,
-    <3 x half> addrspace(1)* %b) {
+    <3 x half> addrspace(1)* %b) #0 {
 entry:
  %a.val = load <3 x half>, <3 x half> addrspace(1)* %a
  %b.val = load <3 x half>, <3 x half> addrspace(1)* %b
@ -655,7 +655,7 @@ define amdgpu_kernel void @maxnum_v4f16(
 ; GFX9-NEXT:    s_endpgm
    <4 x half> addrspace(1)* %r,
    <4 x half> addrspace(1)* %a,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
  %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
@ -746,10 +746,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a(
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    <4 x half> addrspace(1)* %r,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
  %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
  store <4 x half> %r.val, <4 x half> addrspace(1)* %r
  ret void
 }
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s

 declare half @llvm.minnum.f16(half %a, half %b)
 declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
@ -88,7 +88,7 @@ define amdgpu_kernel void @minnum_f16_ieee(
 ; GFX9-NEXT:    s_endpgm
    half addrspace(1)* %r,
    half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
@ -97,7 +97,7 @@ entry:
  ret void
 }

-define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) {
+define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) #0 {
 ; SI-LABEL: minnum_f16_no_ieee:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
@ -180,7 +180,7 @@ define amdgpu_kernel void @minnum_f16_imm_a(
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    half addrspace(1)* %r,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
  %b.val = load half, half addrspace(1)* %b
  %r.val = call half @llvm.minnum.f16(half 3.0, half %b.val)
@ -248,7 +248,7 @@ define amdgpu_kernel void @minnum_f16_imm_b(
 ; GFX9-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    half addrspace(1)* %a) #0 {
 entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = call half @llvm.minnum.f16(half %a.val, half 4.0)
@ -331,7 +331,7 @@ define amdgpu_kernel void @minnum_v2f16_ieee(
 ; GFX9-NEXT:    s_endpgm
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@ -340,7 +340,7 @@ entry:
  ret void
 }

-define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) {
+define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) #0 {
 ; SI-LABEL: minnum_v2f16_no_ieee:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
@ -429,7 +429,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_a(
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %b) {
+    <2 x half> addrspace(1)* %b) #0 {
 entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@ -496,7 +496,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_b(
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    <2 x half> addrspace(1)* %a) #0 {
 entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@ -595,7 +595,7 @@ define amdgpu_kernel void @minnum_v3f16(
 ; GFX9-NEXT:    s_endpgm
    <3 x half> addrspace(1)* %r,
    <3 x half> addrspace(1)* %a,
-    <3 x half> addrspace(1)* %b) {
+    <3 x half> addrspace(1)* %b) #0 {
 entry:
  %a.val = load <3 x half>, <3 x half> addrspace(1)* %a
  %b.val = load <3 x half>, <3 x half> addrspace(1)* %b
@ -708,7 +708,7 @@ define amdgpu_kernel void @minnum_v4f16(
 ; GFX9-NEXT:    s_endpgm
    <4 x half> addrspace(1)* %r,
    <4 x half> addrspace(1)* %a,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
  %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
@ -799,10 +799,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a(
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
    <4 x half> addrspace(1)* %r,
-    <4 x half> addrspace(1)* %b) {
+    <4 x half> addrspace(1)* %b) #0 {
 entry:
  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
  %r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
  store <4 x half> %r.val, <4 x half> addrspace(1)* %r
  ret void
 }
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/test/CodeGen/AMDGPU/mad-combine.ll
@ -1,14 +1,14 @@
 ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.

-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD  -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD  -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s

 ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs

 ; Make sure we don't form mad with denormals
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s

 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare float @llvm.fabs.f32(float) #0
--- a/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/test/CodeGen/AMDGPU/mad-mix-hi.ll
@ -143,5 +143,5 @@ declare float @llvm.maxnum.f32(float, float) #1
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1

-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
--- a/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/test/CodeGen/AMDGPU/mad-mix-lo.ll
@ -310,5 +310,5 @@ declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #
 declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1

-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
--- a/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/test/CodeGen/AMDGPU/mad-mix.ll
@ -557,6 +557,6 @@ declare float @llvm.maxnum.f32(float, float) #2
 declare float @llvm.fmuladd.f32(float, float, float) #2
 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2

-attributes #0 = { nounwind "target-features"="-fp32-denormals" }
-attributes #1 = { nounwind "target-features"="+fp32-denormals" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
 attributes #2 = { nounwind readnone speculatable }
--- a/test/CodeGen/AMDGPU/madak.ll
+++ b/test/CodeGen/AMDGPU/madak.ll
@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s

 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 declare float @llvm.fabs.f32(float) nounwind readnone
@ -19,7 +19,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; GFX10-MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; FMA:   v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@ -53,7 +53,7 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add
 ; MAD-DAG:      v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
 ; FMA-DAG:      v_fmac_f32_e32 [[VK]], [[VA]], [[VC]]
 ; GCN:          s_endpgm
-define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@ -82,7 +82,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
 ; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
+define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -110,7 +110,7 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o
 ; MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; GFX10-MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; FMA:   v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@ -134,7 +134,7 @@ define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out
 ; GFX6_8_9:     v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
 ; GFX10-MAD:    v_mad_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
 ; FMA:          v_fma_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
-define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
+define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -155,7 +155,7 @@ define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float
 ; GFX6_8_9:      v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
 ; GFX10-MAD:     v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
 ; FMA:           v_fmaak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
-define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -173,7 +173,7 @@ define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float
 ; GFX8_9:  v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GFX10-MAD: v_mac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
 ; FMA:       v_fmac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %mul = fmul float %a, %b
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out, align 4
@ -189,7 +189,7 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo
 ; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
 ; FMA:       v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
 ; GCN:       s_endpgm
-define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@ -215,7 +215,7 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia
 ; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
 ; FMA:       v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
 ; GCN:       s_endpgm
-define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@ -265,4 +265,4 @@ bb4:
  ret void
 }

-attributes #0 = { nounwind}
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/madmk.ll
+++ b/test/CodeGen/AMDGPU/madmk.ll
@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

 ; FIXME: None of these trigger madmk emission anymore. It is still
 ; possible, but requires the correct registers to be used which is
@ -12,7 +12,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
-define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -35,7 +35,7 @@ define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float add
 ; GCN-DAG: v_mac_f32_e32 [[VB]], [[SK]], [[VA]]
 ; GCN-DAG: v_mac_f32_e32 [[VC]], [[SK]], [[VA]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@ -64,7 +64,7 @@ define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, flo
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
-define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -83,7 +83,7 @@ define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out
 ; GCN-NOT: v_madmk_f32
 ; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
-define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
+define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

@ -99,7 +99,7 @@ define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x
 ; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
 ; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
+define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -115,7 +115,7 @@ define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float
 ; GCN-NOT: v_madmk_f32
 ; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
-define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
+define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -132,7 +132,7 @@ define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
 ; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[SK]], [[VB]]
-define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -153,7 +153,7 @@ define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalia
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{[sv][0-9]+}}, |{{v[0-9]+}}|
-define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -174,7 +174,7 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalia
 ; GCN: buffer_load_dword [[A:v[0-9]+]]
 ; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
 ; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0
-define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -191,7 +191,7 @@ define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias
 ; SI: s_or_b64
 ; SI: s_xor_b64
 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
-define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
+define amdgpu_kernel void @kill_madmk_verifier_error() #0 {
 bb:
  br label %bb2

@ -214,4 +214,5 @@ bb6:                                              ; preds = %bb2

 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1

+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
--- a/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s

 ; Make sure that AMDGPUCodeGenPrepare introduces mul24 intrinsics
 ; after SLSR, as the intrinsics would interfere. It's unclear if these
@ -254,7 +254,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
 declare void @foo(i32) #0
 declare float @llvm.fmuladd.f32(float, float, float) #1

-attributes #0 = { nounwind willreturn }
+attributes #0 = { nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }

 !0 = !{float 2.500000e+00}
--- a/test/CodeGen/AMDGPU/omod.ll
+++ b/test/CodeGen/AMDGPU/omod.ll
@ -275,10 +275,10 @@ declare half @llvm.minnum.f16(half, half) #1
 declare half @llvm.maxnum.f16(half, half) #1
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1

-attributes #0 = { nounwind "target-features"="-fp32-denormals" "no-signed-zeros-fp-math"="true" }
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-features"="+fp32-denormals" "no-signed-zeros-fp-math"="true" }
-attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" "no-signed-zeros-fp-math"="true" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" }
+attributes #3 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
 attributes #4 = { nounwind "no-signed-zeros-fp-math"="false" }

 !llvm.dbg.cu = !{!0}
--- a/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/test/CodeGen/AMDGPU/operand-folding.ll
@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

 ; CHECK-LABEL: {{^}}fold_sgpr:
 ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
-define amdgpu_kernel void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
+define amdgpu_kernel void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) #1 {
 entry:
  %tmp0 = icmp ne i32 %fold, 0
  br i1 %tmp0, label %if, label %endif
@ -20,7 +20,7 @@ endif:

 ; CHECK-LABEL: {{^}}fold_imm:
 ; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
-define amdgpu_kernel void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
+define amdgpu_kernel void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) #1 {
 entry:
  %fold = add i32 3, 2
  %tmp0 = icmp ne i32 %cmp, 0
@ -46,7 +46,7 @@ endif:
 ; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
 ; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},

-define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
+define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) #1 {
 entry:
  %tmp0 = add i64 %val, 1
  store i64 %tmp0, i64 addrspace(1)* %out
@ -61,7 +61,7 @@ entry:
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}

-define amdgpu_kernel void @vector_inline(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_inline(<4 x i32> addrspace(1)* %out) #1 {
 entry:
  %tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = add i32 %tmp0, 1
@ -80,7 +80,7 @@ entry:
 ; CHECK-LABEL: {{^}}imm_one_use:
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}

-define amdgpu_kernel void @imm_one_use(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @imm_one_use(i32 addrspace(1)* %out) #1 {
 entry:
  %tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = xor i32 %tmp0, 100
@ -94,7 +94,7 @@ entry:
 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}

-define amdgpu_kernel void @vector_imm(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_imm(<4 x i32> addrspace(1)* %out) #1 {
 entry:
  %tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = add i32 %tmp0, 1
@ -114,7 +114,7 @@ entry:
 ; CHECK: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
 ; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
 ; CHECK: buffer_store_dword v[[LO]]
-define amdgpu_kernel void @no_fold_tied_subregister() {
+define amdgpu_kernel void @no_fold_tied_subregister() #1 {
  %tmp1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
  %tmp2 = extractelement <2 x float> %tmp1, i32 0
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
@ -128,7 +128,7 @@ define amdgpu_kernel void @no_fold_tied_subregister() {
 ; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @no_extra_fold_on_same_opnd() {
+define void @no_extra_fold_on_same_opnd() #1 {
 entry:
  %s0 = load i32, i32 addrspace(5)* undef, align 4
  %s0.i64= zext i32 %s0 to i64
@ -151,3 +151,4 @@ if.else:
 declare i32 @llvm.amdgcn.workitem.id.x() #0

 attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ b/test/CodeGen/AMDGPU/rcp-pattern.ll
@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

@ -150,8 +150,8 @@ define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.sqrt.f32(float) #1

-attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }

 !0 = !{float 2.500000e+00}
--- a/test/CodeGen/AMDGPU/rcp_iflag.ll
+++ b/test/CodeGen/AMDGPU/rcp_iflag.ll
@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s

 ; GCN-LABEL: {{^}}rcp_uint:
 ; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %out) {
+define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %cvt = uitofp i32 %load to float
  %div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@ -12,7 +12,7 @@ define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %

 ; GCN-LABEL: {{^}}rcp_sint:
 ; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %out) {
+define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %cvt = sitofp i32 %load to float
  %div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@ -20,4 +20,27 @@ define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %
  ret void
 }

+; GCN-LABEL: {{^}}rcp_uint_denorm:
+; GCN-NOT: v_rcp_iflag_f32
+define amdgpu_kernel void @rcp_uint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 {
+  %load = load i32, i32 addrspace(1)* %in, align 4
+  %cvt = uitofp i32 %load to float
+  %div = fdiv float 1.000000e+00, %cvt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}rcp_sint_denorm:
+; GCN-NOT: v_rcp_iflag_f32
+define amdgpu_kernel void @rcp_sint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 {
+  %load = load i32, i32 addrspace(1)* %in, align 4
+  %cvt = sitofp i32 %load to float
+  %div = fdiv float 1.000000e+00, %cvt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
+
 !0 = !{float 2.500000e+00}
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { "denormal-fp-math-f32"="ieee,ieee" }
--- a/test/CodeGen/AMDGPU/rsq.ll
+++ b/test/CodeGen/AMDGPU/rsq.ll
@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s

 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 declare float @llvm.sqrt.f32(float) nounwind readnone
@ -8,7 +8,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
 ; SI-LABEL: {{^}}rsq_f32:
 ; SI: v_rsq_f32_e32
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %val = load float, float addrspace(1)* %in, align 4
  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
  %div = fdiv float 1.0, %sqrt, !fpmath !0
@ -20,7 +20,7 @@ define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrs
 ; SI-UNSAFE: v_rsq_f64_e32
 ; SI-SAFE: v_sqrt_f64_e32
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
  %val = load double, double addrspace(1)* %in, align 4
  %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
  %div = fdiv double 1.0, %sqrt
@ -31,7 +31,7 @@ define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double add
 ; SI-LABEL: {{^}}rsq_f32_sgpr:
 ; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) #0 {
  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
  %div = fdiv float 1.0, %sqrt, !fpmath !0
  store float %div, float addrspace(1)* %out, align 4
@ -57,7 +57,7 @@ define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float
 ; SI-SAFE-NOT: v_rsq_f32

 ; SI: s_endpgm
-define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
+define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@ -83,7 +83,7 @@ define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(
 ; SI-UNSAFE: v_sqrt_f32_e32 [[SQRT:v[0-9]+]], v{{[0-9]+}}
 ; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %val = load float, float addrspace(1)* %in, align 4
  %sqrt = call float @llvm.sqrt.f32(float %val)
  %div = fdiv float -1.0, %sqrt, !fpmath !0
@ -98,7 +98,7 @@ define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float a
 ; SI-UNSAFE: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}
 ; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
  %val = load double, double addrspace(1)* %in, align 4
  %sqrt = call double @llvm.sqrt.f64(double %val)
  %div = fdiv double -1.0, %sqrt
@ -114,7 +114,7 @@ define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double
 ; SI-UNSAFE: v_sqrt_f32_e64 [[SQRT:v[0-9]+]], -v{{[0-9]+}}
 ; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
  %val = load float, float addrspace(1)* %in, align 4
  %val.fneg = fsub float -0.0, %val
  %sqrt = call float @llvm.sqrt.f32(float %val.fneg)
@ -130,7 +130,7 @@ define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, flo
 ; SI-UNSAFE: v_sqrt_f64_e64 [[SQRT:v\[[0-9]+:[0-9]+\]]], -v{{\[[0-9]+:[0-9]+\]}}
 ; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
  %val = load double, double addrspace(1)* %in, align 4
  %val.fneg = fsub double -0.0, %val
  %sqrt = call double @llvm.sqrt.f64(double %val.fneg)
@ -140,3 +140,5 @@ define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, do
 }

 !0 = !{float 2.500000e+00}
+
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@ -149,7 +149,7 @@

  attributes #0 = { argmemonly nounwind }
  attributes #1 = { nounwind readnone speculatable }
-  attributes #2 = { convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" "target-features"="+fp32-denormals" }
+  attributes #2 = { convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" }
  attributes #3 = { nounwind }

  !llvm.dbg.cu = !{!0}
--- a/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll
@ -1,7 +1,7 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=NOSDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9_10,SDWA,GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX9_10,SDWA,GCN %s

 ; GCN-LABEL: {{^}}add_shr_i32:
 ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
@ -12,7 +12,7 @@
 ; GFX9: v_add_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX10: v_add_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

-define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %a = load i32, i32 addrspace(1)* %in, align 4
  %shr = lshr i32 %a, 16
  %add = add i32 %a, %shr
@ -28,7 +28,7 @@ define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)*
 ; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9: v_sub_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX10: v_sub_nc_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %a = load i32, i32 addrspace(1)* %in, align 4
  %shr = lshr i32 %a, 16
  %sub = sub i32 %shr, %a
@ -44,7 +44,7 @@ define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)*

 ; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1

-define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) {
+define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) #0 {
  %a = load i32, i32 addrspace(1)* %in1, align 4
  %b = load i32, i32 addrspace(1)* %in2, align 4
  %shra = lshr i32 %a, 16
@ -61,7 +61,7 @@ define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)*
 ; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; SDWA-NOT: v_mul_u32_u24_sdwa

-define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) #0 {
 entry:
  %a = load i16, i16 addrspace(1)* %ina, align 4
  %b = load i16, i16 addrspace(1)* %inb, align 4
@ -84,7 +84,7 @@ entry:

 ; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

-define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
 entry:
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@ -111,7 +111,7 @@ entry:
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

-define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) #0 {
 entry:
  %a = load <4 x i16>, <4 x i16> addrspace(1)* %ina, align 4
  %b = load <4 x i16>, <4 x i16> addrspace(1)* %inb, align 4
@ -146,7 +146,7 @@ entry:
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

-define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) #0 {
 entry:
  %a = load <8 x i16>, <8 x i16> addrspace(1)* %ina, align 4
  %b = load <8 x i16>, <8 x i16> addrspace(1)* %inb, align 4
@ -161,7 +161,7 @@ entry:
 ; SDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; SDWA-NOT: v_mul_f16_sdwa

-define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) #0 {
 entry:
  %a = load half, half addrspace(1)* %ina, align 4
  %b = load half, half addrspace(1)* %inb, align 4
@ -184,7 +184,7 @@ entry:

 ; GFX9_10: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

-define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 {
 entry:
  %a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
  %b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@ -209,7 +209,7 @@ entry:
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

-define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) #0 {
 entry:
  %a = load <4 x half>, <4 x half> addrspace(1)* %ina, align 4
  %b = load <4 x half>, <4 x half> addrspace(1)* %inb, align 4
@ -240,7 +240,7 @@ entry:
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10-DAG: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

-define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) #0 {
 entry:
  %a = load <8 x half>, <8 x half> addrspace(1)* %ina, align 4
  %b = load <8 x half>, <8 x half> addrspace(1)* %inb, align 4
@ -256,7 +256,7 @@ entry:
 ; GFX10: v_mul_lo_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; SDWA-NOT: v_mul_u32_u24_sdwa

-define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) #0 {
 entry:
  %a = load i8, i8 addrspace(1)* %ina, align 4
  %b = load i8, i8 addrspace(1)* %inb, align 4
@ -285,7 +285,7 @@ entry:

 ; GFX10: v_lshlrev_b16_e64 v{{[0-9]+}}, 8, v
 ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) #0 {
 entry:
  %a = load <2 x i8>, <2 x i8> addrspace(1)* %ina, align 4
  %b = load <2 x i8>, <2 x i8> addrspace(1)* %inb, align 4
@ -315,7 +315,7 @@ entry:
 ; GFX10-DAG: v_mul_lo_u16_e64
 ; GFX10-DAG: v_mul_lo_u16_e64

-define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) #0 {
 entry:
  %a = load <4 x i8>, <4 x i8> addrspace(1)* %ina, align 4
  %b = load <4 x i8>, <4 x i8> addrspace(1)* %inb, align 4
@ -355,7 +355,7 @@ entry:
 ; GFX10-DAG: v_mul_lo_u16_e64
 ; GFX10-DAG: v_mul_lo_u16_e64

-define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) #0 {
 entry:
  %a = load <8 x i8>, <8 x i8> addrspace(1)* %ina, align 4
  %b = load <8 x i8>, <8 x i8> addrspace(1)* %inb, align 4
@ -376,7 +376,7 @@ entry:
 ; FIXME: Should be able to avoid or
 define amdgpu_kernel void @sitofp_v2i16_to_v2f16(
    <2 x half> addrspace(1)* %r,
-    <2 x i16> addrspace(1)* %a) {
+    <2 x i16> addrspace(1)* %a) #0 {
 entry:
  %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
  %r.val = sitofp <2 x i16> %a.val to <2 x half>
@ -399,7 +399,7 @@ entry:
 ; GFX9_10: v_pk_mul_f16 v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v[[SRC:[0-9]+]]
 ; GFX9_10: v_pk_add_f16 v{{[0-9]+}}, v[[DST_MUL]], v[[SRC]]

-define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) #0 {
 entry:
  %a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
  %b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@ -421,7 +421,7 @@ entry:

 ; GFX10: v_pk_mul_lo_u16 v{{[0-9]+}}, 0x141007b, v{{[0-9]+}}

-define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
 entry:
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
  %mul = mul <2 x i16> %a, <i16 123, i16 321>
@ -443,7 +443,7 @@ entry:
 ; GFX9_10: v_pk_mul_lo_u16 v[[DST1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9_10: v_pk_mul_lo_u16 v{{[0-9]+}}, v[[DST1]], v{{[0-9]+}}

-define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
 entry:
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@ -460,7 +460,7 @@ entry:

 ; GFX9_10: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

-define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) #0 {
 entry:
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@ -503,7 +503,7 @@ store_label:
 ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD

-define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) {
+define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) #0 {
 entry:
  %idxprom = ashr exact i64 15, 32
  %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %sourceA, i64 %idxprom
@ -564,3 +564,5 @@ bb11:                                             ; preds = %bb10, %bb2
  store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef
  br label %bb1
 }
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/AMDGPU/udiv.ll
+++ b/test/CodeGen/AMDGPU/udiv.ll
@ -1,7 +1,7 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s

-; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

--- a/test/CodeGen/AMDGPU/udivrem24.ll
+++ b/test/CodeGen/AMDGPU/udivrem24.ll
@ -21,6 +21,63 @@ define amdgpu_kernel void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in
  ret void
 }

+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in_out:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
+  %result = udiv i8 %num, %den
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
+  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
+  %result = udiv i8 %num, %den
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_out:
+; SI: v_cvt_f32_ubyte
+; SI-DAG: v_cvt_f32_ubyte
+; SI-DAG: v_rcp_iflag_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define amdgpu_kernel void @udiv24_i8_denorm_flush_out(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #2 {
+  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
+  %result = udiv i8 %num, %den
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}udiv24_i16:
 ; SI: v_cvt_f32_u32
 ; SI: v_cvt_f32_u32
@ -325,3 +382,7 @@ define amdgpu_kernel void @test_udiv24_u23_u16_i32(i32 addrspace(1)* %out, i32 a
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
 }
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
+attributes #2 = { "denormal-fp-math-f32"="preserve-sign,ieee" }
--- a/test/CodeGen/AMDGPU/v_mac.ll
+++ b/test/CodeGen/AMDGPU/v_mac.ll
@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s

 ; GCN-LABEL: {{^}}mac_vvv:
 ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@ -1,5 +1,5 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s

 ; GCN-LABEL: {{^}}mac_f16:
 ; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]]
@ -677,6 +677,6 @@ entry:

 declare void @llvm.amdgcn.s.barrier() #2

-attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #2 = { nounwind convergent }
--- a/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ b/test/CodeGen/AMDGPU/v_madak_f16.ll
@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI

 define amdgpu_kernel void @madak_f16(
 ; SI-LABEL: madak_f16:
@ -52,7 +52,7 @@ define amdgpu_kernel void @madak_f16(
 ; VI-NEXT:    s_endpgm
    half addrspace(1)* %r,
    half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #0 {
 entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
@ -136,7 +136,7 @@ define amdgpu_kernel void @madak_f16_use_2(
    half addrspace(1)* %r1,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
-    half addrspace(1)* %c) {
+    half addrspace(1)* %c) #0 {
 entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
@ -151,3 +151,5 @@ entry:
  store half %r1.val, half addrspace(1)* %r1
  ret void
 }
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
--- a/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
+++ b/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll
@ -15,89 +15,74 @@ define i32 @target_cpu_call_no_target_cpu() #1 {

 ; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu(
 ; CHECK-NEXT: ret i32 0
-define i32 @target_cpu_target_features_call_no_target_cpu() #2 {
+define i32 @target_cpu_target_features_call_no_target_cpu() {
  %call = call i32 @func_no_target_cpu()
  ret i32 %call
 }

-; CHECK-LABEL: @fp32_denormals(
-define i32 @fp32_denormals() #3 {
-  ret i32 0
-}
-
-; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals(
-; CHECK-NEXT: call i32 @fp32_denormals()
-define i32 @no_fp32_denormals_call_f32_denormals() #4 {
-  %call = call i32 @fp32_denormals()
-  ret i32 %call
-}
-
 ; Make sure gfx9 can call unspecified functions because of movrel
 ; feature change.
 ; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu(
 ; CHECK-NEXT: ret i32 0
-define i32 @gfx9_target_features_call_no_target_cpu() #5 {
+define i32 @gfx9_target_features_call_no_target_cpu() #2 {
  %call = call i32 @func_no_target_cpu()
  ret i32 %call
 }

-define i32 @func_no_halfrate64ops() #6 {
+define i32 @func_no_halfrate64ops() #3 {
  ret i32 0
 }

-define i32 @func_with_halfrate64ops() #7 {
+define i32 @func_with_halfrate64ops() #4 {
  ret i32 0
 }

 ; CHECK-LABEL: @call_func_without_halfrate64ops(
 ; CHECK-NEXT: ret i32 0
-define i32 @call_func_without_halfrate64ops() #7 {
+define i32 @call_func_without_halfrate64ops() #4 {
  %call = call i32 @func_no_halfrate64ops()
  ret i32 %call
 }

 ; CHECK-LABEL: @call_func_with_halfrate64ops(
 ; CHECK-NEXT: ret i32 0
-define i32 @call_func_with_halfrate64ops() #6 {
+define i32 @call_func_with_halfrate64ops() #3 {
  %call = call i32 @func_with_halfrate64ops()
  ret i32 %call
 }

-define i32 @func_no_loadstoreopt() #8 {
+define i32 @func_no_loadstoreopt() #5 {
  ret i32 0
 }

-define i32 @func_with_loadstoreopt() #9 {
+define i32 @func_with_loadstoreopt() #6 {
  ret i32 0
 }

 ; CHECK-LABEL: @call_func_without_loadstoreopt(
 ; CHECK-NEXT: ret i32 0
-define i32 @call_func_without_loadstoreopt() #9 {
+define i32 @call_func_without_loadstoreopt() #6 {
  %call = call i32 @func_no_loadstoreopt()
  ret i32 %call
 }

-define i32 @enable_codeobjectv3() #10 {
+define i32 @enable_codeobjectv3() #7 {
  ret i32 999
 }

 ; CHECK-LABEL: @disable_codeobjectv3_call_codeobjectv3(
 ; CHECK-NEXT: ret i32 999
-define i32 @disable_codeobjectv3_call_codeobjectv3() #11 {
+define i32 @disable_codeobjectv3_call_codeobjectv3() #8 {
  %call = call i32 @enable_codeobjectv3()
  ret i32 %call
 }

 attributes #0 = { nounwind }
 attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" }
-attributes #3 = { nounwind "target-features"="+fp32-denormals" }
-attributes #4 = { nounwind "target-features"="-fp32-denormals" }
-attributes #5 = { nounwind "target-cpu"="gfx900" }
-attributes #6 = { nounwind "target-features"="-half-rate-64-ops" }
-attributes #7 = { nounwind "target-features"="+half-rate-64-ops" }
-attributes #8 = { nounwind "target-features"="-load-store-opt" }
-attributes #9 = { nounwind "target-features"="+load-store-opt" }
-attributes #10 = { nounwind "target-features"="+code-object-v3" }
-attributes #11 = { nounwind "target-features"="-code-object-v3" }
+attributes #2 = { nounwind "target-cpu"="gfx900" }
+attributes #3 = { nounwind "target-features"="-half-rate-64-ops" }
+attributes #4 = { nounwind "target-features"="+half-rate-64-ops" }
+attributes #5 = { nounwind "target-features"="-load-store-opt" }
+attributes #6 = { nounwind "target-features"="+load-store-opt" }
+attributes #7 = { nounwind "target-features"="+code-object-v3" }
+attributes #8 = { nounwind "target-features"="-code-object-v3" }
--- a/test/tools/llvm-objdump/ELF/AMDGPU/source-lines.ll
+++ b/test/tools/llvm-objdump/ELF/AMDGPU/source-lines.ll
@ -68,7 +68,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

-attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fp64-fp16-denormals,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }

 !llvm.dbg.cu = !{!0}