ScheduleDAGInstrs: Rework schedule graph builder.

Re-comitting with a change that avoids undefined uses getting put into
the VRegUses list.

The new algorithm remembers the uses encountered while walking backwards
until a matching def is found. Contrary to the previous version this:
- Works without LiveIntervals being available
- Allows to increase the precision to subregisters/lanemasks
  (not used for now)

The changes in the AMDGPU tests are necessary because the R600 scheduler
is not stable with respect to the order of nodes in the ready queues.

Differential Revision: http://reviews.llvm.org/D9068

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254683 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matthias Braun 2015-12-04 01:51:19 +00:00
parent ae4aa8b8d2
commit 7adbf112c7
16 changed files with 279 additions and 156 deletions

View File

@ -33,15 +33,26 @@ namespace llvm {
/// An individual mapping from virtual register number to SUnit.
struct VReg2SUnit {
unsigned VirtReg;
LaneBitmask LaneMask;
SUnit *SU;
VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
: VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
}
};
/// Mapping from virtual register to SUnit including an operand index.
struct VReg2SUnitOperIdx : public VReg2SUnit {
unsigned OperandIndex;
VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
unsigned OperandIndex, SUnit *SU)
: VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
};
/// Record a physical register access.
/// For non-data-dependent uses, OpIdx == -1.
struct PhysRegSUOper {
@ -69,7 +80,10 @@ namespace llvm {
/// Track local uses of virtual registers. These uses are gathered by the DAG
/// builder and may be consulted by the scheduler to avoid iterating an entire
/// vreg use list.
typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2UseMap;
typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMultiMap;
typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
VReg2SUnitOperIdxMultiMap;
/// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
/// MachineInstrs.
@ -95,6 +109,9 @@ namespace llvm {
/// it has taken responsibility for scheduling the terminator correctly.
bool CanHandleTerminators;
/// Whether lane masks should get tracked.
bool TrackLaneMasks;
/// State specific to the current scheduling region.
/// ------------------------------------------------
@ -117,7 +134,7 @@ namespace llvm {
/// After calling BuildSchedGraph, each vreg used in the scheduling region
/// is mapped to a set of SUnits. These include all local vreg uses, not
/// just the uses for a singly defined vreg.
VReg2UseMap VRegUses;
VReg2SUnitMultiMap VRegUses;
/// State internal to DAG building.
/// -------------------------------
@ -129,8 +146,12 @@ namespace llvm {
Reg2SUnitsMap Defs;
Reg2SUnitsMap Uses;
/// Track the last instruction in this region defining each virtual register.
VReg2SUnitMap VRegDefs;
/// Tracks the last instruction(s) in this region defining each virtual
/// register. There may be multiple current definitions for a register with
/// disjunct lanemasks.
VReg2SUnitMultiMap CurrentVRegDefs;
/// Tracks the last instructions in this region using each virtual register.
VReg2SUnitOperIdxMultiMap CurrentVRegUses;
/// PendingLoads - Remember where unknown loads are after the most recent
/// unknown store, as we iterate. As with Defs and Uses, this is here
@ -200,7 +221,8 @@ namespace llvm {
/// input.
void buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker = nullptr,
PressureDiffs *PDiffs = nullptr);
PressureDiffs *PDiffs = nullptr,
bool TrackLaneMasks = false);
/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to scheduling barrier. We want to
@ -247,6 +269,12 @@ namespace llvm {
/// Other adjustments may be made to the instruction if necessary. Return
/// true if the operand has been deleted, false if not.
bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
void collectVRegUses(SUnit *SU);
};
/// newSUnit - Creates a new SUnit and return a ptr to it.

View File

@ -13,12 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@ -55,7 +55,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
bool RemoveKillFlags)
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS),
RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
FirstDbgValue(nullptr) {
TrackLaneMasks(false), FirstDbgValue(nullptr) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@ -363,6 +363,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
}
}
LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
unsigned Reg = MO.getReg();
// No point in tracking lanemasks if we don't have interesting subregisters.
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
if (!RC.HasDisjunctSubRegs)
return ~0u;
unsigned SubReg = MO.getSubReg();
if (SubReg == 0)
return RC.getLaneMask();
return TRI->getSubRegIndexLaneMask(SubReg);
}
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
@ -370,35 +384,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
unsigned Reg = MO.getReg();
// Singly defined vregs do not have output/anti dependencies.
// The current operand is a def, so we have at least one.
// Check here if there are any others...
LaneBitmask DefLaneMask;
LaneBitmask KillLaneMask;
if (TrackLaneMasks) {
bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
DefLaneMask = getLaneMaskForMO(MO);
// If we have a <read-undef> flag, none of the lane values comes from an
// earlier instruction.
KillLaneMask = IsKill ? ~0u : DefLaneMask;
// Clear undef flag, we'll re-add it later once we know which subregister
// Def is first.
MO.setIsUndef(false);
} else {
DefLaneMask = ~0u;
KillLaneMask = ~0u;
}
if (MO.isDead()) {
assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
"Dead defs should have no uses");
} else {
// Add data dependence to all uses we found so far.
const TargetSubtargetInfo &ST = MF.getSubtarget();
for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
E = CurrentVRegUses.end(); I != E; /*empty*/) {
LaneBitmask LaneMask = I->LaneMask;
// Ignore uses of other lanes.
if ((LaneMask & KillLaneMask) == 0) {
++I;
continue;
}
if ((LaneMask & DefLaneMask) != 0) {
SUnit *UseSU = I->SU;
MachineInstr *Use = UseSU->getInstr();
SDep Dep(SU, SDep::Data, Reg);
Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
I->OperandIndex));
ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
LaneMask &= ~KillLaneMask;
// If we found a Def for all lanes of this use, remove it from the list.
if (LaneMask != 0) {
I->LaneMask = LaneMask;
++I;
} else
I = CurrentVRegUses.erase(I);
}
}
// Shortcut: Singly defined vregs do not have output/anti dependencies.
if (MRI.hasOneDef(Reg))
return;
// Add output dependence to the next nearest def of this vreg.
// Add output dependence to the next nearest defs of this vreg.
//
// Unless this definition is dead, the output dependence should be
// transitively redundant with antidependencies from this definition's
// uses. We're conservative for now until we have a way to guarantee the uses
// are not eliminated sometime during scheduling. The output dependence edge
// is also useful if output latency exceeds def-use latency.
VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI == VRegDefs.end())
VRegDefs.insert(VReg2SUnit(Reg, SU));
else {
SUnit *DefSU = DefI->SU;
if (DefSU != SU && DefSU != &ExitSU) {
SDep Dep(SU, SDep::Output, Reg);
Dep.setLatency(
SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
DefSU->addPred(Dep);
}
DefI->SU = SU;
LaneBitmask LaneMask = DefLaneMask;
for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
CurrentVRegDefs.end())) {
// Ignore defs for other lanes.
if ((V2SU.LaneMask & LaneMask) == 0)
continue;
// Add an output dependence.
SUnit *DefSU = V2SU.SU;
// Ignore additional defs of the same lanes in one instruction. This can
// happen because lanemasks are shared for targets with too many
// subregisters. We also use some representration tricks/hacks where we
// add super-register defs/uses, to imply that although we only access parts
// of the reg we care about the full one.
if (DefSU == SU)
continue;
SDep Dep(SU, SDep::Output, Reg);
Dep.setLatency(
SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
DefSU->addPred(Dep);
// Update current definition. This can get tricky if the def was about a
// bigger lanemask before. We then have to shrink it and create a new
// VReg2SUnit for the non-overlapping part.
LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
if (NonOverlapMask != 0)
CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU));
V2SU.SU = SU;
V2SU.LaneMask = OverlapMask;
}
// If there was no CurrentVRegDefs entry for some lanes yet, create one.
if (LaneMask != 0)
CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
/// addVRegUseDeps - Add a register data dependency if the instruction that
@ -408,49 +493,26 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
const MachineInstr *MI = SU->getInstr();
const MachineOperand &MO = MI->getOperand(OperIdx);
unsigned Reg = MO.getReg();
// Record this local VReg use.
VReg2UseMap::iterator UI = VRegUses.find(Reg);
for (; UI != VRegUses.end(); ++UI) {
if (UI->SU == SU)
break;
// Remember the use. Data dependencies will be added when we find the def.
LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
// Add antidependences to the following defs of the vreg.
for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
CurrentVRegDefs.end())) {
// Ignore defs for unrelated lanes.
LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
if ((PrevDefLaneMask & LaneMask) == 0)
continue;
if (V2SU.SU == SU)
continue;
V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
}
if (UI == VRegUses.end())
VRegUses.insert(VReg2SUnit(Reg, SU));
// Lookup this operand's reaching definition.
assert(LIS && "vreg dependencies requires LiveIntervals");
LiveQueryResult LRQ
= LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
VNInfo *VNI = LRQ.valueIn();
// VNI will be valid because MachineOperand::readsReg() is checked by caller.
assert(VNI && "No value to read by operand");
MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
// Phis and other noninstructions (after coalescing) have a NULL Def.
if (Def) {
SUnit *DefSU = getSUnit(Def);
if (DefSU) {
// The reaching Def lives within this scheduling region.
// Create a data dependence.
SDep dep(DefSU, SDep::Data, Reg);
// Adjust the dependence latency using operand def/use information, then
// allow the target to perform its own adjustments.
int DefOp = Def->findRegisterDefOperandIdx(Reg);
dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
const TargetSubtargetInfo &ST = MF.getSubtarget();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
SU->addPred(dep);
}
}
// Add antidependence to the following def of the vreg it uses.
VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI != VRegDefs.end() && DefI->SU != SU)
DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}
/// Return true if MI is an instruction we are unable to reason about
@ -733,17 +795,44 @@ void ScheduleDAGInstrs::initSUnits() {
}
}
void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
const MachineInstr *MI = SU->getInstr();
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
if (!MO.readsReg())
continue;
if (TrackLaneMasks && !MO.isUse())
continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Record this local VReg use.
VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
for (; UI != VRegUses.end(); ++UI) {
if (UI->SU == SU)
break;
}
if (UI == VRegUses.end())
VRegUses.insert(VReg2SUnit(Reg, 0, SU));
}
}
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs) {
PressureDiffs *PDiffs,
bool TrackLaneMasks) {
const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
ScheduleDAG::clearDAG();
@ -777,10 +866,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Defs.setUniverse(TRI->getNumRegs());
Uses.setUniverse(TRI->getNumRegs());
assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
unsigned NumVirtRegs = MRI.getNumVirtRegs();
CurrentVRegDefs.setUniverse(NumVirtRegs);
CurrentVRegUses.setUniverse(NumVirtRegs);
VRegUses.clear();
VRegDefs.setUniverse(MRI.getNumVirtRegs());
VRegUses.setUniverse(MRI.getNumVirtRegs());
VRegUses.setUniverse(NumVirtRegs);
// Model data dependencies between instructions being scheduled and the
// ExitSU.
@ -808,6 +901,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
assert(RPTracker->getPos() == std::prev(MII) &&
"RPTracker can't find MI");
collectVRegUses(SU);
}
assert(
@ -1057,7 +1151,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Defs.clear();
Uses.clear();
VRegDefs.clear();
CurrentVRegDefs.clear();
CurrentVRegUses.clear();
PendingLoads.clear();
}

View File

@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}width_2d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].Z
; EG: MOV * [[VAL]], KC0[2].Z
define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -19,7 +19,7 @@ entry:
; FUNC-LABEL: {{^}}width_3d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].Z
; EG: MOV * [[VAL]], KC0[2].Z
define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -36,7 +36,7 @@ entry:
; FUNC-LABEL: {{^}}height_2d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].W
; EG: MOV * [[VAL]], KC0[2].W
define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -49,7 +49,7 @@ entry:
; FUNC-LABEL: {{^}}height_3d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].W
; EG: MOV * [[VAL]], KC0[2].W
define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -66,7 +66,7 @@ entry:
; FUNC-LABEL: {{^}}depth_3d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[3].X
; EG: MOV * [[VAL]], KC0[3].X
define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -83,7 +83,7 @@ entry:
; FUNC-LABEL: {{^}}data_type_2d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[3].Y
; EG: MOV * [[VAL]], KC0[3].Y
define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -96,7 +96,7 @@ entry:
; FUNC-LABEL: {{^}}data_type_3d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[3].Y
; EG: MOV * [[VAL]], KC0[3].Y
define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -113,7 +113,7 @@ entry:
; FUNC-LABEL: {{^}}channel_order_2d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[3].Z
; EG: MOV * [[VAL]], KC0[3].Z
define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -126,7 +126,7 @@ entry:
; FUNC-LABEL: {{^}}channel_order_3d:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[3].Z
; EG: MOV * [[VAL]], KC0[3].Z
define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
i32 addrspace(1)* %out) {
entry:
@ -145,7 +145,7 @@ entry:
;
; FUNC-LABEL: {{^}}image_arg_2nd:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[4].Z
; EG: MOV * [[VAL]], KC0[4].Z
define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
i32 %x,
%opencl.image2d_t addrspace(1)* %in2,

View File

@ -7,8 +7,8 @@
; ADD_INT literal.x KC0[2].Z, 5
; CHECK: {{^}}i32_literal:
; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
; CHECK-NEXT: 5
define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -24,8 +24,8 @@ entry:
; ADD literal.x KC0[2].Z, 5.0
; CHECK: {{^}}float_literal:
; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
; CHECK-NEXT: 1084227584(5.0
define void @float_literal(float addrspace(1)* %out, float %in) {
entry:

View File

@ -4,7 +4,7 @@
; FUNC-LABEL: {{^}}read_workdim:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].Z
; EG: MOV * [[VAL]], KC0[2].Z
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c

View File

@ -3,7 +3,7 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
; R600: {{^}}amdgpu_trunc:
; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; R600: TRUNC {{\*? *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: {{^}}amdgpu_trunc:
; SI: v_trunc_f32

View File

@ -5,7 +5,7 @@
; FUNC-LABEL: {{^}}local_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Z
; EG: MOV * [[VAL]], KC0[1].Z
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
@ -23,7 +23,7 @@ entry:
; FUNC-LABEL: {{^}}local_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].W
; EG: MOV * [[VAL]], KC0[1].W
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
@ -38,7 +38,7 @@ entry:
; FUNC-LABEL: {{^}}local_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].X
; EG: MOV * [[VAL]], KC0[2].X
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20

View File

@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
}
; FUNC-LABEL: {{^}}or_i1:
; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {

View File

@ -5,8 +5,8 @@
; SET*DX10 instructions.
; CHECK: {{^}}fcmp_une_select_fptosi:
; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -19,8 +19,8 @@ entry:
}
; CHECK: {{^}}fcmp_une_select_i32:
; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -31,8 +31,8 @@ entry:
}
; CHECK: {{^}}fcmp_oeq_select_fptosi:
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -45,8 +45,8 @@ entry:
}
; CHECK: {{^}}fcmp_oeq_select_i32:
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -57,8 +57,8 @@ entry:
}
; CHECK: {{^}}fcmp_ogt_select_fptosi:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -71,8 +71,8 @@ entry:
}
; CHECK: {{^}}fcmp_ogt_select_i32:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -83,8 +83,8 @@ entry:
}
; CHECK: {{^}}fcmp_oge_select_fptosi:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -97,8 +97,8 @@ entry:
}
; CHECK: {{^}}fcmp_oge_select_i32:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -109,8 +109,8 @@ entry:
}
; CHECK: {{^}}fcmp_ole_select_fptosi:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -123,8 +123,8 @@ entry:
}
; CHECK: {{^}}fcmp_ole_select_i32:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -135,8 +135,8 @@ entry:
}
; CHECK: {{^}}fcmp_olt_select_fptosi:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -149,8 +149,8 @@ entry:
}
; CHECK: {{^}}fcmp_olt_select_i32:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:

View File

@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: buffer_store_dword [[EXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
%shl = shl i32 %in, 31
%sext = ashr i32 %shl, 31

View File

@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
ret void
}
;EG: {{^}}shl_i64:
;EG-LABEL: {{^}}shl_i64:
;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}}
;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}}
;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
ret void
}
;EG: {{^}}shl_v2i64:
;EG-LABEL: {{^}}shl_v2i64:
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]

View File

@ -70,11 +70,11 @@ entry:
;EG-LABEL: {{^}}ashr_i64_2:
;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}}
;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal

View File

@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}}
; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}}
; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]]
; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1

View File

@ -3,8 +3,8 @@
; These tests are for condition codes that are not supported by the hardware
; CHECK-LABEL: {{^}}slt:
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 5(7.006492e-45)
define void @slt(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -15,8 +15,8 @@ entry:
}
; CHECK-LABEL: {{^}}ult_i32:
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 5(7.006492e-45)
define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -40,8 +40,8 @@ entry:
}
; CHECK-LABEL: {{^}}ult_float_native:
; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR *
; CHECK: LSHR
; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ult_float_native(float addrspace(1)* %out, float %in) {
entry:
@ -52,8 +52,8 @@ entry:
}
; CHECK-LABEL: {{^}}olt:
; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR *
; CHECK: LSHR
; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @olt(float addrspace(1)* %out, float %in) {
entry:
@ -64,8 +64,8 @@ entry:
}
; CHECK-LABEL: {{^}}sle:
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 6(8.407791e-45)
define void @sle(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -76,8 +76,8 @@ entry:
}
; CHECK-LABEL: {{^}}ule_i32:
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK: LSHR
; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 6(8.407791e-45)
define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -101,8 +101,8 @@ entry:
}
; CHECK-LABEL: {{^}}ule_float_native:
; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR *
; CHECK: LSHR
; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ule_float_native(float addrspace(1)* %out, float %in) {
entry:
@ -113,8 +113,8 @@ entry:
}
; CHECK-LABEL: {{^}}ole:
; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR *
; CHECK: LSHR
; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT:1084227584(5.000000e+00)
define void @ole(float addrspace(1)* %out, float %in) {
entry:

View File

@ -7,7 +7,7 @@
; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].X
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
; HSA: .amd_kernel_code_t
@ -38,7 +38,7 @@ entry:
; FUNC-LABEL: {{^}}ngroups_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Y
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
@ -53,7 +53,7 @@ entry:
; FUNC-LABEL: {{^}}ngroups_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Z
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
@ -68,7 +68,7 @@ entry:
; FUNC-LABEL: {{^}}global_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].W
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
@ -83,7 +83,7 @@ entry:
; FUNC-LABEL: {{^}}global_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].X
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
@ -98,7 +98,7 @@ entry:
; FUNC-LABEL: {{^}}global_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Y
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14

View File

@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
}
; FUNC-LABEL: {{^}}xor_i1:
; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}}
; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}