AMDGPU: Handle i64->v2i32 loads/stores in PreprocessISelDAG

This fixes a select error when the i64 source was also
bitcasted to v2i32 in the original source.

Instead of awkwardly trying to select the modified source value and
the store, replace before isel begins.

Uses a worklist to avoid possible problems from mutating the DAG,
although it seems to work OK without it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248589 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2015-09-25 17:27:08 +00:00
parent 7a6a7f2409
commit 9225f01169
2 changed files with 80 additions and 52 deletions

View File

@ -47,6 +47,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
SDNode *Select(SDNode *N) override;
const char *getPassName() const override;
void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
private:
@ -466,62 +467,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
N->getValueType(0), Ops);
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDLoc SL(N);
EVT VT = N->getValueType(0);
if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
N = glueCopyToM0(N);
break;
}
// To simplify the TableGen patterns, we replace all i64 loads with
// v2i32 loads. Alternatively, we could promote i64 loads to v2i32
// during DAG legalization; however, some places (ExpandUnalignedLoad)
// in the DAG legalizer assume that if i64 is legal, so doing this
// promotion early can cause problems.
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
LD->getBasePtr(), LD->getMemOperand());
SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
MVT::i64, NewLoad);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
SDNode *Load = glueCopyToM0(NewLoad.getNode());
SelectCode(Load);
N = BitCast.getNode();
break;
}
case ISD::LOAD:
case ISD::STORE: {
// Handle i64 stores here for the same reason mentioned above for loads.
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Value = ST->getValue();
if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
MVT::v2i32, Value);
SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
ST->getBasePtr(), ST->getMemOperand());
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
if (NewValue.getOpcode() == ISD::BITCAST) {
Select(NewStore.getNode());
return SelectCode(NewValue.getNode());
}
// getNode() may fold the bitcast if its input was another bitcast. If
// that happens we should only select the new store.
N = NewStore.getNode();
}
N = glueCopyToM0(N);
break;
}
case AMDGPUISD::REGISTER_LOAD: {
if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
@ -1545,6 +1495,65 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
bool Modified = false;
// XXX - Other targets seem to be able to do this without a worklist.
SmallVector<LoadSDNode *, 8> LoadsToReplace;
SmallVector<StoreSDNode *, 8> StoresToReplace;
for (SDNode &Node : CurDAG->allnodes()) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
EVT VT = LD->getValueType(0);
if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
continue;
// To simplify the TableGen patters, we replace all i64 loads with v2i32
// loads. Alternatively, we could promote i64 loads to v2i32 during DAG
// legalization, however, so places (ExpandUnalignedLoad) in the DAG
// legalizer assume that if i64 is legal, so doing this promotion early
// can cause problems.
LoadsToReplace.push_back(LD);
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
// Handle i64 stores here for the same reason mentioned above for loads.
SDValue Value = ST->getValue();
if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
continue;
StoresToReplace.push_back(ST);
}
}
for (LoadSDNode *LD : LoadsToReplace) {
SDLoc SL(LD);
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
LD->getBasePtr(), LD->getMemOperand());
SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
MVT::i64, NewLoad);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
Modified = true;
}
for (StoreSDNode *ST : StoresToReplace) {
SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
MVT::v2i32, ST->getValue());
const SDValue StoreOps[] = {
ST->getChain(),
NewValue,
ST->getBasePtr(),
ST->getOffset()
};
CurDAG->UpdateNodeOperands(ST, StoreOps);
Modified = true;
}
// XXX - Is this necessary?
if (Modified)
CurDAG->RemoveDeadNodes();
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());

View File

@ -0,0 +1,19 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; The replacement of i64 stores with v2i32 stores used to break other
; users of the bitcast if they already existed.
; GCN-LABEL: {{^}}extract_vector_elt_select_error:
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dwordx2
; Uses the same i64 value %val in two ways: through an explicit bitcast to
; <2 x i32> whose elements are extracted and stored individually, and as the
; operand of a direct i64 store.
define void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) nounwind {
; Pre-existing bitcast of %val, with both elements extracted as extra users.
%vec = bitcast i64 %val to <2 x i32>
%elt0 = extractelement <2 x i32> %vec, i32 0
%elt1 = extractelement <2 x i32> %vec, i32 1
; Two volatile i32 stores of the extracted elements to %out.
store volatile i32 %elt0, i32 addrspace(1)* %out
store volatile i32 %elt1, i32 addrspace(1)* %out
; The i64 store of the original value to %in.
store volatile i64 %val, i64 addrspace(1)* %in
ret void
}