mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-26 06:14:42 +00:00
AMDGPU: Handle i64->v2i32 loads/stores in PreprocessISelDAG
This fixes a selection error that occurred when the i64 source value was also bitcast to v2i32 in the original source. Instead of awkwardly trying to select both the modified source value and the store during selection, the i64 loads and stores are now replaced before isel begins. A worklist is used to avoid possible problems from mutating the DAG while iterating over it, although it seems to work OK without one.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248589 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7a6a7f2409
commit
9225f01169
@ -47,6 +47,7 @@ public:
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
SDNode *Select(SDNode *N) override;
|
||||
const char *getPassName() const override;
|
||||
void PreprocessISelDAG() override;
|
||||
void PostprocessISelDAG() override;
|
||||
|
||||
private:
|
||||
@ -466,62 +467,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
|
||||
N->getValueType(0), Ops);
|
||||
}
|
||||
|
||||
case ISD::LOAD: {
|
||||
LoadSDNode *LD = cast<LoadSDNode>(N);
|
||||
SDLoc SL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
|
||||
N = glueCopyToM0(N);
|
||||
break;
|
||||
}
|
||||
|
||||
// To simplify the TableGen patterns, we replace all i64 loads with
|
||||
// v2i32 loads. Alternatively, we could promote i64 loads to v2i32
|
||||
// during DAG legalization, however, some places (ExpandUnalignedLoad)
|
||||
// in the DAG legalizer make assumptions when i64 is legal, so doing this
|
||||
// promotion early can cause problems.
|
||||
|
||||
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
|
||||
LD->getBasePtr(), LD->getMemOperand());
|
||||
SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
|
||||
MVT::i64, NewLoad);
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
|
||||
SDNode *Load = glueCopyToM0(NewLoad.getNode());
|
||||
SelectCode(Load);
|
||||
N = BitCast.getNode();
|
||||
break;
|
||||
}
|
||||
|
||||
case ISD::LOAD:
|
||||
case ISD::STORE: {
|
||||
// Handle i64 stores here for the same reason mentioned above for loads.
|
||||
StoreSDNode *ST = cast<StoreSDNode>(N);
|
||||
SDValue Value = ST->getValue();
|
||||
if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
|
||||
|
||||
SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
|
||||
MVT::v2i32, Value);
|
||||
SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
|
||||
ST->getBasePtr(), ST->getMemOperand());
|
||||
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
|
||||
|
||||
if (NewValue.getOpcode() == ISD::BITCAST) {
|
||||
Select(NewStore.getNode());
|
||||
return SelectCode(NewValue.getNode());
|
||||
}
|
||||
|
||||
// getNode() may fold the bitcast if its input was another bitcast. If
|
||||
// that happens we should only select the new store.
|
||||
N = NewStore.getNode();
|
||||
}
|
||||
|
||||
N = glueCopyToM0(N);
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPUISD::REGISTER_LOAD: {
|
||||
if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
break;
|
||||
@ -1545,6 +1495,65 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
|
||||
return SelectVOP3Mods(In, Src, SrcMods);
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
|
||||
bool Modified = false;
|
||||
|
||||
// XXX - Other targets seem to be able to do this without a worklist.
|
||||
SmallVector<LoadSDNode *, 8> LoadsToReplace;
|
||||
SmallVector<StoreSDNode *, 8> StoresToReplace;
|
||||
|
||||
for (SDNode &Node : CurDAG->allnodes()) {
|
||||
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
|
||||
EVT VT = LD->getValueType(0);
|
||||
if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
|
||||
continue;
|
||||
|
||||
// To simplify the TableGen patters, we replace all i64 loads with v2i32
|
||||
// loads. Alternatively, we could promote i64 loads to v2i32 during DAG
|
||||
// legalization, however, so places (ExpandUnalignedLoad) in the DAG
|
||||
// legalizer assume that if i64 is legal, so doing this promotion early
|
||||
// can cause problems.
|
||||
LoadsToReplace.push_back(LD);
|
||||
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
|
||||
// Handle i64 stores here for the same reason mentioned above for loads.
|
||||
SDValue Value = ST->getValue();
|
||||
if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
|
||||
continue;
|
||||
StoresToReplace.push_back(ST);
|
||||
}
|
||||
}
|
||||
|
||||
for (LoadSDNode *LD : LoadsToReplace) {
|
||||
SDLoc SL(LD);
|
||||
|
||||
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
|
||||
LD->getBasePtr(), LD->getMemOperand());
|
||||
SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
|
||||
MVT::i64, NewLoad);
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
|
||||
Modified = true;
|
||||
}
|
||||
|
||||
for (StoreSDNode *ST : StoresToReplace) {
|
||||
SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
|
||||
MVT::v2i32, ST->getValue());
|
||||
const SDValue StoreOps[] = {
|
||||
ST->getChain(),
|
||||
NewValue,
|
||||
ST->getBasePtr(),
|
||||
ST->getOffset()
|
||||
};
|
||||
|
||||
CurDAG->UpdateNodeOperands(ST, StoreOps);
|
||||
Modified = true;
|
||||
}
|
||||
|
||||
// XXX - Is this necessary?
|
||||
if (Modified)
|
||||
CurDAG->RemoveDeadNodes();
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
||||
const AMDGPUTargetLowering& Lowering =
|
||||
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
|
||||
|
19
test/CodeGen/AMDGPU/extract-vector-elt-i64.ll
Normal file
19
test/CodeGen/AMDGPU/extract-vector-elt-i64.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Regression test: replacing i64 stores with v2i32 stores used to break
; other users of the bitcast when such users already existed.
|
||||
|
||||
; GCN-LABEL: {{^}}extract_vector_elt_select_error:
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_store_dwordx2
|
||||
; Stores each element of %val viewed as <2 x i32> through %out, then stores
; the whole i64 through %in, so %val has an explicit v2i32 bitcast user in
; addition to the plain i64 store.
define void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) nounwind {
  %val.v2i32 = bitcast i64 %val to <2 x i32>
  %first = extractelement <2 x i32> %val.v2i32, i32 0
  %second = extractelement <2 x i32> %val.v2i32, i32 1

  store volatile i32 %first, i32 addrspace(1)* %out
  store volatile i32 %second, i32 addrspace(1)* %out
  store volatile i64 %val, i64 addrspace(1)* %in
  ret void
}
|
Loading…
x
Reference in New Issue
Block a user