mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-07 19:47:53 +00:00
When creating X86 MUL8 and DIV8 instructions, make sure we don't produce
CopyFromReg nodes for aliasing registers (AX and AL). This confuses the fast register allocator. Instead of CopyFromReg(AL), use ExtractSubReg(CopyFromReg(AX), sub_8bit). This fixes PR7312. llvm-svn: 106934
This commit is contained in:
parent
a3b10d15f2
commit
6dee31aa07
@ -1646,6 +1646,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
|
||||
}
|
||||
|
||||
// Prevent use of AH in a REX instruction by referencing AX instead.
|
||||
if (HiReg == X86::AH && Subtarget->is64Bit() &&
|
||||
!SDValue(Node, 1).use_empty()) {
|
||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
X86::AX, MVT::i16, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
// Get the low part if needed. Don't use getCopyFromReg for aliasing
|
||||
// registers.
|
||||
if (!SDValue(Node, 0).use_empty())
|
||||
ReplaceUses(SDValue(Node, 1),
|
||||
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
|
||||
|
||||
// Shift AX down 8 bits.
|
||||
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
|
||||
Result,
|
||||
CurDAG->getTargetConstant(8, MVT::i8)), 0);
|
||||
// Then truncate it down to i8.
|
||||
ReplaceUses(SDValue(Node, 1),
|
||||
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
|
||||
}
|
||||
// Copy the low half of the result, if it is needed.
|
||||
if (!SDValue(Node, 0).use_empty()) {
|
||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
@ -1656,24 +1676,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
}
|
||||
// Copy the high half of the result, if it is needed.
|
||||
if (!SDValue(Node, 1).use_empty()) {
|
||||
SDValue Result;
|
||||
if (HiReg == X86::AH && Subtarget->is64Bit()) {
|
||||
// Prevent use of AH in a REX instruction by referencing AX instead.
|
||||
// Shift it down 8 bits.
|
||||
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
X86::AX, MVT::i16, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
|
||||
Result,
|
||||
CurDAG->getTargetConstant(8, MVT::i8)), 0);
|
||||
// Then truncate it down to i8.
|
||||
Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
|
||||
MVT::i8, Result);
|
||||
} else {
|
||||
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
HiReg, NVT, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
}
|
||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
HiReg, NVT, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
ReplaceUses(SDValue(Node, 1), Result);
|
||||
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
|
||||
}
|
||||
@ -1786,6 +1791,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
|
||||
}
|
||||
|
||||
// Prevent use of AH in a REX instruction by referencing AX instead.
|
||||
// Shift it down 8 bits.
|
||||
if (HiReg == X86::AH && Subtarget->is64Bit() &&
|
||||
!SDValue(Node, 1).use_empty()) {
|
||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
X86::AX, MVT::i16, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
|
||||
// If we also need AL (the quotient), get it by extracting a subreg from
|
||||
// Result. The fast register allocator does not like multiple CopyFromReg
|
||||
// nodes using aliasing registers.
|
||||
if (!SDValue(Node, 0).use_empty())
|
||||
ReplaceUses(SDValue(Node, 0),
|
||||
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
|
||||
|
||||
// Shift AX right by 8 bits instead of using AH.
|
||||
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
|
||||
Result,
|
||||
CurDAG->getTargetConstant(8, MVT::i8)),
|
||||
0);
|
||||
ReplaceUses(SDValue(Node, 1),
|
||||
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
|
||||
}
|
||||
// Copy the division (low) result, if it is needed.
|
||||
if (!SDValue(Node, 0).use_empty()) {
|
||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
@ -1796,25 +1824,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
}
|
||||
// Copy the remainder (high) result, if it is needed.
|
||||
if (!SDValue(Node, 1).use_empty()) {
|
||||
SDValue Result;
|
||||
if (HiReg == X86::AH && Subtarget->is64Bit()) {
|
||||
// Prevent use of AH in a REX instruction by referencing AX instead.
|
||||
// Shift it down 8 bits.
|
||||
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
X86::AX, MVT::i16, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
|
||||
Result,
|
||||
CurDAG->getTargetConstant(8, MVT::i8)),
|
||||
0);
|
||||
// Then truncate it down to i8.
|
||||
Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
|
||||
MVT::i8, Result);
|
||||
} else {
|
||||
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
HiReg, NVT, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
}
|
||||
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
|
||||
HiReg, NVT, InFlag);
|
||||
InFlag = Result.getValue(2);
|
||||
ReplaceUses(SDValue(Node, 1), Result);
|
||||
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
|
||||
}
|
||||
|
31
test/CodeGen/X86/crash-O0.ll
Normal file
31
test/CodeGen/X86/crash-O0.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; RUN: llc -O0 -relocation-model=pic -disable-fp-elim < %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10"
|
||||
|
||||
; This file contains functions that may crash llc -O0
|
||||
|
||||
; The DIV8 instruction produces results in AH and AL, but we don't want to use
|
||||
; AH in 64-bit mode. The hack used must not generate CopyFromReg nodes for
|
||||
; aliased registers (AX and AL) - RegAllocFast does not like that.
|
||||
; PR7312
|
||||
; Reduced test function: takes no arguments, and every path through it ends in
; `unreachable`, so no i32 value is ever actually returned.
define i32 @div8() nounwind {
|
||||
entry:
|
||||
; The divisor is an undef i64 truncated to i8; it feeds the udiv, the urem and
; the compare below.
%0 = trunc i64 undef to i8 ; <i8> [#uses=3]
|
||||
; Quotient and remainder of the same 8-bit operands. Both values have users
; (%1 in block "39", %2 in the compare), so both results are needed.
%1 = udiv i8 0, %0 ; <i8> [#uses=1]
|
||||
%2 = urem i8 0, %0 ; <i8> [#uses=1]
|
||||
%3 = icmp uge i8 %2, %0 ; <i1> [#uses=1]
|
||||
br i1 %3, label %"40", label %"39"
|
||||
|
||||
"39": ; preds = %entry
|
||||
; The quotient is only used here, in a different block from the remainder's use.
%4 = zext i8 %1 to i32 ; <i32> [#uses=1]
|
||||
%5 = mul nsw i32 %4, undef ; <i32> [#uses=1]
|
||||
%6 = add nsw i32 %5, undef ; <i32> [#uses=1]
|
||||
%7 = icmp ne i32 %6, undef ; <i1> [#uses=1]
|
||||
br i1 %7, label %"40", label %"41"
|
||||
|
||||
"40": ; preds = %"39", %entry
|
||||
unreachable
|
||||
|
||||
"41": ; preds = %"39"
|
||||
unreachable
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user