When creating X86 MUL8 and DIV8 instructions, make sure we don't produce

CopyFromReg nodes for aliasing registers (AX and AL). This confuses the fast
register allocator.

Instead of CopyFromReg(AL), use ExtractSubReg(CopyFromReg(AX), sub_8bit).

This fixes PR7312.

llvm-svn: 106934
This commit is contained in:
Jakob Stoklund Olesen 2010-06-26 00:39:23 +00:00
parent a3b10d15f2
commit 6dee31aa07
2 changed files with 80 additions and 37 deletions

View File

@ -1646,6 +1646,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
if (HiReg == X86::AH && Subtarget->is64Bit() &&
!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
// Get the low part if needed. Don't use getCopyFromReg for aliasing
// registers.
if (!SDValue(Node, 0).use_empty())
ReplaceUses(SDValue(Node, 1),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
// Shift AX down 8 bits.
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
ReplaceUses(SDValue(Node, 1),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@ -1656,24 +1676,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
SDValue Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
@ -1786,6 +1791,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
if (HiReg == X86::AH && Subtarget->is64Bit() &&
!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
// If we also need AL (the quotient), get it by extracting a subreg from
// Result. The fast register allocator does not like multiple CopyFromReg
// nodes using aliasing registers.
if (!SDValue(Node, 0).use_empty())
ReplaceUses(SDValue(Node, 0),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
// Shift AX right by 8 bits instead of using AH.
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
Result,
CurDAG->getTargetConstant(8, MVT::i8)),
0);
ReplaceUses(SDValue(Node, 1),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
}
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@ -1796,25 +1824,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
SDValue Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
Result,
CurDAG->getTargetConstant(8, MVT::i8)),
0);
// Then truncate it down to i8.
Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}

View File

@ -0,0 +1,31 @@
; RUN: llc -O0 -relocation-model=pic -disable-fp-elim < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10"
; This file contains functions that may crash llc -O0
; The DIV8 instruction produces results in AH and AL, but we don't want to use
; AH in 64-bit mode. The hack used must not generate copyFromReg nodes for
; aliased registers (AX and AL) - RegAllocFast does not like that.
; PR7312
define i32 @div8() nounwind {
entry:
%0 = trunc i64 undef to i8 ; <i8> [#uses=3]
%1 = udiv i8 0, %0 ; <i8> [#uses=1]
%2 = urem i8 0, %0 ; <i8> [#uses=1]
%3 = icmp uge i8 %2, %0 ; <i1> [#uses=1]
br i1 %3, label %"40", label %"39"
"39": ; preds = %"36"
%4 = zext i8 %1 to i32 ; <i32> [#uses=1]
%5 = mul nsw i32 %4, undef ; <i32> [#uses=1]
%6 = add nsw i32 %5, undef ; <i32> [#uses=1]
%7 = icmp ne i32 %6, undef ; <i1> [#uses=1]
br i1 %7, label %"40", label %"41"
"40": ; preds = %"39", %"36"
unreachable
"41": ; preds = %"39"
unreachable
}