Revert "Reimplement (part of) the or -> add optimization. Matching 'or' into

'add'", which seems to have broken just about everything.

llvm-svn: 116033
Daniel Dunbar 2010-10-08 02:07:32 +00:00
parent 59848f6703
commit d3b6b8bf2b
4 changed files with 65 additions and 126 deletions
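
The reverted optimization rests on a simple identity: when two values share no set bits, an addition generates no carries, so OR and ADD compute the same result (and the ADD can later be turned into an LEA). A minimal standalone illustration of that identity, not part of the commit:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Lo = 0x0000ABCDu; // set bits only in the low half
      uint32_t Hi = 0x12340000u; // set bits only in the high half
      assert((Lo & Hi) == 0);         // the operands are disjoint...
      assert((Lo | Hi) == (Lo + Hi)); // ...so 'or' and 'add' agree
      return 0;
    }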

View File

@@ -997,63 +997,6 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
 def : Pat<(i64 (zext def32:$src)),
           (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
 
-//===----------------------------------------------------------------------===//
-// Pattern match OR as ADD
-//===----------------------------------------------------------------------===//
-
-// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
-// 3-addressified into an LEA instruction to avoid copies. However, we also
-// want to finally emit these instructions as an or at the end of the code
-// generator to make the generated code easier to read. To do this, we select
-// into "disjoint bits" pseudo ops.
-
-// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
-  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
-  APInt KnownZero0, KnownOne0;
-  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
-  APInt KnownZero1, KnownOne1;
-  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
-  return (~KnownZero0 & ~KnownZero1) == 0;
-}]>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-
-let isCommutable = 1, isConvertibleToThreeAddress = 1,
-    Constraints = "$src1 = $dst" in {
-def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                   "", // orw/addw REG, REG
-                   [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
-def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                   "", // orl/addl REG, REG
-                   [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
-def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "", // orq/addq REG, REG
-                   [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
-}
-
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
-          (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
-          (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
-          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-
-} // AddedComplexity
-
 //===----------------------------------------------------------------------===//
 // Some peepholes
 //===----------------------------------------------------------------------===//
@@ -1366,8 +1309,27 @@ def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
 def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
           (SETB_C32r)>;
 
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before the selecting to OR
+def : Pat<(or_is_add GR16:$src1, imm:$src2),
+          (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or_is_add GR32:$src1, imm:$src2),
+          (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
+          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
+          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or_is_add GR16:$src1, GR16:$src2),
+          (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or_is_add GR32:$src1, GR32:$src2),
+          (ADD32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
+          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
+          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(or_is_add GR64:$src1, GR64:$src2),
+          (ADD64rr GR64:$src1, GR64:$src2)>;
+} // AddedComplexity
+
 //===----------------------------------------------------------------------===//
 // EFLAGS-defining Patterns
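
The or_is_add predicate removed above proves disjointness from known-zero bit information: a bit can possibly be one in an operand only where its KnownZero mask is clear, so the rewrite is safe exactly when the two "maybe one" masks never overlap. A hypothetical restatement of that test in plain C++, where KnownZero0 and KnownZero1 stand in for what ComputeMaskedBits would return:

    #include <cstdint>

    // True if no bit position can be one in both operands at once,
    // given a conservative known-zero mask for each operand.
    bool sharesNoBits(uint32_t KnownZero0, uint32_t KnownZero1) {
      uint32_t MaybeOne0 = ~KnownZero0; // bits that might be set in operand 0
      uint32_t MaybeOne1 = ~KnownZero1; // bits that might be set in operand 1
      return (MaybeOne0 & MaybeOne1) == 0; // same test as the PatFrag body
    }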

View File

@@ -54,11 +54,6 @@ ReMatPICStubLoad("remat-pic-stub-load",
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
     TM(tm), RI(tm, *this) {
-  enum {
-    TB_NOT_REVERSABLE = 1U << 31,
-    TB_FLAGS = TB_NOT_REVERSABLE
-  };
-
   static const unsigned OpTbl2Addr[][2] = {
     { X86::ADC32ri,   X86::ADC32mi },
     { X86::ADC32ri8,  X86::ADC32mi8 },
@@ -69,15 +64,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::ADD16ri,   X86::ADD16mi },
     { X86::ADD16ri8,  X86::ADD16mi8 },
     { X86::ADD16rr,   X86::ADD16mr },
-    { X86::ADD16rr_DB,  X86::ADD16mr | TB_NOT_REVERSABLE },
     { X86::ADD32ri,   X86::ADD32mi },
     { X86::ADD32ri8,  X86::ADD32mi8 },
     { X86::ADD32rr,   X86::ADD32mr },
-    { X86::ADD32rr_DB,  X86::ADD32mr | TB_NOT_REVERSABLE },
     { X86::ADD64ri32, X86::ADD64mi32 },
     { X86::ADD64ri8,  X86::ADD64mi8 },
     { X86::ADD64rr,   X86::ADD64mr },
-    { X86::ADD64rr_DB,  X86::ADD64mr | TB_NOT_REVERSABLE },
     { X86::ADD8ri,    X86::ADD8mi },
     { X86::ADD8rr,    X86::ADD8mr },
     { X86::AND16ri,   X86::AND16mi },
@@ -222,21 +214,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
     unsigned RegOp = OpTbl2Addr[i][0];
-    unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
-    assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
-    RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
-
-    // If this is not a reversable operation (because there is a many->one)
-    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
-    if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
-      continue;
-
+    unsigned MemOp = OpTbl2Addr[i][1];
+    if (!RegOp2MemOpTable2Addr.insert(std::make_pair(RegOp,
+                                            std::make_pair(MemOp,0))).second)
+      assert(false && "Duplicated entries?");
     // Index 0, folded load and store, no alignment requirement.
     unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-
-    assert(!MemOp2RegOpTable.count(MemOp) &&
-           "Duplicated entries in unfolding maps?");
-    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+    if (!MemOp2RegOpTable.insert(std::make_pair(MemOp,
+                                                std::make_pair(RegOp,
+                                                               AuxInfo))).second)
+      assert(false && "Duplicated entries in unfolding maps?");
   }
 
   // If the third value is 1, then it's folding either a load or a store.
@@ -466,11 +453,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::ADC32rr,         X86::ADC32rm, 0 },
     { X86::ADC64rr,         X86::ADC64rm, 0 },
     { X86::ADD16rr,         X86::ADD16rm, 0 },
-    { X86::ADD16rr_DB,      X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD32rr,         X86::ADD32rm, 0 },
-    { X86::ADD32rr_DB,      X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD64rr,         X86::ADD64rm, 0 },
-    { X86::ADD64rr_DB,      X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD8rr,          X86::ADD8rm, 0 },
     { X86::ADDPDrr,         X86::ADDPDrm, 16 },
     { X86::ADDPSrr,         X86::ADDPSrm, 16 },
@@ -665,23 +649,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
     unsigned RegOp = OpTbl2[i][0];
-    unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
+    unsigned MemOp = OpTbl2[i][1];
     unsigned Align = OpTbl2[i][2];
-
-    assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
-    RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
-
-    // If this is not a reversable operation (because there is a many->one)
-    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
-    if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
-      continue;
-
+    if (!RegOp2MemOpTable2.insert(std::make_pair(RegOp,
+                                           std::make_pair(MemOp,Align))).second)
+      assert(false && "Duplicated entries?");
     // Index 2, folded load
     unsigned AuxInfo = 2 | (1 << 4);
-    assert(!MemOp2RegOpTable.count(MemOp) &&
-           "Duplicated entries in unfolding maps?");
-    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+    if (!MemOp2RegOpTable.insert(std::make_pair(MemOp,
+                                          std::make_pair(RegOp, AuxInfo))).second)
+      assert(false && "Duplicated entries in unfolding maps?");
   }
 }
@@ -1156,8 +1133,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   case X86::ADD16ri8:
     addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
     break;
-  case X86::ADD16rr:
-  case X86::ADD16rr_DB: {
+  case X86::ADD16rr: {
     unsigned Src2 = MI->getOperand(2).getReg();
     bool isKill2 = MI->getOperand(2).isKill();
     unsigned leaInReg2 = 0;
@@ -1370,27 +1346,18 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                       Src, isKill, -1);
     break;
   case X86::ADD64rr:
-  case X86::ADD64rr_DB:
-  case X86::ADD32rr:
-  case X86::ADD32rr_DB: {
+  case X86::ADD32rr: {
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-    unsigned Opc;
-    TargetRegisterClass *RC;
-    if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
-      Opc = X86::LEA64r;
-      RC = X86::GR64_NOSPRegisterClass;
-    } else {
-      Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-      RC = X86::GR32_NOSPRegisterClass;
-    }
+    unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
+      : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
     unsigned Src2 = MI->getOperand(2).getReg();
     bool isKill2 = MI->getOperand(2).isKill();
 
     // LEA can't handle RSP.
     if (TargetRegisterInfo::isVirtualRegister(Src2) &&
-        !MF.getRegInfo().constrainRegClass(Src2, RC))
+        !MF.getRegInfo().constrainRegClass(Src2,
+                              MIOpc == X86::ADD64rr ? X86::GR64_NOSPRegisterClass :
+                              X86::GR32_NOSPRegisterClass))
       return 0;
 
     NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1401,8 +1368,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       LV->replaceKillInstruction(Src2, MI, NewMI);
     break;
   }
-  case X86::ADD16rr:
-  case X86::ADD16rr_DB: {
+  case X86::ADD16rr: {
     if (DisableLEA16)
       return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -2630,8 +2596,13 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
     OpcodeTablePtr = &RegOp2MemOpTable2;
   }
 
-  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
-    return true;
+  if (OpcodeTablePtr) {
+    // Find the Opcode to fuse
+    DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+      OpcodeTablePtr->find(Opc);
+    if (I != OpcodeTablePtr->end())
+      return true;
+  }
   return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
 }
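
Throughout this file the revert goes back to detecting duplicate table entries through the bool returned from insert(), instead of an assert on count() followed by operator[]. The same idiom shown with a plain std::map rather than LLVM's DenseMap (a sketch, not code from the commit):

    #include <cassert>
    #include <map>
    #include <utility>

    int main() {
      std::map<unsigned, std::pair<unsigned, unsigned> > Table;
      // insert() returns an (iterator, bool) pair; the bool is false when
      // the key already exists, which is how the loops above flag
      // duplicated entries in a single map operation.
      bool First  = Table.insert(std::make_pair(1u, std::make_pair(2u, 0u))).second;
      bool Second = Table.insert(std::make_pair(1u, std::make_pair(3u, 0u))).second;
      assert(First && !Second); // the second insert is rejected
      return 0;
    }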

View File

@@ -544,6 +544,20 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
   return N->hasOneUse();
 }]>;
 
+// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero0, KnownOne0;
+  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+  APInt KnownZero1, KnownOne1;
+  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+  return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction list.
 //
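
The constant-operand fast path in the re-added PatFrag asks MaskedValueIsZero whether every set bit of the constant falls in a position already known to be zero in the other operand, e.g. the low bits of an aligned value. A hedged numeric illustration of why that licenses selecting OR as ADD:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x1000u; // 16-byte aligned, so its low four bits are zero
      uint32_t C = 0x7u;    // constant confined to those known-zero bits
      assert((X & C) == 0);       // what MaskedValueIsZero proves symbolically
      assert((X | C) == (X + C)); // hence (or X, C) may be selected as (add X, C)
      return 0;
    }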

View File

@@ -347,7 +347,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   }
 
   // Handle a few special cases to eliminate operand modifiers.
-ReSimplify:
   switch (OutMI.getOpcode()) {
   case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
     lower_lea64_32mem(&OutMI, 1);
@@ -434,13 +433,6 @@ ReSimplify:
     break;
   }
 
-  // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
-  // this with an ugly goto in case the resultant OR uses EAX and needs the
-  // short form.
-  case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
-  case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
-  case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
-
   // The assembler backend wants to see branches in their small form and relax
   // them to their large form. The JIT can only handle the large form because
   // it does not do relaxation. For now, translate the large form to the
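
The block removed above rewrote the _DB pseudo-ops to real ORs and then jumped back to the top of the switch so a later case could still shrink the result (the EAX short form). A sketch of that re-dispatch pattern with made-up opcode names (ADD_PSEUDO and friends are hypothetical, purely for illustration):

    // Hypothetical opcodes; the real code dispatches on MCInst opcodes.
    enum Opcode { ADD_PSEUDO, OR_REG, OR_SHORT_FORM };

    Opcode lower(Opcode Op, bool UsesEAX) {
    ReSimplify:
      switch (Op) {
      case ADD_PSEUDO:
        Op = OR_REG;     // rewrite the pseudo-op to a real OR...
        goto ReSimplify; // ...then re-run the switch on the new opcode
      case OR_REG:
        return UsesEAX ? OR_SHORT_FORM : OR_REG; // a later case may simplify it
      default:
        return Op;
      }
    }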