For something like

/* Returns 1 when the upper 32 bits of res are all zero, 0 otherwise. */
uint32_t hi(uint64_t res)
{
        /* Keep only the high half; the conversion to 32 bits drops nothing
           because the shifted value already fits. */
        uint32_t hi = res >> 32;
        return !hi;
}

llvm IR looks like this:
define i32 @hi(i64 %res) nounwind uwtable ssp {
entry:
  %lnot = icmp ult i64 %res, 4294967296
  %lnot.ext = zext i1 %lnot to i32
  ret i32 %lnot.ext
}

The optimizer has optimized away the right shift and truncate, but the resulting
constant is too large to fit in the 32-bit immediate field. The resulting x86
code is worse as a result:
        movabsq $4294967296, %rax       ## imm = 0x100000000
        cmpq    %rax, %rdi
        sbbl    %eax, %eax
        andl    $1, %eax

This patch teaches the x86 lowering code to handle ult against a large immediate
with trailing zeros. It will issue a right shift and a truncate followed by
a comparison against a shifted immediate.
        shrq    $32, %rdi
        testl   %edi, %edi
        sete    %al
        movzbl  %al, %eax

It also handles a ugt comparison against a large immediate with trailing bits
set, i.e. X > 0x0ffffffff -> (X >> 32) >= 1

rdar://11866926


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160312 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2012-07-16 19:35:43 +00:00
parent c76fa8937d
commit 98819c9d1e
3 changed files with 94 additions and 2 deletions

View File

@ -3059,6 +3059,50 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_LE;
}
if (SetCCOpcode == ISD::SETULT || SetCCOpcode == ISD::SETUGE) {
unsigned TrailZeros = RHSC->getAPIntValue().countTrailingZeros();
if (TrailZeros >= 32) {
// The constant doesn't fit in cmp immediate field. Right shift LHS by
// the # of trailing zeros and truncate it to 32-bit. Then compare
// against shifted RHS.
assert(LHS.getValueType() == MVT::i64 && "Expecting a 64-bit cmp!");
DebugLoc dl = LHS.getDebugLoc();
LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, MVT::i64, LHS,
DAG.getConstant(TrailZeros, MVT::i8)));
uint64_t C = RHSC->getZExtValue() >> TrailZeros;
if (SetCCOpcode == ISD::SETULT) {
// X < 0x300000000 -> (X >> 32) < 3
// X < 0x100000000 -> (X >> 32) == 0
// X < 0x200000000 -> (X >> 33) == 0
if (C == 1) {
RHS = DAG.getConstant(0, MVT::i32);
return X86::COND_E;
}
RHS = DAG.getConstant(C, MVT::i32);
return X86::COND_B;
} else /* SetCCOpcode == ISD::SETUGE */ {
// X >= 0x100000000 -> (X >> 32) >= 1
RHS = DAG.getConstant(C, MVT::i32);
return X86::COND_AE;
}
}
}
if (SetCCOpcode == ISD::SETUGT) {
unsigned TrailOnes = RHSC->getAPIntValue().countTrailingOnes();
if (TrailOnes >= 32 && !RHSC->isAllOnesValue()) {
assert(LHS.getValueType() == MVT::i64 && "Expecting a 64-bit cmp!");
DebugLoc dl = LHS.getDebugLoc();
LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, MVT::i64, LHS,
DAG.getConstant(TrailOnes, MVT::i8)));
uint64_t C = (RHSC->getZExtValue()+1) >> TrailOnes;
// X > 0x0ffffffff -> (X >> 32) >= 1
RHS = DAG.getConstant(C, MVT::i32);
return X86::COND_AE;
}
}
}
switch (SetCCOpcode) {

View File

@ -12,9 +12,9 @@ declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalData
; Avoid hoisting the test above loads or copies
; CHECK: %entry
; CHECK: cmpq
; CHECK: test
; CHECK-NOT: mov
; CHECK: jb
; CHECK: je
define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp {
entry:
%0 = load i8** null, align 8

View File

@ -90,3 +90,51 @@ F:
; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
}
; rdar://11866926
; test7: unsigned less-than of a 64-bit value against 4294967296 (0x100000000).
; The expected code must not materialize the constant with movabsq; instead it
; shifts %rdi right by 32, tests the low 32 bits of the result against zero,
; and produces the boolean with sete.
define i32 @test7(i64 %res) nounwind uwtable readnone ssp {
entry:
; CHECK: test7:
; CHECK-NOT: movabsq
; CHECK: shrq $32, %rdi
; CHECK: testl %edi, %edi
; CHECK: sete
%lnot = icmp ult i64 %res, 4294967296
%lnot.ext = zext i1 %lnot to i32
ret i32 %lnot.ext
}
; test8: unsigned less-than against 12884901888 (0x300000000, i.e. 3 << 32).
; The expected code must not use movabsq; it shifts %rdi right by 32 and
; compares the 32-bit result against the shifted-down constant 3.
define i32 @test8(i64 %res) nounwind uwtable readnone ssp {
entry:
; CHECK: test8:
; CHECK-NOT: movabsq
; CHECK: shrq $32, %rdi
; CHECK: cmpl $3, %edi
%lnot = icmp ult i64 %res, 12884901888
%lnot.ext = zext i1 %lnot to i32
ret i32 %lnot.ext
}
; test9: unsigned less-than against 8589934592 (0x200000000, i.e. 1 << 33).
; The constant has 33 trailing zero bits, so the expected shift amount is 33
; rather than 32; the shifted value is then tested against zero and the
; boolean is produced with sete. movabsq must not appear.
define i32 @test9(i64 %res) nounwind uwtable readnone ssp {
entry:
; CHECK: test9:
; CHECK-NOT: movabsq
; CHECK: shrq $33, %rdi
; CHECK: testl %edi, %edi
; CHECK: sete
%lnot = icmp ult i64 %res, 8589934592
%lnot.ext = zext i1 %lnot to i32
ret i32 %lnot.ext
}
; test10: unsigned greater-or-equal against 4294967296 (0x100000000).
; The expected code must not use movabsq; it shifts %rdi right by 32, compares
; the 32-bit result against 1, and produces the boolean with setae.
; NOTE(review): the value is named %lnot, presumably copied from the tests
; above; the compare here is uge, not a logical negation.
define i32 @test10(i64 %res) nounwind uwtable readnone ssp {
entry:
; CHECK: test10:
; CHECK-NOT: movabsq
; CHECK: shrq $32, %rdi
; CHECK: cmpl $1, %edi
; CHECK: setae
%lnot = icmp uge i64 %res, 4294967296
%lnot.ext = zext i1 %lnot to i32
ret i32 %lnot.ext
}