From b409a61574cf3cd25879915c5eeff174fbc64fc9 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 16 Jul 2012 19:35:43 +0000 Subject: [PATCH] For something like uint32_t hi(uint64_t res) { uint32_t hi = res >> 32; return !hi; } llvm IR looks like this: define i32 @hi(i64 %res) nounwind uwtable ssp { entry: %lnot = icmp ult i64 %res, 4294967296 %lnot.ext = zext i1 %lnot to i32 ret i32 %lnot.ext } The optimizer has optimized away the right shift and truncate but the resulting constant is too large to fit in the 32-bit immediate field. The resulting x86 code is worse as a result: movabsq $4294967296, %rax ## imm = 0x100000000 cmpq %rax, %rdi sbbl %eax, %eax andl $1, %eax This patch teaches the x86 lowering code to handle ult against a large immediate with trailing zeros. It will issue a right shift and a truncate followed by a comparison against a shifted immediate. shrq $32, %rdi testl %edi, %edi sete %al movzbl %al, %eax It also handles a ugt comparison against a large immediate with trailing bits set. i.e. X > 0x0ffffffff -> (X >> 32) >= 1 rdar://11866926 llvm-svn: 160312 --- lib/Target/X86/X86ISelLowering.cpp | 44 ++++++++++++++++++++ test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll | 4 +- test/CodeGen/X86/cmp.ll | 48 ++++++++++++++++++++++ 3 files changed, 94 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c55a1ef743c..1d72aad6754 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3059,6 +3059,50 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, RHS = DAG.getConstant(0, RHS.getValueType()); return X86::COND_LE; } + if (SetCCOpcode == ISD::SETULT || SetCCOpcode == ISD::SETUGE) { + unsigned TrailZeros = RHSC->getAPIntValue().countTrailingZeros(); + if (TrailZeros >= 32) { + // The constant doesn't fit in cmp immediate field. Right shift LHS by + // the # of trailing zeros and truncate it to 32-bit. Then compare + // against shifted RHS. 
+ assert(LHS.getValueType() == MVT::i64 && "Expecting a 64-bit cmp!"); + DebugLoc dl = LHS.getDebugLoc(); + LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::SRL, dl, MVT::i64, LHS, + DAG.getConstant(TrailZeros, MVT::i8))); + uint64_t C = RHSC->getZExtValue() >> TrailZeros; + + if (SetCCOpcode == ISD::SETULT) { + // X < 0x300000000 -> (X >> 32) < 3 + // X < 0x100000000 -> (X >> 32) == 0 + // X < 0x200000000 -> (X >> 33) == 0 + if (C == 1) { + RHS = DAG.getConstant(0, MVT::i32); + return X86::COND_E; + } + RHS = DAG.getConstant(C, MVT::i32); + return X86::COND_B; + } else /* SetCCOpcode == ISD::SETUGE */ { + // X >= 0x100000000 -> (X >> 32) >= 1 + RHS = DAG.getConstant(C, MVT::i32); + return X86::COND_AE; + } + } + } + if (SetCCOpcode == ISD::SETUGT) { + unsigned TrailOnes = RHSC->getAPIntValue().countTrailingOnes(); + if (TrailOnes >= 32 && !RHSC->isAllOnesValue()) { + assert(LHS.getValueType() == MVT::i64 && "Expecting a 64-bit cmp!"); + DebugLoc dl = LHS.getDebugLoc(); + LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::SRL, dl, MVT::i64, LHS, + DAG.getConstant(TrailOnes, MVT::i8))); + uint64_t C = (RHSC->getZExtValue()+1) >> TrailOnes; + // X > 0x0ffffffff -> (X >> 32) >= 1 + RHS = DAG.getConstant(C, MVT::i32); + return X86::COND_AE; + } + } } switch (SetCCOpcode) { diff --git a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll index c6f4b497af1..be10ad5cc20 100644 --- a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll +++ b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll @@ -12,9 +12,9 @@ declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalData ; Avoid hoisting the test above loads or copies ; CHECK: %entry -; CHECK: cmpq +; CHECK: test ; CHECK-NOT: mov -; CHECK: jb +; CHECK: je define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp { entry: %0 = load i8** null, align 8 diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll index ef5e353e9f9..c9c85abad85 100644 --- 
a/test/CodeGen/X86/cmp.ll +++ b/test/CodeGen/X86/cmp.ll @@ -90,3 +90,51 @@ F: ; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00] } +; rdar://11866926 +define i32 @test7(i64 %res) nounwind uwtable readnone ssp { +entry: +; CHECK: test7: +; CHECK-NOT: movabsq +; CHECK: shrq $32, %rdi +; CHECK: testl %edi, %edi +; CHECK: sete + %lnot = icmp ult i64 %res, 4294967296 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} + +define i32 @test8(i64 %res) nounwind uwtable readnone ssp { +entry: +; CHECK: test8: +; CHECK-NOT: movabsq +; CHECK: shrq $32, %rdi +; CHECK: cmpl $3, %edi + %lnot = icmp ult i64 %res, 12884901888 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} + +define i32 @test9(i64 %res) nounwind uwtable readnone ssp { +entry: +; CHECK: test9: +; CHECK-NOT: movabsq +; CHECK: shrq $33, %rdi +; CHECK: testl %edi, %edi +; CHECK: sete + %lnot = icmp ult i64 %res, 8589934592 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} + +define i32 @test10(i64 %res) nounwind uwtable readnone ssp { +entry: +; CHECK: test10: +; CHECK-NOT: movabsq +; CHECK: shrq $32, %rdi +; CHECK: cmpl $1, %edi +; CHECK: setae + %lnot = icmp uge i64 %res, 4294967296 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} +