llvm/test/CodeGen/X86/setcc.ll
Sanjay Patel cc9614d291 [x86] don't blindly transform SETB into SBB
I noticed unnecessary 'sbb' instructions in D30472 and while looking at 'ptest' codegen recently. 
This happens because we were transforming any 'setb' - even when we only wanted a single-bit result.

This patch moves those transforms under visitAdd/visitSub, so we're only creating sbb/adc when it
is a win. I don't know why we need a SETCC_CARRY node type, but I'm not proposing to change that
existing behavior in this patch.

Also, I'm skeptical that sbb/adc are a win for all micro-arches, so I added comments to the test files
where this transform still fires.

The test changes here are all cases where we no longer produce sbb/adc. Avoiding partial register
stalls (generating an xor to clear a register) is not handled in some cases, but that's a separate
issue.

Differential Revision: https://reviews.llvm.org/D30611


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297586 91177308-0d34-0410-b5e6-96231b3b80d8
2017-03-12 18:28:48 +00:00

93 lines
2.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; rdar://7329206
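; Originally: use sbb x, x to materialize the carry bit in a GPR (the value is
; either all 1's or all 0's). The checks for t1-t3 below no longer expect sbb;
; a single-bit compare result is now produced with setcc into a zeroed
; register and then shifted into place.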
; t1: returns 32 when %x is unsigned-greater-than 26, otherwise 0.
; The expected x86-64 code zeroes %eax, does a 32-bit compare of %edi against
; 26, sets %al with seta, and shifts left by 5 so the 0/1 value becomes 0/32.
define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp {
; CHECK-LABEL: t1:
; CHECK: ## BB#0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $26, %edi
; CHECK-NEXT: seta %al
; CHECK-NEXT: shll $5, %eax
; CHECK-NEXT: retq
; i1 result of the unsigned "greater than 26" comparison.
%t0 = icmp ugt i16 %x, 26
; 32 == 1 << 5, which corresponds to the single shift in the expected output.
%if = select i1 %t0, i16 32, i16 0
ret i16 %if
}
; t2: returns 32 when %x is unsigned-less-than 26, otherwise 0.
; Same shape as t1 but with the opposite unsigned predicate; the expected code
; uses setb into a zeroed %eax followed by a left shift by 5, and contains no
; sbb instruction.
define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp {
; CHECK-LABEL: t2:
; CHECK: ## BB#0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $26, %edi
; CHECK-NEXT: setb %al
; CHECK-NEXT: shll $5, %eax
; CHECK-NEXT: retq
; i1 result of the unsigned "less than 26" comparison.
%t0 = icmp ult i16 %x, 26
; 32 == 1 << 5, which corresponds to the single shift in the expected output.
%if = select i1 %t0, i16 32, i16 0
ret i16 %if
}
; t3: 64-bit variant; returns 64 when %x is unsigned-less-than 18, otherwise 0.
; The expected code compares the full 64-bit %rdi, sets %al with setb after
; zeroing %eax, and shifts %rax left by 6 so the 0/1 value becomes 0/64.
define i64 @t3(i64 %x) nounwind readnone ssp {
; CHECK-LABEL: t3:
; CHECK: ## BB#0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $18, %rdi
; CHECK-NEXT: setb %al
; CHECK-NEXT: shlq $6, %rax
; CHECK-NEXT: retq
; i1 result of the unsigned "less than 18" comparison.
%t0 = icmp ult i64 %x, 18
; 64 == 1 << 6, which corresponds to the single shift in the expected output.
%if = select i1 %t0, i64 64, i64 0
ret i64 %if
}
; Global read by t4; zero-initialized i32 with common linkage.
@v4 = common global i32 0, align 4
; t4: computes ((@v4 == 0) ? 2 : 1) << 16 as an i32. The argument %a is unused.
; This is the one function in this file whose expected code still consumes the
; carry flag arithmetically: "cmpl $1, (mem)" sets the carry flag exactly when
; the loaded value is 0 (unsigned less than 1), and "adcw $0" then adds that
; carry to the constant 1 placed in %ax.
define i32 @t4(i32 %a) {
; CHECK-LABEL: t4:
; CHECK: ## BB#0:
; CHECK-NEXT: movq _v4@{{.*}}(%rip), %rax
; CHECK-NEXT: cmpl $1, (%rax)
; CHECK-NEXT: movw $1, %ax
; CHECK-NEXT: adcw $0, %ax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: retq
%t0 = load i32, i32* @v4, align 4
; True when the loaded value is zero.
%not.tobool = icmp eq i32 %t0, 0
; Sign-extending the i1 gives 0 or -1 (all ones) as an i16.
%conv.i = sext i1 %not.tobool to i16
; Shifting the top bit down turns that back into 0 or 1.
%call.lobit = lshr i16 %conv.i, 15
; 1 or 2, depending on whether the loaded value was zero.
%add.i.1 = add nuw nsw i16 %call.lobit, 1
%conv4.2 = zext i16 %add.i.1 to i32
; Move the 16-bit result into the upper half of the i32 return value.
%add = shl nuw nsw i32 %conv4.2, 16
ret i32 %add
}
; t5: returns 1 (as i8) when the sign bit of %a is clear, otherwise 0.
; The IR extracts bit 31, truncates it to i8 and inverts it with xor; the
; expected code is a testl of %edi against itself followed by setns.
; Uses attribute group #0.
define i8 @t5(i32 %a) #0 {
; CHECK-LABEL: t5:
; CHECK: ## BB#0:
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: setns %al
; CHECK-NEXT: retq
; Bit 31 (the sign bit) moved down to bit 0; the result is 0 or 1.
%.lobit = lshr i32 %a, 31
%trunc = trunc i32 %.lobit to i8
; Invert the low bit: 1 means "sign bit was not set".
%.not = xor i8 %trunc, 1
ret i8 %.not
}
; t6: same computation as t5 but returning a zero-extended i1: true when the
; sign bit of %a is clear. The expected code is identical to t5's (testl of
; %edi against itself followed by setns). Uses attribute group #0.
define zeroext i1 @t6(i32 %a) #0 {
; CHECK-LABEL: t6:
; CHECK: ## BB#0:
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: setns %al
; CHECK-NEXT: retq
; Bit 31 (the sign bit) moved down to bit 0; the result is 0 or 1.
%.lobit = lshr i32 %a, 31
%trunc = trunc i32 %.lobit to i1
; Invert the bit: true means "sign bit was not set".
%.not = xor i1 %trunc, 1
ret i1 %.not
}
; Attribute group #0, referenced by @t5 and @t6: selects skylake-avx512 as the
; target CPU for those two functions.
attributes #0 = { "target-cpu"="skylake-avx512" }