mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-30 00:32:53 +00:00
Optimized loading (zextload) of i1 value from memory.
This patch is a partial revert of https://llvm.org/svn/llvm-project/llvm/trunk@237793. The extra "and" causes performance degradation. We assume that an i1 is stored in zero-extended form, and that the store operation is responsible for zeroing the upper bits. Differential Revision: http://reviews.llvm.org/D17541 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261828 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1f7d1552d1
commit
558bab1214
@ -2193,14 +2193,6 @@ def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
|
||||
|
||||
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
|
||||
}]>;
|
||||
|
||||
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst, GR8:$src)>;
|
||||
|
||||
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
|
||||
let Predicates = [HasAVX512, NoDQI] in {
|
||||
// GR from/to 8-bit mask without native support
|
||||
|
@ -1139,12 +1139,13 @@ defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
|
||||
defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
|
||||
|
||||
// zextload bool -> zextload byte
|
||||
def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
|
||||
def : Pat<(zextloadi16i1 addr:$src), (AND16ri8 (MOVZX16rm8 addr:$src), (i16 1))>;
|
||||
def : Pat<(zextloadi32i1 addr:$src), (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1))>;
|
||||
// i1 stored in one byte in zero-extended form.
|
||||
// Upper bits cleanup should be executed before Store.
|
||||
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
|
||||
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
|
||||
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
|
||||
def : Pat<(zextloadi64i1 addr:$src),
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
(AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
|
||||
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
|
||||
|
||||
// extload bool -> extload byte
|
||||
// When extloading from 16-bit and smaller memory locations into 64-bit
|
||||
|
@ -15,27 +15,18 @@ define void @f1() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @f2(i1 *%x, i16 *%y) {
|
||||
define void @f2(i16 %x, i1 *%y) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
|
||||
%a = load i1, i1* %x
|
||||
%b = zext i1 %a to i16
|
||||
store i16 %b, i16* %y
|
||||
; CHECK: andl $1, %edi # encoding: [0x83,0xe7,0x01]
|
||||
%c = trunc i16 %x to i1
|
||||
store i1 %c, i1* %y
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @f3(i1 *%x) {
|
||||
define void @f3(i32 %x, i1 *%y) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
|
||||
%a = load i1, i1* %x
|
||||
%b = zext i1 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i64 @f4(i1 *%x) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
|
||||
%a = load i1, i1* %x
|
||||
%b = zext i1 %a to i64
|
||||
ret i64 %b
|
||||
; CHECK: andl $1, %edi # encoding: [0x83,0xe7,0x01]
|
||||
%c = trunc i32 %x to i1
|
||||
store i1 %c, i1* %y
|
||||
ret void
|
||||
}
|
||||
|
@ -190,7 +190,6 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
||||
|
||||
;CHECK-LABEL: test15
|
||||
;CHECK: movb (%rdi), %al
|
||||
;CHECK: andb $1, %al
|
||||
;CHECK: movw $-1, %ax
|
||||
;CHECK: cmovew
|
||||
define i16 @test15(i1 *%addr) {
|
||||
@ -202,7 +201,6 @@ define i16 @test15(i1 *%addr) {
|
||||
|
||||
;CHECK-LABEL: test16
|
||||
;CHECK: movb (%rdi), %al
|
||||
;CHECK: andw $1, %ax
|
||||
;CHECK: kmovw
|
||||
;CHECK: kshiftlw $10
|
||||
;CHECK: korw
|
||||
|
@ -1586,7 +1586,6 @@ define void @f1(i32 %c) {
|
||||
; KNL-LABEL: f1:
|
||||
; KNL: ## BB#0: ## %entry
|
||||
; KNL-NEXT: movzbl {{.*}}(%rip), %edi
|
||||
; KNL-NEXT: andl $1, %edi
|
||||
; KNL-NEXT: movl %edi, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: kmovw %eax, %k0
|
||||
@ -1601,7 +1600,6 @@ define void @f1(i32 %c) {
|
||||
; SKX-LABEL: f1:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: movzbl {{.*}}(%rip), %edi
|
||||
; SKX-NEXT: andl $1, %edi
|
||||
; SKX-NEXT: movl %edi, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: kmovw %eax, %k0
|
||||
@ -1622,3 +1620,24 @@ entry:
|
||||
|
||||
declare void @f2(i32) #1
|
||||
|
||||
define void @store_i16_i1(i16 %x, i1 *%y) {
|
||||
; CHECK-LABEL: store_i16_i1:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andl $1, %edi
|
||||
; CHECK-NEXT: movb %dil, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%c = trunc i16 %x to i1
|
||||
store i1 %c, i1* %y
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_i8_i1(i8 %x, i1 *%y) {
|
||||
; CHECK-LABEL: store_i8_i1:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andl $1, %edi
|
||||
; CHECK-NEXT: movb %dil, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%c = trunc i8 %x to i1
|
||||
store i1 %c, i1* %y
|
||||
ret void
|
||||
}
|
||||
|
@ -1466,7 +1466,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
||||
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
; SKX-NEXT: # implicit-def: %XMM0
|
||||
; SKX-NEXT: andb $1, %al
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: je .LBB29_2
|
||||
; SKX-NEXT: # BB#1: # %cond.load
|
||||
; SKX-NEXT: vmovq %xmm1, %rax
|
||||
@ -1474,7 +1474,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
||||
; SKX-NEXT: .LBB29_2: # %else
|
||||
; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
|
||||
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
; SKX-NEXT: andb $1, %al
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: je .LBB29_4
|
||||
; SKX-NEXT: # BB#3: # %cond.load1
|
||||
; SKX-NEXT: vpextrq $1, %xmm1, %rax
|
||||
@ -1482,7 +1482,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
||||
; SKX-NEXT: .LBB29_4: # %else2
|
||||
; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
|
||||
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
; SKX-NEXT: andb $1, %al
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: je .LBB29_6
|
||||
; SKX-NEXT: # BB#5: # %cond.load4
|
||||
; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
@ -1505,7 +1505,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
||||
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
|
||||
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; SKX_32-NEXT: # implicit-def: %XMM1
|
||||
; SKX_32-NEXT: andb $1, %al
|
||||
; SKX_32-NEXT: testb %al, %al
|
||||
; SKX_32-NEXT: je .LBB29_2
|
||||
; SKX_32-NEXT: # BB#1: # %cond.load
|
||||
; SKX_32-NEXT: vmovd %xmm2, %eax
|
||||
@ -1513,7 +1513,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
||||
; SKX_32-NEXT: .LBB29_2: # %else
|
||||
; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
|
||||
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; SKX_32-NEXT: andb $1, %al
|
||||
; SKX_32-NEXT: testb %al, %al
|
||||
; SKX_32-NEXT: je .LBB29_4
|
||||
; SKX_32-NEXT: # BB#3: # %cond.load1
|
||||
; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
|
||||
@ -1522,7 +1522,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
||||
; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm0
|
||||
; SKX_32-NEXT: kmovb %k1, (%esp)
|
||||
; SKX_32-NEXT: movb (%esp), %al
|
||||
; SKX_32-NEXT: andb $1, %al
|
||||
; SKX_32-NEXT: testb %al, %al
|
||||
; SKX_32-NEXT: je .LBB29_6
|
||||
; SKX_32-NEXT: # BB#5: # %cond.load4
|
||||
; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
|
||||
|
@ -811,8 +811,6 @@ end:
|
||||
;
|
||||
; Load the value of b.
|
||||
; CHECK: movb _b(%rip), [[BOOL:%cl]]
|
||||
; Extract i1 from the loaded value.
|
||||
; CHECK-NEXT: andb $1, [[BOOL]]
|
||||
; Create the zero value for the select assignment.
|
||||
; CHECK-NEXT: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
|
||||
; CHECK-NEXT: testb [[BOOL]], [[BOOL]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user