xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) was only being combined at the AfterLegalizeTypes stage; this patch permits the combine to occur anytime before then as well.

The main aim of this is to improve the ability to recognise bitmasks that can be converted to shuffles.

I had to modify a number of AVX512 mask tests, as the basic bitcast to/from scalar pattern was being stripped out, preventing testing of the mmask bitops. By replacing the bitcasts with loads we can get almost the same result.

Differential Revision: http://reviews.llvm.org/D18944

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265998 91177308-0d34-0410-b5e6-96231b3b80d8
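In IR notation, a hypothetical instance of the fold (types chosen for illustration; the combine itself runs on the SelectionDAG):

    ; before: both operands of the bitwise op are bitcasts from a common type
    %x = bitcast <2 x i64> %a to <4 x i32>
    %y = bitcast <2 x i64> %b to <4 x i32>
    %r = and <4 x i32> %x, %y

    ; after: the op is performed on the source type, with a single bitcast of the result
    %t = and <2 x i64> %a, %b
    %r = bitcast <2 x i64> %t to <4 x i32>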
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42

;
; AND/XOR/OR i24 as v3i8
;

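; The i24 scalars are bitcast to <3 x i8>, op'd as vectors, and cast back; the
; checks below verify that the illegal vector op folds away into a single
; scalar 32-bit op.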
define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v3i8:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_i24_as_v3i8:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    andl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %1 = bitcast i24 %a to <3 x i8>
  %2 = bitcast i24 %b to <3 x i8>
  %3 = and <3 x i8> %1, %2
  %4 = bitcast <3 x i8> %3 to i24
  ret i24 %4
}

define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v3i8:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_i24_as_v3i8:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    xorl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %1 = bitcast i24 %a to <3 x i8>
  %2 = bitcast i24 %b to <3 x i8>
  %3 = xor <3 x i8> %1, %2
  %4 = bitcast <3 x i8> %3 to i24
  ret i24 %4
}

define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v3i8:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_i24_as_v3i8:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    orl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %1 = bitcast i24 %a to <3 x i8>
  %2 = bitcast i24 %b to <3 x i8>
  %3 = or <3 x i8> %1, %2
  %4 = bitcast <3 x i8> %3 to i24
  ret i24 %4
}

;
; AND/XOR/OR i24 as v8i3
;

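; As above, but through <8 x i3>; the round-trip should again fold to a single
; scalar 32-bit op.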
define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v8i3:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_i24_as_v8i3:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    andl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %1 = bitcast i24 %a to <8 x i3>
  %2 = bitcast i24 %b to <8 x i3>
  %3 = and <8 x i3> %1, %2
  %4 = bitcast <8 x i3> %3 to i24
  ret i24 %4
}

define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v8i3:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_i24_as_v8i3:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    xorl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %1 = bitcast i24 %a to <8 x i3>
  %2 = bitcast i24 %b to <8 x i3>
  %3 = xor <8 x i3> %1, %2
  %4 = bitcast <8 x i3> %3 to i24
  ret i24 %4
}

define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v8i3:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_i24_as_v8i3:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    orl %esi, %edi
; X64-SSE-NEXT:    movl %edi, %eax
; X64-SSE-NEXT:    retq
  %1 = bitcast i24 %a to <8 x i3>
  %2 = bitcast i24 %b to <8 x i3>
  %3 = or <8 x i3> %1, %2
  %4 = bitcast <8 x i3> %3 to i24
  ret i24 %4
}

;
; AND/XOR/OR v3i8 as i24
;

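; The reverse direction: <3 x i8> vectors are bitcast to i24, op'd as scalars,
; and cast back. Each i8 element arrives in its own argument slot, so the
; expected code packs the elements into an XMM register, performs the op
; there, and unpacks the result.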
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pand %xmm0, %xmm1
; X32-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X32-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X32-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    movd %ecx, %xmm0
; X64-SSE-NEXT:    pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT:    pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT:    movd %edi, %xmm1
; X64-SSE-NEXT:    pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT:    pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT:    pand %xmm0, %xmm1
; X64-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X64-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X64-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT:    retq
  %1 = bitcast <3 x i8> %a to i24
  %2 = bitcast <3 x i8> %b to i24
  %3 = and i24 %1, %2
  %4 = bitcast i24 %3 to <3 x i8>
  ret <3 x i8> %4
}

define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pxor %xmm0, %xmm1
; X32-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X32-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X32-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    movd %ecx, %xmm0
; X64-SSE-NEXT:    pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT:    pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT:    movd %edi, %xmm1
; X64-SSE-NEXT:    pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT:    pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT:    pxor %xmm0, %xmm1
; X64-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X64-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X64-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT:    retq
  %1 = bitcast <3 x i8> %a to i24
  %2 = bitcast <3 x i8> %b to i24
  %3 = xor i24 %1, %2
  %4 = bitcast i24 %3 to <3 x i8>
  ret <3 x i8> %4
}

define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT:    por %xmm0, %xmm1
; X32-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X32-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X32-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    movd %ecx, %xmm0
; X64-SSE-NEXT:    pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT:    pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT:    movd %edi, %xmm1
; X64-SSE-NEXT:    pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT:    pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT:    por %xmm0, %xmm1
; X64-SSE-NEXT:    pextrb $0, %xmm1, %eax
; X64-SSE-NEXT:    pextrb $4, %xmm1, %edx
; X64-SSE-NEXT:    pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT:    retq
  %1 = bitcast <3 x i8> %a to i24
  %2 = bitcast <3 x i8> %b to i24
  %3 = or i24 %1, %2
  %4 = bitcast i24 %3 to <3 x i8>
  ret <3 x i8> %4
}

;
; AND/XOR/OR v8i3 as i24
;

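; <8 x i3> is promoted to a full XMM-sized vector, so the i24 round-trip folds
; away and a single 128-bit bitwise op remains.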
define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: and_v8i3_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    andps %xmm1, %xmm0
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: and_v8i3_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    andps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
  %1 = bitcast <8 x i3> %a to i24
  %2 = bitcast <8 x i3> %b to i24
  %3 = and i24 %1, %2
  %4 = bitcast i24 %3 to <8 x i3>
  ret <8 x i3> %4
}

define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: xor_v8i3_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    xorps %xmm1, %xmm0
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: xor_v8i3_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    xorps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
  %1 = bitcast <8 x i3> %a to i24
  %2 = bitcast <8 x i3> %b to i24
  %3 = xor i24 %1, %2
  %4 = bitcast i24 %3 to <8 x i3>
  ret <8 x i3> %4
}

define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: or_v8i3_as_i24:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    orps %xmm1, %xmm0
; X32-SSE-NEXT:    retl
;
; X64-SSE-LABEL: or_v8i3_as_i24:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    orps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
  %1 = bitcast <8 x i3> %a to i24
  %2 = bitcast <8 x i3> %b to i24
  %3 = or i24 %1, %2
  %4 = bitcast i24 %3 to <8 x i3>
  ret <8 x i3> %4
}