mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-02 00:16:25 +00:00
X86: Resolve a long standing FIXME and properly isel pextr[bw].
Generalize the AArch64 .td nodes for AssertZext and AssertSext. Use them to match the relevant pextr store instructions. The test widen_load-2.ll requires a slight change because with the stores gone, the remaining instructions are scheduled in a different order. Add test cases for SSE4 and AVX variants. Resolves rdar://13414672. Patch by Adam Nemet <anemet@apple.com>. llvm-svn: 200957
This commit is contained in:
parent
77433ac346
commit
f2f14a2d43
@ -492,6 +492,12 @@ def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN",
|
||||
// Do not use cvt directly. Use cvt forms below
|
||||
def cvt : SDNode<"ISD::CONVERT_RNDSAT", SDTConvertOp>;
|
||||
|
||||
// Type profile shared by the assert-extension nodes below: one result and one
// operand, both integer, with the operand constrained to the result's type.
// assertsext / assertzext expose ISD::AssertSext / ISD::AssertZext as SDNodes
// so that instruction selection patterns can match through them.
def SDT_assertext : SDTypeProfile<1, 1,
|
||||
[SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
|
||||
def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
|
||||
def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Selection DAG Condition Codes
|
||||
|
||||
|
@ -64,11 +64,6 @@ def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
|
||||
[SDTCisVec<0>, SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
|
||||
|
||||
// Type profile for the assert-extension nodes: one result and one operand,
// both integer, with the operand constrained to the result's type.
// assertsext / assertzext wrap ISD::AssertSext / ISD::AssertZext as SDNodes.
def SDT_assertext : SDTypeProfile<1, 1,
|
||||
[SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
|
||||
def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
|
||||
def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Addressing-mode instantiations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -494,11 +494,6 @@ is memory.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
SSE4 extract-to-mem ops aren't being pattern matched because of the AssertZext
|
||||
sitting between the truncate and the extract.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert
|
||||
any number of 0.0 simultaneously. Currently we only use it for simple
|
||||
insertions.
|
||||
|
@ -6210,10 +6210,8 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
|
||||
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>;
|
||||
// FIXME:
|
||||
// There's an AssertZext in the way of writing the store pattern
|
||||
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
|
||||
[(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1),
|
||||
imm:$src2)))), addr:$dst)]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in
|
||||
@ -6236,10 +6234,8 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
|
||||
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>;
|
||||
// FIXME:
|
||||
// There's an AssertZext in the way of writing the store pattern
|
||||
// (store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))), addr:$dst)
|
||||
[(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1),
|
||||
imm:$src2)))), addr:$dst)]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in
|
||||
|
22
test/CodeGen/X86/extract-store.ll
Normal file
22
test/CodeGen/X86/extract-store.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=SSE41
|
||||
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
|
||||
|
||||
; Extract the last byte lane (index 15) of a <16 x i8> vector and store it to
; %dst. Both the SSE4.1 and the AVX run expect a (v)pextrb instruction and no
; separate movb after it, i.e. the extract and the store are selected together.
define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) {
|
||||
; AVX: vpextrb
|
||||
; SSE41: pextrb
|
||||
; AVX-NOT: movb
|
||||
; SSE41-NOT: movb
|
||||
%vecext = extractelement <16 x i8> %foo, i32 15
|
||||
store i8 %vecext, i8* %dst, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Extract the last word lane (index 7) of an <8 x i16> vector and store it to
; %dst. Both the SSE4.1 and the AVX run expect a (v)pextrw instruction and no
; separate movw after it, i.e. the extract and the store are selected together.
; The lane index must stay within 0-7: an out-of-range index makes the
; extractelement result undefined, so the test would no longer be guaranteed
; to exercise the extract-to-memory pattern.
define void @pextrw(i16* nocapture %dst, <8 x i16> %foo) {
|
||||
; AVX: vpextrw
|
||||
; SSE41: pextrw
|
||||
; AVX-NOT: movw
|
||||
; SSE41-NOT: movw
|
||||
%vecext = extractelement <8 x i16> %foo, i32 7
|
||||
store i16 %vecext, i16* %dst, align 1
|
||||
ret void
|
||||
}
|
@ -149,9 +149,9 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddb
|
||||
; CHECK: paddb
|
||||
; CHECK: movq
|
||||
; CHECK: pextrb
|
||||
; CHECK: pextrw
|
||||
; CHECK: movq
|
||||
; CHECK: ret
|
||||
%a = load %i8vec31* %ap, align 16
|
||||
%b = load %i8vec31* %bp, align 16
|
||||
|
Loading…
Reference in New Issue
Block a user