mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 12:19:53 +00:00
d9dffbdd6a
The constant folder didn't know how to always fold bitcasts of constant integer vectors. In particular, it was unable to handle the case where a constant vector had some undef elements, and the resulting (i.e. bitcasted) vector type had more elements than the original vector type. Example: %cast = bitcast <2 x i64><i64 undef, i64 2> to <4 x i32> On a little endian target, %cast could have been folded to: <4 x i32><i32 undef, i32 undef, i32 2, i32 0> This patch improves the folding logic by teaching how to correctly propagate undef elements in the folded vector. Differential Revision: https://reviews.llvm.org/D24301 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281343 91177308-0d34-0410-b5e6-96231b3b80d8
409 lines
17 KiB
LLVM
409 lines
17 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -instcombine -S | FileCheck %s
|
|
|
|
;
|
|
; EXTRQ
|
|
;
|
|
|
|
define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_call(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_zero_arg0(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_zero_arg1(
|
|
; CHECK-NEXT: ret <2 x i64> %x
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_to_extqi(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_constant(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_constant_undef(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrq_call_constexpr(
|
|
; CHECK-NEXT: ret <2 x i64> %x
|
|
;
|
|
%1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
;
|
|
; EXTRQI
|
|
;
|
|
|
|
define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_call(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_undef(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_zero(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_constant(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_constant_undef(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_call_constexpr() {
|
|
; CHECK-LABEL: @test_extrqi_call_constexpr(
|
|
; CHECK-NEXT: ret <2 x i64> zeroinitializer
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
;
|
|
; INSERTQ
|
|
;
|
|
|
|
define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertq_call(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertq_to_insertqi(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertq_constant(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertq_constant_undef(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_insertq_call_constexpr(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 0, i64 undef>, i8 2, i8 0)
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
;
|
|
; INSERTQI
|
|
;
|
|
|
|
define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
|
|
; CHECK-LABEL: @test_insertqi_shuffle_04uu(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
|
|
;
|
|
%1 = bitcast <16 x i8> %v to <2 x i64>
|
|
%2 = bitcast <16 x i8> %i to <2 x i64>
|
|
%3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
|
|
%4 = bitcast <2 x i64> %3 to <16 x i8>
|
|
ret <16 x i8> %4
|
|
}
|
|
|
|
define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
|
|
; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
|
|
;
|
|
%1 = bitcast <16 x i8> %v to <2 x i64>
|
|
%2 = bitcast <16 x i8> %i to <2 x i64>
|
|
%3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
|
|
%4 = bitcast <2 x i64> %3 to <16 x i8>
|
|
ret <16 x i8> %4
|
|
}
|
|
|
|
define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
|
|
; CHECK-LABEL: @test_insertqi_constant(
|
|
; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_insertqi_call_constexpr(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 0, i64 undef>, i8 48, i8 3)
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
; The result of this insert is the second arg, since the top 64 bits of
|
|
; the result are undefined, and we copy the bottom 64 bits from the
|
|
; second arg
|
|
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
|
|
; CHECK-LABEL: @testInsert64Bits(
|
|
; CHECK-NEXT: ret <2 x i64> %i
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
|
|
; CHECK-LABEL: @testZeroLength(
|
|
; CHECK-NEXT: ret <2 x i64> %i
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
|
|
; CHECK-LABEL: @testUndefinedInsertq_1(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
|
|
; CHECK-LABEL: @testUndefinedInsertq_2(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
|
|
; CHECK-LABEL: @testUndefinedInsertq_3(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
|
|
ret <2 x i64> %1
|
|
}
|
|
|
|
;
|
|
; Vector Demanded Bits
|
|
;
|
|
|
|
define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_arg0(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_arg1(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_args01(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
|
|
ret <2 x i64> %3
|
|
}
|
|
|
|
define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
|
|
; CHECK-LABEL: @test_extrq_ret(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
|
|
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_arg0(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
|
|
; CHECK-LABEL: @test_extrqi_ret(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
|
|
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertq_arg0(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertq_ret(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
|
|
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertqi_arg0(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertqi_arg1(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertqi_args01(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
|
|
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
|
;
|
|
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
|
|
ret <2 x i64> %3
|
|
}
|
|
|
|
define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
|
|
; CHECK-LABEL: @test_insertqi_ret(
|
|
; CHECK-NEXT: ret <2 x i64> undef
|
|
;
|
|
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
|
|
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
|
|
ret <2 x i64> %2
|
|
}
|
|
|
|
; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
|
|
declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
|
|
|
|
; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
|
|
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
|
|
|
|
; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
|
|
declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
|
|
|
|
; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
|
|
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
|