mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-15 07:59:50 +00:00
[X86][SSE41] Added fast-isel intrinsics tests
As discussed on PR24580, this patch adds some (more to come) initial fast-isel codegen tests to match the IR generated in clang/test/CodeGen/sse41-builtins.c.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261438 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
01ad432bbe
commit
ab724ed2c7
380
test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
Normal file
380
test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
Normal file
@ -0,0 +1,380 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X32
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X64
|
||||
|
||||
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse41-builtins.c
|
||||
|
||||
; 16-bit lane blend written as a shufflevector on <8 x i16>: the mask takes
; lanes 1, 3 and 5 from %a1 (mask indices 9, 11, 13) and all remaining lanes
; from %a0. The assertions expect one pblendw followed by the return.
define <2 x i64> @test_mm_blend_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_blend_epi16:
; X32: # BB#0:
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_blend_epi16:
; X64: # BB#0:
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; X64-NEXT: retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %blend = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  %cast = bitcast <8 x i16> %blend to <2 x i64>
  ret <2 x i64> %cast
}
|
||||
|
||||
; Two-lane double blend written as a shufflevector: lane 0 comes from %a0 and
; lane 1 from %a1 (mask index 3). The assertions expect a single blendpd.
define <2 x double> @test_mm_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_blend_pd:
; X32: # BB#0:
; X32-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_blend_pd:
; X64: # BB#0:
; X64-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; X64-NEXT: retq
  %blend = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %blend
}
|
||||
|
||||
; Four-lane float blend written as a shufflevector: lanes 1 and 2 come from
; %a1 (mask indices 5 and 6), lanes 0 and 3 from %a0. The assertions expect a
; single blendps.
define <4 x float> @test_mm_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_blend_ps:
; X32: # BB#0:
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_blend_ps:
; X64: # BB#0:
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; X64-NEXT: retq
  %blend = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x float> %blend
}
|
||||
|
||||
; Variable byte blend through the llvm.x86.sse41.pblendvb intrinsic. All three
; <2 x i64> arguments are reinterpreted as <16 x i8> before the call and the
; result is cast back. The assertions show %a2 (xmm2) being copied into xmm0
; ahead of the two-operand pblendvb, with the result moved back to xmm0.
define <2 x i64> @test_mm_blendv_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_blendv_epi8:
; X32: # BB#0:
; X32-NEXT: movdqa %xmm0, %xmm3
; X32-NEXT: movaps %xmm2, %xmm0
; X32-NEXT: pblendvb %xmm1, %xmm3
; X32-NEXT: movdqa %xmm3, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_blendv_epi8:
; X64: # BB#0:
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: movaps %xmm2, %xmm0
; X64-NEXT: pblendvb %xmm1, %xmm3
; X64-NEXT: movdqa %xmm3, %xmm0
; X64-NEXT: retq
  %bytes0 = bitcast <2 x i64> %a0 to <16 x i8>
  %bytes1 = bitcast <2 x i64> %a1 to <16 x i8>
  %bytes2 = bitcast <2 x i64> %a2 to <16 x i8>
  %blend = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %bytes0, <16 x i8> %bytes1, <16 x i8> %bytes2)
  %cast = bitcast <16 x i8> %blend to <2 x i64>
  ret <2 x i64> %cast
}
; Intrinsic used by test_mm_blendv_epi8.
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; Variable double blend through the llvm.x86.sse41.blendvpd intrinsic, with
; the three arguments forwarded unchanged. The assertions show %a2 (xmm2)
; being copied into xmm0 ahead of the two-operand blendvpd, with the result
; moved back to xmm0.
define <2 x double> @test_mm_blendv_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_blendv_pd:
; X32: # BB#0:
; X32-NEXT: movapd %xmm0, %xmm3
; X32-NEXT: movaps %xmm2, %xmm0
; X32-NEXT: blendvpd %xmm1, %xmm3
; X32-NEXT: movapd %xmm3, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_blendv_pd:
; X64: # BB#0:
; X64-NEXT: movapd %xmm0, %xmm3
; X64-NEXT: movaps %xmm2, %xmm0
; X64-NEXT: blendvpd %xmm1, %xmm3
; X64-NEXT: movapd %xmm3, %xmm0
; X64-NEXT: retq
  %blend = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %blend
}
; Intrinsic used by test_mm_blendv_pd.
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Variable float blend through the llvm.x86.sse41.blendvps intrinsic, with the
; three arguments forwarded unchanged. The assertions show %a2 (xmm2) being
; copied into xmm0 ahead of the two-operand blendvps, with the result moved
; back to xmm0.
define <4 x float> @test_mm_blendv_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_blendv_ps:
; X32: # BB#0:
; X32-NEXT: movaps %xmm0, %xmm3
; X32-NEXT: movaps %xmm2, %xmm0
; X32-NEXT: blendvps %xmm1, %xmm3
; X32-NEXT: movaps %xmm3, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_blendv_ps:
; X64: # BB#0:
; X64-NEXT: movaps %xmm0, %xmm3
; X64-NEXT: movaps %xmm2, %xmm0
; X64-NEXT: blendvps %xmm1, %xmm3
; X64-NEXT: movaps %xmm3, %xmm0
; X64-NEXT: retq
  %blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %blend
}
; Intrinsic used by test_mm_blendv_ps.
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; Packed-double rounding through llvm.x86.sse41.round.pd with the immediate
; 10. The assertions expect that immediate to appear unchanged on a single
; roundpd that reads and writes xmm0.
define <2 x double> @test_mm_ceil_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_ceil_pd:
; X32: # BB#0:
; X32-NEXT: roundpd $10, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ceil_pd:
; X64: # BB#0:
; X64-NEXT: roundpd $10, %xmm0, %xmm0
; X64-NEXT: retq
  %rounded = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 10)
  ret <2 x double> %rounded
}
; Packed-double rounding intrinsic; the i32 operand is the rounding immediate.
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
|
||||
|
||||
; Packed-float rounding through llvm.x86.sse41.round.ps with the immediate 10.
; The assertions expect that immediate to appear unchanged on a single roundps
; that reads and writes xmm0.
define <4 x float> @test_mm_ceil_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_ceil_ps:
; X32: # BB#0:
; X32-NEXT: roundps $10, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ceil_ps:
; X64: # BB#0:
; X64-NEXT: roundps $10, %xmm0, %xmm0
; X64-NEXT: retq
  %rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 10)
  ret <4 x float> %rounded
}
; Packed-float rounding intrinsic; the i32 operand is the rounding immediate.
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
|
||||
|
||||
; Scalar-double rounding through llvm.x86.sse41.round.sd with the immediate
; 10. The assertions expect a single roundsd with xmm1 (%a1) as source and
; xmm0 (%a0) as destination.
define <2 x double> @test_mm_ceil_sd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_ceil_sd:
; X32: # BB#0:
; X32-NEXT: roundsd $10, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ceil_sd:
; X64: # BB#0:
; X64-NEXT: roundsd $10, %xmm1, %xmm0
; X64-NEXT: retq
  %rounded = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 10)
  ret <2 x double> %rounded
}
; Scalar-double rounding intrinsic; the i32 operand is the rounding immediate.
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
|
||||
|
||||
; Scalar-float rounding through llvm.x86.sse41.round.ss with the immediate 10.
; The assertions expect a single roundss with xmm1 (%a1) as source and xmm0
; (%a0) as destination.
define <4 x float> @test_mm_ceil_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_ceil_ss:
; X32: # BB#0:
; X32-NEXT: roundss $10, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ceil_ss:
; X64: # BB#0:
; X64-NEXT: roundss $10, %xmm1, %xmm0
; X64-NEXT: retq
  %rounded = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 10)
  ret <4 x float> %rounded
}
; Scalar-float rounding intrinsic; the i32 operand is the rounding immediate.
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||
|
||||
; 64-bit lane equality written in generic IR: an icmp eq producing <2 x i1>,
; sign-extended back to <2 x i64>. The assertions expect the pair to be
; selected as a single pcmpeqq.
define <2 x i64> @test_mm_cmpeq_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpeq_epi64:
; X32: # BB#0:
; X32-NEXT: pcmpeqq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpeq_epi64:
; X64: # BB#0:
; X64-NEXT: pcmpeqq %xmm1, %xmm0
; X64-NEXT: retq
  %eq = icmp eq <2 x i64> %a0, %a1
  %ext = sext <2 x i1> %eq to <2 x i64>
  ret <2 x i64> %ext
}
|
||||
|
||||
; TODO test_mm_cvtepi8_epi16
|
||||
; TODO test_mm_cvtepi8_epi32
|
||||
; TODO test_mm_cvtepi8_epi64
|
||||
; TODO test_mm_cvtepi16_epi32
|
||||
; TODO test_mm_cvtepi16_epi64
|
||||
; TODO test_mm_cvtepi32_epi64
|
||||
|
||||
; TODO test_mm_cvtepu8_epi16
|
||||
; TODO test_mm_cvtepu8_epi32
|
||||
; TODO test_mm_cvtepu8_epi64
|
||||
; TODO test_mm_cvtepu16_epi32
|
||||
; TODO test_mm_cvtepu16_epi64
|
||||
; TODO test_mm_cvtepu32_epi64
|
||||
|
||||
; Double-precision dot product through llvm.x86.sse41.dppd with the i8
; immediate 7. The assertions expect a single dppd carrying the same
; immediate.
define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_dp_pd:
; X32: # BB#0:
; X32-NEXT: dppd $7, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_dp_pd:
; X64: # BB#0:
; X64-NEXT: dppd $7, %xmm1, %xmm0
; X64-NEXT: retq
  %dot = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %dot
}
; Intrinsic used by test_mm_dp_pd; the trailing i8 is the instruction immediate.
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
|
||||
|
||||
; Single-precision dot product through llvm.x86.sse41.dpps with the i8
; immediate 7. The assertions expect a single dpps carrying the same
; immediate.
define <4 x float> @test_mm_dp_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_dp_ps:
; X32: # BB#0:
; X32-NEXT: dpps $7, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_dp_ps:
; X64: # BB#0:
; X64-NEXT: dpps $7, %xmm1, %xmm0
; X64-NEXT: retq
  %dot = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %dot
}
; Intrinsic used by test_mm_dp_ps; the trailing i8 is the instruction immediate.
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
; TODO test_mm_extract_epi8
|
||||
; TODO test_mm_extract_epi32
|
||||
; TODO test_mm_extract_epi64
|
||||
; TODO test_mm_extract_ps
|
||||
|
||||
; Packed-double rounding through llvm.x86.sse41.round.pd with the immediate 9.
; The assertions expect that immediate to appear unchanged on a single roundpd
; that reads and writes xmm0.
define <2 x double> @test_mm_floor_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_floor_pd:
; X32: # BB#0:
; X32-NEXT: roundpd $9, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_floor_pd:
; X64: # BB#0:
; X64-NEXT: roundpd $9, %xmm0, %xmm0
; X64-NEXT: retq
  %rounded = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 9)
  ret <2 x double> %rounded
}
|
||||
|
||||
; Packed-float rounding through llvm.x86.sse41.round.ps with the immediate 9.
; The assertions expect that immediate to appear unchanged on a single roundps
; that reads and writes xmm0.
define <4 x float> @test_mm_floor_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_floor_ps:
; X32: # BB#0:
; X32-NEXT: roundps $9, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_floor_ps:
; X64: # BB#0:
; X64-NEXT: roundps $9, %xmm0, %xmm0
; X64-NEXT: retq
  %rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 9)
  ret <4 x float> %rounded
}
|
||||
|
||||
; Scalar-double rounding through llvm.x86.sse41.round.sd with the immediate 9.
; The assertions expect a single roundsd with xmm1 (%a1) as source and xmm0
; (%a0) as destination.
define <2 x double> @test_mm_floor_sd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_floor_sd:
; X32: # BB#0:
; X32-NEXT: roundsd $9, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_floor_sd:
; X64: # BB#0:
; X64-NEXT: roundsd $9, %xmm1, %xmm0
; X64-NEXT: retq
  %rounded = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 9)
  ret <2 x double> %rounded
}
|
||||
|
||||
; Scalar-float rounding through llvm.x86.sse41.round.ss with the immediate 9.
; The assertions expect a single roundss with xmm1 (%a1) as source and xmm0
; (%a0) as destination.
define <4 x float> @test_mm_floor_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_floor_ss:
; X32: # BB#0:
; X32-NEXT: roundss $9, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_floor_ss:
; X64: # BB#0:
; X64-NEXT: roundss $9, %xmm1, %xmm0
; X64-NEXT: retq
  %rounded = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 9)
  ret <4 x float> %rounded
}
|
||||
|
||||
; TODO test_mm_insert_epi8
|
||||
; TODO test_mm_insert_epi32
|
||||
; TODO test_mm_insert_epi64
|
||||
; TODO test_mm_insert_ps
|
||||
|
||||
; TODO test_mm_max_epi8
|
||||
; TODO test_mm_max_epu16
|
||||
; TODO test_mm_max_epi32
|
||||
; TODO test_mm_max_epu32
|
||||
; TODO test_mm_min_epi8
|
||||
; TODO test_mm_min_epu16
|
||||
; TODO test_mm_min_epi32
|
||||
; TODO test_mm_min_epu32
|
||||
; TODO test_mm_minpos_epu16
|
||||
; TODO test_mm_mpsadbw_epu8
|
||||
; TODO test_mm_mul_epi32
|
||||
; TODO test_mm_mullo_epi32
|
||||
; TODO test_mm_packus_epi32
|
||||
|
||||
; Packed-double rounding through llvm.x86.sse41.round.pd with the immediate 2.
; The assertions expect that immediate to appear unchanged on a single roundpd
; that reads and writes xmm0.
define <2 x double> @test_mm_round_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_round_pd:
; X32: # BB#0:
; X32-NEXT: roundpd $2, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_round_pd:
; X64: # BB#0:
; X64-NEXT: roundpd $2, %xmm0, %xmm0
; X64-NEXT: retq
  %rounded = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 2)
  ret <2 x double> %rounded
}
|
||||
|
||||
; Packed-float rounding through llvm.x86.sse41.round.ps with the immediate 2.
; The assertions expect that immediate to appear unchanged on a single roundps
; that reads and writes xmm0.
define <4 x float> @test_mm_round_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_round_ps:
; X32: # BB#0:
; X32-NEXT: roundps $2, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_round_ps:
; X64: # BB#0:
; X64-NEXT: roundps $2, %xmm0, %xmm0
; X64-NEXT: retq
  %rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 2)
  ret <4 x float> %rounded
}
|
||||
|
||||
; Scalar-double rounding through llvm.x86.sse41.round.sd with the immediate 2.
; The assertions expect a single roundsd with xmm1 (%a1) as source and xmm0
; (%a0) as destination.
define <2 x double> @test_mm_round_sd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_round_sd:
; X32: # BB#0:
; X32-NEXT: roundsd $2, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_round_sd:
; X64: # BB#0:
; X64-NEXT: roundsd $2, %xmm1, %xmm0
; X64-NEXT: retq
  %rounded = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 2)
  ret <2 x double> %rounded
}
|
||||
|
||||
; Scalar-float rounding through llvm.x86.sse41.round.ss with the immediate 2.
; The assertions expect a single roundss with xmm1 (%a1) as source and xmm0
; (%a0) as destination.
define <4 x float> @test_mm_round_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_round_ss:
; X32: # BB#0:
; X32-NEXT: roundss $2, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_round_ss:
; X64: # BB#0:
; X64-NEXT: roundss $2, %xmm1, %xmm0
; X64-NEXT: retq
  %rounded = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 2)
  ret <4 x float> %rounded
}
|
||||
|
||||
; TODO test_mm_stream_load_si128
|
||||
; TODO test_mm_test_all_ones
|
||||
; TODO test_mm_test_all_zeros
|
||||
; TODO test_mm_test_mix_ones_zeros
|
||||
; TODO test_mm_testc_si128
|
||||
; TODO test_mm_testnzc_si128
|
||||
; TODO test_mm_testz_si128
|
Loading…
Reference in New Issue
Block a user