[X86] Make _mm_mask_cvtps_ph, _mm_maskz_cvtps_ph, _mm256_mask_cvtps_ph, and _mm256_maskz_cvtps_ph aliases for their corresponding cvt_roundps_ph intrinsic.

These intrinsics should always take an immediate for the rounding mode.
The base instruction comes from before EVEX embedded rounding. The
user should always provide the immediate rather than us assuming
CUR_DIRECTION.

Also make the 512-bit versions explicit aliases instead of
copy-pasting the code.

llvm-svn: 363961
This commit is contained in:
Craig Topper 2019-06-20 18:24:29 +00:00
parent 73dbe9d517
commit 6d9fb68c53
3 changed files with 12 additions and 48 deletions

View File

@ -3779,20 +3779,9 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
(__v16hi)_mm256_setzero_si256(), \ (__v16hi)_mm256_setzero_si256(), \
(__mmask16)(W)) (__mmask16)(W))
#define _mm512_cvtps_ph(A, I) \ #define _mm512_cvtps_ph _mm512_cvt_roundps_ph
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ #define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
(__v16hi)_mm256_setzero_si256(), \ #define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
(__mmask16)-1)
#define _mm512_mask_cvtps_ph(U, W, A, I) \
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)(__m256i)(U), \
(__mmask16)(W))
#define _mm512_maskz_cvtps_ph(W, A, I) \
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)_mm256_setzero_si256(), \
(__mmask16)(W))
#define _mm512_cvt_roundph_ps(A, R) \ #define _mm512_cvt_roundph_ps(A, R) \
(__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \

View File

@ -8411,22 +8411,6 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
(__mmask8) __U); (__mmask8) __U);
} }
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
{
return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) __W,
(__mmask8) __U);
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
{
return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) _mm_setzero_si128 (),
(__mmask8) __U);
}
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \ #define _mm_mask_cvt_roundps_ph(W, U, A, I) \
(__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
(__v8hi)(__m128i)(W), \ (__v8hi)(__m128i)(W), \
@ -8437,21 +8421,9 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
(__v8hi)_mm_setzero_si128(), \ (__v8hi)_mm_setzero_si128(), \
(__mmask8)(U)) (__mmask8)(U))
static __inline __m128i __DEFAULT_FN_ATTRS256 #define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
{
return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) __W,
(__mmask8) __U);
}
static __inline __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
{
return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) _mm_setzero_si128(),
(__mmask8) __U);
}
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
(__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
(__v8hi)(__m128i)(W), \ (__v8hi)(__m128i)(W), \
@ -8462,6 +8434,9 @@ _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
(__v8hi)_mm_setzero_si128(), \ (__v8hi)_mm_setzero_si128(), \
(__mmask8)(U)) (__mmask8)(U))
#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
#undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS256

View File

@ -9726,25 +9726,25 @@ __m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
__m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) { __m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_mask_cvtps_ph // CHECK-LABEL: @test_mm_mask_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
return _mm_mask_cvtps_ph(__W, __U, __A); return _mm_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
} }
__m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) { __m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) {
// CHECK-LABEL: @test_mm_maskz_cvtps_ph // CHECK-LABEL: @test_mm_maskz_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
return _mm_maskz_cvtps_ph(__U, __A); return _mm_maskz_cvtps_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
} }
__m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) { __m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_mask_cvtps_ph // CHECK-LABEL: @test_mm256_mask_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
return _mm256_mask_cvtps_ph(__W, __U, __A); return _mm256_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
} }
__m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) { __m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_maskz_cvtps_ph // CHECK-LABEL: @test_mm256_maskz_cvtps_ph
// CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
return _mm256_maskz_cvtps_ph(__U, __A); return _mm256_maskz_cvtps_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
} }
__m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) { __m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) {