mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-04-13 03:41:11 +00:00
[X86] Allow _MM_FROUND_CUR_DIRECTION and _MM_FROUND_NO_EXC to be used together on instructions that only support SAE and not embedded rounding.
Current for SAE instructions we only allow _MM_FROUND_CUR_DIRECTION(bit 2) or _MM_FROUND_NO_EXC(bit 3) to be used as the immediate passed to the inrinsics. But these instructions don't perform rounding so _MM_FROUND_CUR_DIRECTION is just sort of a default placeholder when you don't want to suppress exceptions. Using _MM_FROUND_NO_EXC by itself is really bit equivalent to (_MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) since _MM_FROUND_TO_NEAREST_INT is 0. Since we aren't rounding on these instructions we should also accept (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) as equivalent to (_MM_FROUND_NO_EXC). icc allows this, but gcc does not. Differential Revision: https://reviews.llvm.org/D67289 llvm-svn: 371430
This commit is contained in:
parent
a85d9ef11a
commit
ce2cb0f09e
@ -3546,9 +3546,11 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
|
||||
|
||||
// Make sure rounding mode is either ROUND_CUR_DIRECTION or ROUND_NO_EXC bit
|
||||
// is set. If the intrinsic has rounding control(bits 1:0), make sure its only
|
||||
// combined with ROUND_NO_EXC.
|
||||
// combined with ROUND_NO_EXC. If the intrinsic does not have rounding
|
||||
// control, allow ROUND_NO_EXC and ROUND_CUR_DIRECTION together.
|
||||
if (Result == 4/*ROUND_CUR_DIRECTION*/ ||
|
||||
Result == 8/*ROUND_NO_EXC*/ ||
|
||||
(!HasRC && Result == 12/*ROUND_CUR_DIRECTION|ROUND_NO_EXC*/) ||
|
||||
(HasRC && Result.getZExtValue() >= 8 && Result.getZExtValue() <= 11))
|
||||
return false;
|
||||
|
||||
|
@ -81,6 +81,19 @@ __mmask16 test__builtin_ia32_cmpps512_mask_rounding(__m512 __a, __m512 __b, __mm
|
||||
return __builtin_ia32_cmpps512_mask(__a, __b, 0, __u, 0); // expected-error {{invalid rounding argument}}
|
||||
}
|
||||
|
||||
// Make sure we allow 4(CUR_DIRECTION), 8(NO_EXC), and 12(CUR_DIRECTION|NOEXC) for SAE arguments.
|
||||
__mmask16 test__builtin_ia32_cmpps512_mask_rounding_cur_dir(__m512 __a, __m512 __b, __mmask16 __u) {
|
||||
return __builtin_ia32_cmpps512_mask(__a, __b, 0, __u, 4); // no-error
|
||||
}
|
||||
|
||||
__mmask16 test__builtin_ia32_cmpps512_mask_rounding_sae1(__m512 __a, __m512 __b, __mmask16 __u) {
|
||||
return __builtin_ia32_cmpps512_mask(__a, __b, 0, __u, 8); // no-error
|
||||
}
|
||||
|
||||
__mmask16 test__builtin_ia32_cmpps512_mask_rounding_sae2(__m512 __a, __m512 __b, __mmask16 __u) {
|
||||
return __builtin_ia32_cmpps512_mask(__a, __b, 0, __u, 12); // no-error
|
||||
}
|
||||
|
||||
__m512 test__builtin_ia32_getmantps512_mask(__m512 a, __m512 b) {
|
||||
return __builtin_ia32_getmantps512_mask(a, 0, b, (__mmask16)-1, 10); // expected-error {{invalid rounding argument}}
|
||||
}
|
||||
|
@ -22706,8 +22706,16 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
return false;
|
||||
};
|
||||
auto isRoundModeSAE = [](SDValue Rnd) {
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
|
||||
return C->getAPIntValue() == X86::STATIC_ROUNDING::NO_EXC;
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
|
||||
unsigned RC = C->getZExtValue();
|
||||
if (RC & X86::STATIC_ROUNDING::NO_EXC) {
|
||||
// Clear the NO_EXC bit and check remaining bits.
|
||||
RC ^= X86::STATIC_ROUNDING::NO_EXC;
|
||||
// As a convenience we allow no other bits or explicitly
|
||||
// current direction.
|
||||
return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
@ -755,7 +755,7 @@ define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 12)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
Loading…
x
Reference in New Issue
Block a user