mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-13 19:24:21 +00:00
Add an SSE2 algorithm for uint64->f64 conversion.
It is the same algorithm Apple gcc uses, and it is faster. It also gets the extreme case in gcc.c-torture/execute/ieee/rbug.c correct, which we did not before; this is not sufficient to make the test pass, though, because there is another bug. llvm-svn: 57926
This commit is contained in:
parent
8b44b88eff
commit
28929589e7
@ -5473,6 +5473,22 @@ ExpandIntToFP(bool isSigned, MVT DestTy, SDValue Source) {
|
||||
Hi = Source;
|
||||
}
|
||||
|
||||
// Check to see if the target has a custom way to lower this. If so, use it.
|
||||
// (Note we've already expanded the operand in this case.)
|
||||
switch (TLI.getOperationAction(ISD::UINT_TO_FP, SourceVT)) {
|
||||
default: assert(0 && "This action not implemented for this operation!");
|
||||
case TargetLowering::Legal:
|
||||
case TargetLowering::Expand:
|
||||
break; // This case is handled below.
|
||||
case TargetLowering::Custom: {
|
||||
SDValue NV = TLI.LowerOperation(DAG.getNode(ISD::UINT_TO_FP, DestTy,
|
||||
Source), DAG);
|
||||
if (NV.getNode())
|
||||
return LegalizeOp(NV);
|
||||
break; // The target decided this was legal after all
|
||||
}
|
||||
}
|
||||
|
||||
// If this is unsigned, and not supported, first perform the conversion to
|
||||
// signed, then adjust the result if the sign bit is set.
|
||||
SDValue SignedConv = ExpandIntToFP(true, DestTy, Source);
|
||||
|
@ -112,10 +112,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
|
||||
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
|
||||
} else {
|
||||
if (X86ScalarSSEf64)
|
||||
if (X86ScalarSSEf64) {
|
||||
// We have an impenetrably clever algorithm for ui64->double only.
|
||||
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
|
||||
// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
|
||||
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
|
||||
else
|
||||
} else
|
||||
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
|
||||
}
|
||||
|
||||
@ -4686,6 +4688,70 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// Custom-lower an i64 UINT_TO_FP node to f64 using SSE2 vector operations.
///
/// \param Op   The UINT_TO_FP node; its operand 0 must have type i64
///             (asserted below).
/// \param DAG  The selection DAG in which the replacement nodes are created.
/// \returns    An f64 value holding the converted integer, or an empty
///             SDValue when the result type is not f64 or X86ScalarSSEf64 is
///             not set, so that the caller falls back to its own expansion.
///
/// The algorithm splices each 32-bit half of the input into the low mantissa
/// word of a double whose high word is a fixed exponent pattern:
///   - 0x45300000 as the high word encodes 2^84, so the lane reads as
///     2^84 + hi * 2^32;
///   - 0x43300000 as the high word encodes 2^52, so the lane reads as
///     2^52 + lo.
/// Subtracting the vector {2^84, 2^52} leaves {hi * 2^32, lo}; adding the two
/// lanes together yields the final value.
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  MVT SrcVT = Op.getOperand(0).getValueType();
  assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!");

  // We only handle SSE2 f64 target here; caller can handle the rest.
  if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
    return SDValue();

  // Build some magic constants.
  // C0 supplies the high (exponent) words that get interleaved with the two
  // integer halves: lane 0 = 0x45300000, lane 1 = 0x43300000, lanes 2-3 = 0.
  // NOTE(review): the third getConstantPool argument (4) is an alignment,
  // while the loads below state alignment 16 -- confirm the units agree.
  std::vector<Constant*> CV0;
  CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
  CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
  CV0.push_back(ConstantInt::get(APInt(32, 0)));
  CV0.push_back(ConstantInt::get(APInt(32, 0)));
  Constant *C0 = ConstantVector::get(CV0);
  SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4);

  // C1 holds the same two exponent patterns as full doubles (2^84 and 2^52);
  // subtracting it removes the bias introduced by C0.
  std::vector<Constant*> CV1;
  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
  Constant *C1 = ConstantVector::get(CV1);
  SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4);

  // Shuffle mask <0, 4, 1, 5>: interleave the two low elements of the first
  // vector with the two low elements of the second.
  SmallVector<SDValue, 4> MaskVec;
  MaskVec.push_back(DAG.getConstant(0, MVT::i32));
  MaskVec.push_back(DAG.getConstant(4, MVT::i32));
  MaskVec.push_back(DAG.getConstant(1, MVT::i32));
  MaskVec.push_back(DAG.getConstant(5, MVT::i32));
  SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],
                                   MaskVec.size());

  // Shuffle mask <1, 0>: swap the two 64-bit lanes.
  SmallVector<SDValue, 4> MaskVec2;
  MaskVec2.push_back(DAG.getConstant(1, MVT::i64));
  MaskVec2.push_back(DAG.getConstant(0, MVT::i64));
  SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, &MaskVec2[0],
                                 MaskVec2.size());

  // NOTE(review): this reads operands 0 and 1 of the i64 source node, so it
  // presumably relies on the source already being split into two 32-bit
  // halves (operand 1 is paired with the 2^84 bias below, i.e. it is treated
  // as the high half) -- confirm against the caller.
  SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
                            Op.getOperand(0).getOperand(1));
  SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
                            Op.getOperand(0).getOperand(0));

  // Unpck1 = <operand1, operand0, ...>.
  SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
                               XR1, XR2, UnpcklMask);
  SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0,
                              PseudoSourceValue::getConstantPool(), 0,
                              false, 16);

  // Unpck2 = <operand1, 0x45300000, operand0, 0x43300000>, which is then
  // reinterpreted as two doubles.
  SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
                               Unpck1, CLod0, UnpcklMask);
  SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2);

  // The second constant load is chained on the first one's output chain.
  SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1,
                              PseudoSourceValue::getConstantPool(), 0,
                              false, 16);
  SDValue Sub = DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1);

  // Add the halves; easiest way is to swap them into another reg first.
  SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64,
                             Sub, Sub, ShufMask);
  SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub);

  // Both lanes of Add hold the same sum; return lane 0.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add,
                     DAG.getIntPtrConstant(0));
}
|
||||
|
||||
std::pair<SDValue,SDValue> X86TargetLowering::
|
||||
FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) {
|
||||
assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&
|
||||
@ -6184,6 +6250,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
|
||||
case ISD::SRA_PARTS:
|
||||
case ISD::SRL_PARTS: return LowerShift(Op, DAG);
|
||||
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
|
||||
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
|
||||
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
|
||||
case ISD::FABS: return LowerFABS(Op, DAG);
|
||||
case ISD::FNEG: return LowerFNEG(Op, DAG);
|
||||
|
@ -558,6 +558,7 @@ namespace llvm {
|
||||
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG);
|
||||
|
Loading…
Reference in New Issue
Block a user