Adds alignment information to the memory load/store IR ops

This lets the backends emit aligned load/store instructions when the alignment is known, instead of always emitting unaligned ones.
This commit is contained in:
Ryan Houdek 2019-11-04 08:32:02 -08:00 committed by Stefanos Kornilios Mitsis Poiitidis
parent f697da77b6
commit 9e132e107a
5 changed files with 232 additions and 226 deletions

View File

@ -1174,7 +1174,10 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
}
break;
case 16: {
movups(GetDst(Node), xword [rax]);
if (Op->Size == Op->Align)
movaps(GetDst(Node), xword [rax]);
else
movups(GetDst(Node), xword [rax]);
if (MemoryDebug) {
movq(rcx, GetDst(Node));
}
@ -1204,7 +1207,10 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
mov(qword [rax], GetSrc<RA_64>(Op->Header.Args[1].ID()));
break;
case 16:
movups(xword [rax], GetSrc(Op->Header.Args[1].ID()));
if (Op->Size == Op->Align)
movaps(xword [rax], GetSrc(Op->Header.Args[1].ID()));
else
movups(xword [rax], GetSrc(Op->Header.Args[1].ID()));
break;
default: LogMan::Msg::A("Unhandled StoreMem size: %d", Op->Size);
}

View File

@ -55,8 +55,8 @@ private:
void HandleIR(FEXCore::IR::IRListView<true> const *IR, IR::NodeWrapperIterator *Node);
llvm::Value *CreateContextGEP(uint64_t Offset, uint8_t Size);
llvm::Value *CreateContextPtr(uint64_t Offset, uint8_t Size);
llvm::Value *CreateMemoryLoad(llvm::Value *Ptr);
void CreateMemoryStore(llvm::Value *Ptr, llvm::Value *Val);
llvm::Value *CreateMemoryLoad(llvm::Value *Ptr, uint8_t Align);
void CreateMemoryStore(llvm::Value *Ptr, llvm::Value *Val, uint8_t Align);
void ValidateMemoryInVM(uint64_t Ptr, uint8_t Size, bool Load);
template<typename Type>
@ -273,7 +273,7 @@ void LLVMJITCore::MemoryStore_Validate(uint64_t Ptr, Type Val) {
LogMan::Msg::D("\tStoring: 0x%016lx", Data);
}
llvm::Value *LLVMJITCore::CreateMemoryLoad(llvm::Value *Ptr) {
llvm::Value *LLVMJITCore::CreateMemoryLoad(llvm::Value *Ptr, uint8_t Align) {
if (CTX->Config.LLVM_MemoryValidation) {
std::vector<llvm::Value*> Args;
Args.emplace_back(JITState.IRBuilder->getInt64(reinterpret_cast<uint64_t>(this)));
@ -290,10 +290,10 @@ llvm::Value *LLVMJITCore::CreateMemoryLoad(llvm::Value *Ptr) {
}
}
return JITState.IRBuilder->CreateLoad(Ptr);
return JITState.IRBuilder->CreateAlignedLoad(Ptr, Align);
}
void LLVMJITCore::CreateMemoryStore(llvm::Value *Ptr, llvm::Value *Val) {
void LLVMJITCore::CreateMemoryStore(llvm::Value *Ptr, llvm::Value *Val, uint8_t Align) {
if (CTX->Config.LLVM_MemoryValidation) {
std::vector<llvm::Value*> Args;
Args.emplace_back(JITState.IRBuilder->getInt64(reinterpret_cast<uint64_t>(this)));
@ -312,7 +312,7 @@ void LLVMJITCore::CreateMemoryStore(llvm::Value *Ptr, llvm::Value *Val) {
return;
}
JITState.IRBuilder->CreateStore(Val, Ptr);
JITState.IRBuilder->CreateAlignedStore(Val, Ptr, Align);
}
@ -1701,7 +1701,7 @@ void LLVMJITCore::HandleIR(FEXCore::IR::IRListView<true> const *IR, IR::NodeWrap
Src = JITState.IRBuilder->CreateAdd(Src, JITState.IRBuilder->getInt64(CTX->MemoryMapper.GetBaseOffset<uint64_t>(0)));
// Cast the pointer type correctly
Src = JITState.IRBuilder->CreateIntToPtr(Src, Type::getIntNTy(*Con, Op->Size * 8)->getPointerTo());
auto Result = CreateMemoryLoad(Src);
auto Result = CreateMemoryLoad(Src, Op->Align);
SetDest(*WrapperOp, Result);
break;
}
@ -1715,7 +1715,7 @@ void LLVMJITCore::HandleIR(FEXCore::IR::IRListView<true> const *IR, IR::NodeWrap
auto Type = Type::getIntNTy(*Con, Op->Size * 8);
Src = JITState.IRBuilder->CreateZExtOrTrunc(Src, Type);
Dst = JITState.IRBuilder->CreateIntToPtr(Dst, Type->getPointerTo());
CreateMemoryStore(Dst, Src);
CreateMemoryStore(Dst, Src, Op->Align);
break;
}
case IR::OP_DUMMY:

File diff suppressed because it is too large Load Diff

View File

@ -216,66 +216,64 @@ public:
return Op;
}
IRPair<IROp_Bfe> _Bfe(uint8_t Width, uint8_t lsb, OrderedNode *ssa0) {
IRPair<IROp_Bfe> _Bfe(uint8_t Width, uint8_t lsb, OrderedNode *ssa0) {
return _Bfe(ssa0, Width, lsb);
}
IRPair<IROp_Bfi> _Bfi(uint8_t Width, uint8_t lsb, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_Bfi> _Bfi(uint8_t Width, uint8_t lsb, OrderedNode *ssa0, OrderedNode *ssa1) {
return _Bfi(ssa0, ssa1, Width, lsb);
}
IRPair<IROp_StoreMem> _StoreMem(uint8_t Size, OrderedNode *ssa0, OrderedNode *ssa1) {
return _StoreMem(ssa0, ssa1, Size);
IRPair<IROp_StoreMem> _StoreMem(uint8_t Size, OrderedNode *ssa0, OrderedNode *ssa1, uint8_t Align = 1) {
return _StoreMem(ssa0, ssa1, Size, Align);
}
IRPair<IROp_LoadMem> _LoadMem(uint8_t Size, OrderedNode *ssa0) {
return _LoadMem(ssa0, Size);
IRPair<IROp_LoadMem> _LoadMem(uint8_t Size, OrderedNode *ssa0, uint8_t Align = 1) {
return _LoadMem(ssa0, Size, Align);
}
IRPair<IROp_StoreContext> _StoreContext(uint8_t Size, uint32_t Offset, OrderedNode *ssa0) {
IRPair<IROp_StoreContext> _StoreContext(uint8_t Size, uint32_t Offset, OrderedNode *ssa0) {
return _StoreContext(ssa0, Size, Offset);
}
IRPair<IROp_Select> _Select(uint8_t Cond, OrderedNode *ssa0, OrderedNode *ssa1, OrderedNode *ssa2, OrderedNode *ssa3) {
IRPair<IROp_Select> _Select(uint8_t Cond, OrderedNode *ssa0, OrderedNode *ssa1, OrderedNode *ssa2, OrderedNode *ssa3) {
return _Select(ssa0, ssa1, ssa2, ssa3, {Cond});
}
IRPair<IROp_Sext> _Sext(uint8_t SrcSize, OrderedNode *ssa0) {
IRPair<IROp_Sext> _Sext(uint8_t SrcSize, OrderedNode *ssa0) {
return _Sext(ssa0, SrcSize);
}
IRPair<IROp_Zext> _Zext(uint8_t SrcSize, OrderedNode *ssa0) {
IRPair<IROp_Zext> _Zext(uint8_t SrcSize, OrderedNode *ssa0) {
return _Zext(ssa0, SrcSize);
}
IRPair<IROp_VInsElement> _VInsElement(uint8_t RegisterSize, uint8_t ElementSize, uint8_t DestIdx, uint8_t SrcIdx, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VInsElement> _VInsElement(uint8_t RegisterSize, uint8_t ElementSize, uint8_t DestIdx, uint8_t SrcIdx, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VInsElement(ssa0, ssa1, RegisterSize, ElementSize, DestIdx, SrcIdx);
}
IRPair<IROp_VAdd> _VAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VAdd> _VAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VAdd(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VSub> _VSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VSub> _VSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSub(ssa0, ssa1, RegisterSize, ElementSize);
}
// Convenience overload: takes the register size and element size first, then
// the two SSA operands, and forwards to the _VUMin overload that takes the
// SSA operands first followed by RegisterSize and ElementSize.
IRPair<IROp_VUMin> _VUMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUMin(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VSMin> _VSMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VSMin> _VSMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSMin(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VZip> _VZip(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VZip> _VZip(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VZip(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VZip2> _VZip2(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VZip2> _VZip2(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VZip2(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VCMPEQ> _VCMPEQ(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VCMPEQ> _VCMPEQ(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VCMPEQ(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VCMPGT> _VCMPGT(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VCMPGT> _VCMPGT(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VCMPGT(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUShl> _VUShl(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VUShl> _VUShl(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUShl(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUShlS> _VUShlS(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VUShlS> _VUShlS(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUShlS(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUShr> _VUShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
IRPair<IROp_VUShr> _VUShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUShr(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VExtr> _VExtr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1, uint8_t Index) {
@ -460,11 +458,11 @@ private:
void RemoveArgUses(OrderedNode *Node);
bool DecodeFailure{false};
OrderedNode *LoadSource(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint32_t Flags, bool LoadData = true, bool ForceLoad = false);
OrderedNode *LoadSource_WithOpSize(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint8_t OpSize, uint32_t Flags, bool LoadData = true, bool ForceLoad = false);
void StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, uint8_t OpSize);
void StoreResult(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src);
void StoreResult(FEXCore::X86Tables::DecodedOp Op, OrderedNode *const Src);
OrderedNode *LoadSource(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint32_t Flags, int8_t Align, bool LoadData = true, bool ForceLoad = false);
OrderedNode *LoadSource_WithOpSize(FEXCore::X86Tables::DecodedOp const& Op, FEXCore::X86Tables::DecodedOperand const& Operand, uint8_t OpSize, uint32_t Flags, int8_t Align, bool LoadData = true, bool ForceLoad = false);
void StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, uint8_t OpSize, int8_t Align);
void StoreResult(FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, int8_t Align);
void StoreResult(FEXCore::X86Tables::DecodedOp Op, OrderedNode *const Src, int8_t Align);
uint8_t GetDstSize(FEXCore::X86Tables::DecodedOp Op);
uint8_t GetSrcSize(FEXCore::X86Tables::DecodedOp Op);

View File

@ -145,14 +145,16 @@
"DestSize": "Size",
"SSAArgs": "1",
"Args": [
"uint8_t", "Size"
"uint8_t", "Size",
"uint8_t", "Align"
]
},
"StoreMem": {
"SSAArgs": "2",
"Args": [
"uint8_t", "Size"
"uint8_t", "Size",
"uint8_t", "Align"
]
},