From 1a519052e2e91f1024a92c60f44064c17bde63cd Mon Sep 17 00:00:00 2001 From: Tobias Edler von Koch Date: Wed, 16 Dec 2015 17:29:37 +0000 Subject: [PATCH] [Hexagon] Make memcpy lowering thread-safe This removes an unpleasant hack involving a global variable for special lowering of certain memcpy calls. These are now lowered as intended in EmitTargetCodeForMemcpy in the same way that other targets do it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255785 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 7 +-- .../Hexagon/HexagonSelectionDAGInfo.cpp | 48 ++++++++++++++----- lib/Target/Hexagon/HexagonTargetMachine.h | 2 - test/CodeGen/Hexagon/memcpy-likely-aligned.ll | 32 +++++++++++++ 4 files changed, 69 insertions(+), 20 deletions(-) create mode 100644 test/CodeGen/Hexagon/memcpy-likely-aligned.ll diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index f82fe7699e8..09e40d454ba 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -819,12 +819,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. - if (flag_aligned_memcpy) { - const char *MemcpyName = - "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; - Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT); - flag_aligned_memcpy = false; - } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT); } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 276cc69eed0..239dbda8f27 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "HexagonTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; #define DEBUG_TYPE "hexagon-selectiondag-info" -bool llvm::flag_aligned_memcpy; - SDValue HexagonSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -25,15 +24,40 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - flag_aligned_memcpy = false; - if ((Align & 0x3) == 0) { - ConstantSDNode *ConstantSize = dyn_cast(Size); - if (ConstantSize) { - uint64_t SizeVal = ConstantSize->getZExtValue(); - if ((SizeVal > 32) && ((SizeVal % 8) == 0)) - flag_aligned_memcpy = true; - } - } + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) + return SDValue(); - return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (SizeVal < 32 || (SizeVal % 8) != 0) + return SDValue(); + + // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. + // + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + const char *SpecialMemcpyName = + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getTargetExternalSymbol( + SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); + + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index b61f6bc92f7..b6b99992432 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -44,8 +44,6 @@ public: } }; -extern bool flag_aligned_memcpy; - } // end namespace llvm #endif diff --git a/test/CodeGen/Hexagon/memcpy-likely-aligned.ll b/test/CodeGen/Hexagon/memcpy-likely-aligned.ll new file mode 100644 index 00000000000..f2677efc304 --- /dev/null +++ b/test/CodeGen/Hexagon/memcpy-likely-aligned.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; CHECK: __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" +target triple = "hexagon-unknown-linux-gnu" + +%struct.e = type { i8, i8, [2 x i8] } +%struct.s = type { i8* } +%struct.o = type { %struct.n } +%struct.n = type { [2 x %struct.l] } +%struct.l = type { %struct.e, %struct.d, %struct.e } +%struct.d = type <{ i8, i8, i8, i8, [2 x i8], [2 x i8] }> + +@y = global { <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> } { <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } { %struct.e { i8 3, i8 0, [2 x i8] undef }, { i8, i8, i8, [5 x i8] } { i8 -47, i8 2, i8 0, [5 x i8] undef }, %struct.e { i8 3, i8 0, [2 x i8] undef } }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } { %struct.e { i8 3, i8 0, [2 x i8] undef }, { i8, i8, i8, [5 x i8] } { i8 -47, i8 2, i8 0, [5 x i8] undef }, %struct.e { i8 3, i8 0, [2 x i8] undef } } }> }, align 4 +@t = common global %struct.s zeroinitializer, align 4 +@q = internal global %struct.o* null, align 4 + +define void @foo() nounwind { +entry: + %0 = load i8*, i8** getelementptr inbounds (%struct.s, %struct.s* @t, i32 0, i32 0), align 4 + %1 = bitcast i8* %0 to %struct.o* + store %struct.o* %1, %struct.o** @q, align 4 + %2 = load %struct.o*, %struct.o** @q, align 4 + %p = getelementptr inbounds %struct.o, %struct.o* %2, i32 0, i32 0 + %m = getelementptr inbounds %struct.n, %struct.n* %p, i32 0, i32 0 + %arraydecay = getelementptr inbounds [2 x %struct.l], [2 x %struct.l]* %m, i32 0, i32 0 + %3 = bitcast %struct.l* %arraydecay to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* getelementptr inbounds ({ <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> }, { <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> }* @y, i32 0, i32 0, i32 0, i32 0, i32 0), i32 32, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind