From d1c0859c87e859070db16f91f3288e168fe053e3 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Wed, 20 Mar 2013 00:10:32 +0000 Subject: [PATCH] Propagate DAG node ordering during type legalization and instruction selection A node's ordering is only propagated during legalization if (a) the new node does not have an ordering (is not a CSE'd node), or (b) the new node has an ordering that is higher than the node being legalized. llvm-svn: 177465 --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 40 ++++++++++++++++++- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 4 +- test/CodeGen/NVPTX/sched1.ll | 31 ++++++++++++++ test/CodeGen/NVPTX/sched2.ll | 32 +++++++++++++++ test/CodeGen/NVPTX/vector-loads.ll | 16 ++++---- test/CodeGen/X86/win_ftol2.ll | 12 +++--- 7 files changed, 120 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/NVPTX/sched1.ll create mode 100644 test/CodeGen/NVPTX/sched2.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index e26d1656e87..1d6a981eecf 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -619,6 +619,17 @@ void DAGTypeLegalizer::RemapValue(SDValue &N) { } } +/// PropagateOrdering - Propagate SDNode ordering information from \p Old to +/// \p New. Generally, this just means copying the ordering value, but if the +/// new node is actually a recycled node with a lower ordering already, then +/// we keep it rather than overwrite it with the higher ordering of \p Old. +void DAGTypeLegalizer::PropagateOrdering(SDNode *Old, SDNode *New) { + unsigned OldOrder = DAG.GetOrdering(Old); + unsigned NewOrder = DAG.GetOrdering(New); + if (NewOrder == 0 || (NewOrder > 0 && OldOrder < NewOrder)) + DAG.AssignOrdering(New, OldOrder); +} + namespace { /// NodeUpdateListener - This class is a DAGUpdateListener that listens for /// updates to nodes and recomputes their ready state. 
@@ -735,6 +746,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { SDValue &OpEntry = PromotedIntegers[Op]; assert(OpEntry.getNode() == 0 && "Node is already promoted!"); OpEntry = Result; + + // Propagate node ordering + PropagateOrdering(Op.getNode(), Result.getNode()); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -746,6 +760,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { SDValue &OpEntry = SoftenedFloats[Op]; assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); OpEntry = Result; + + // Propagate node ordering + PropagateOrdering(Op.getNode(), Result.getNode()); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -760,6 +777,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = ScalarizedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); OpEntry = Result; + + // Propagate node ordering + PropagateOrdering(Op.getNode(), Result.getNode()); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, @@ -787,6 +807,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + PropagateOrdering(Op.getNode(), Lo.getNode()); + PropagateOrdering(Op.getNode(), Hi.getNode()); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, @@ -814,6 +838,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + PropagateOrdering(Op.getNode(), Lo.getNode()); + PropagateOrdering(Op.getNode(), Hi.getNode()); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, @@ -843,6 +871,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node 
already split"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + PropagateOrdering(Op.getNode(), Lo.getNode()); + PropagateOrdering(Op.getNode(), Hi.getNode()); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -854,6 +886,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = WidenedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node already widened!"); OpEntry = Result; + + // Propagate node ordering + PropagateOrdering(Op.getNode(), Result.getNode()); } @@ -919,8 +954,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); - for (unsigned i = 0, e = Results.size(); i != e; ++i) + for (unsigned i = 0, e = Results.size(); i != e; ++i) { ReplaceValueWith(SDValue(N, i), Results[i]); + // Propagate node ordering + DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N)); + } return true; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 27b3cf2abc9..b8b5ba8c9a9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -143,6 +143,7 @@ private: void ExpungeNode(SDNode *N); void PerformExpensiveChecks(); void RemapValue(SDValue &N); + void PropagateOrdering(SDNode *Old, SDNode *New); // Common routines. SDValue BitConvertToInteger(SDValue Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c3b6276a8dc..15001f506b0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -785,8 +785,10 @@ void SelectionDAGISel::DoInstructionSelection() { if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. 
- if (ResNode) + if (ResNode) { + CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); ReplaceUses(Node, ResNode); + } // If after the replacement this node is not used any more, // remove this dead node. diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll new file mode 100644 index 00000000000..03ab635e73b --- /dev/null +++ b/test/CodeGen/NVPTX/sched1.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; Ensure source scheduling is working + +define void @foo(i32* %a) { +; CHECK: .func foo +; CHECK: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr i32* %a, i32 0 + %val0 = load i32* %ptr0 + %ptr1 = getelementptr i32* %a, i32 1 + %val1 = load i32* %ptr1 + %ptr2 = getelementptr i32* %a, i32 2 + %val2 = load i32* %ptr2 + %ptr3 = getelementptr i32* %a, i32 3 + %val3 = load i32* %ptr3 + + %t0 = add i32 %val0, %val1 + %t1 = add i32 %t0, %val2 + %t2 = add i32 %t1, %val3 + + store i32 %t2, i32* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll new file mode 100644 index 00000000000..71a9a4963fa --- /dev/null +++ b/test/CodeGen/NVPTX/sched2.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +define void @foo(<2 x i32>* %a) { +; CHECK: .func foo +; CHECK: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr <2 x i32>* %a, i32 0 + %val0 = load <2 x i32>* %ptr0 + %ptr1 = getelementptr <2 x i32>* %a, i32 1 + %val1 = load <2 x i32>* %ptr1 + %ptr2 = getelementptr <2 x i32>* %a, i32 2 + %val2 = load <2 x i32>* %ptr2 + %ptr3 = getelementptr <2 x i32>* %a, i32 3 + %val3 = load <2 x i32>* %ptr3 + + %t0 = add <2 x i32> %val0, %val1 + %t1 = add <2 x i32> 
%t0, %val2 + %t2 = add <2 x i32> %t1, %val3 + + store <2 x i32> %t2, <2 x i32>* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll index f5a1795e3c2..58882bf1666 100644 --- a/test/CodeGen/NVPTX/vector-loads.ll +++ b/test/CodeGen/NVPTX/vector-loads.ll @@ -9,7 +9,7 @@ define void @foo(<2 x float>* %a) { ; CHECK: .func foo -; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <2 x float>* %a %t2 = fmul <2 x float> %t1, %t1 store <2 x float> %t2, <2 x float>* %a @@ -18,7 +18,7 @@ define void @foo(<2 x float>* %a) { define void @foo2(<4 x float>* %a) { ; CHECK: .func foo2 -; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <4 x float>* %a %t2 = fmul <4 x float> %t1, %t1 store <4 x float> %t2, <4 x float>* %a @@ -27,8 +27,8 @@ define void @foo2(<4 x float>* %a) { define void @foo3(<8 x float>* %a) { ; CHECK: .func foo3 -; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; -; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16]; +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} +; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <8 x float>* %a %t2 = fmul <8 x float> %t1, %t1 store <8 x float> %t2, <8 x float>* %a @@ -39,7 +39,7 @@ define void @foo3(<8 x float>* %a) { define void @foo4(<2 x i32>* %a) { ; CHECK: .func foo4 -; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <2 x i32>* %a %t2 = mul <2 x i32> %t1, %t1 store <2 x i32> %t2, <2 x i32>* %a @@ -48,7 +48,7 @@ define void @foo4(<2 x i32>* %a) { define void @foo5(<4 x i32>* %a) { ; CHECK: .func foo5 -; CHECK: ld.v4.u32 {%r{{[0-9]+}}, 
%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <4 x i32>* %a %t2 = mul <4 x i32> %t1, %t1 store <4 x i32> %t2, <4 x i32>* %a @@ -57,8 +57,8 @@ define void @foo5(<4 x i32>* %a) { define void @foo6(<8 x i32>* %a) { ; CHECK: .func foo6 -; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; -; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16]; +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} +; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <8 x i32>* %a %t2 = mul <8 x i32> %t1, %t1 store <8 x i32> %t2, <8 x i32>* %a diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll index 7f8ae07463a..14591248f35 100644 --- a/test/CodeGen/X86/win_ftol2.ll +++ b/test/CodeGen/X86/win_ftol2.ll @@ -63,9 +63,9 @@ define i64 @double_ui64_2(double %x, double %y, double %z) nounwind { %1 = fdiv double %x, %y %2 = fsub double %x, %z - %3 = fptoui double %1 to i64 - %4 = fptoui double %2 to i64 - %5 = sub i64 %3, %4 + %3 = fptoui double %2 to i64 + %4 = fptoui double %1 to i64 + %5 = sub i64 %4, %3 ret i64 %5 } @@ -121,9 +121,9 @@ define {double, i64} @double_ui64_4(double %x, double %y) nounwind { ; FTOL_2: calll __ftol2 ;; stack is %x - %1 = fptoui double %x to i64 - %2 = fptoui double %y to i64 - %3 = sub i64 %1, %2 + %1 = fptoui double %y to i64 + %2 = fptoui double %x to i64 + %3 = sub i64 %2, %1 %4 = insertvalue {double, i64} undef, double %x, 0 %5 = insertvalue {double, i64} %4, i64 %3, 1 ret {double, i64} %5