From 05ded2973817914d600effadac0aa75504a6c4ef Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Wed, 3 Dec 2008 19:38:05 +0000
Subject: [PATCH] Use mmx (punpckldq VR64, (mmx_v_set0)) to clear high 32-bits of a VR64 register.

llvm-svn: 60499
---
Notes (free-form region after the "---" separator; not part of the commit message):

  * X86InstrMMX.td: the three existing load patterns (AddedComplexity = 20)
    are touched only on their "(MMX_MOVZDI2PDIrm addr:$src)>;" result lines.
    Each removed/added pair carries the same text in this copy, so the
    change there can only be in whitespace, which this copy did not preserve.
  * X86InstrMMX.td: a new "Clear top half" group (AddedComplexity = 15) adds
    register-source X86vzmovl patterns for v8i8, v4i16 and v2i32. All three
    select MMX_PUNPCKLDQrr with VR64:$src and (MMX_V_SET0) as operands.
    Presumably the lower AddedComplexity keeps the load-folding patterns
    preferred when the source is a load -- TODO confirm.
  * mmx-vzmovl-2.ll: new test; both RUN lines pipe llvm-as into
    llc -march=x86-64 -mattr=+mmx and grep the output, once for "pxor" and
    once for "punpckldq". Presumably pxor is how MMX_V_SET0 is emitted --
    verify against the MMX_V_SET0 definition.
  * NOTE(review): line breaks below were restored from the hunk headers
    (-587,11 +587,21 and +1,24) and the diffstat totals. Indentation inside
    lines (tabs vs. spaces) is a reconstruction; confirm against llvm-svn
    r60499 before applying this patch.

 lib/Target/X86/X86InstrMMX.td    | 16 +++++++++++++---
 test/CodeGen/X86/mmx-vzmovl-2.ll | 24 ++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 test/CodeGen/X86/mmx-vzmovl-2.ll

diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 3bd9af5a9f4..fd708013b7c 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -587,11 +587,21 @@ let AddedComplexity = 15 in {
 
 let AddedComplexity = 20 in {
   def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
+            (MMX_MOVZDI2PDIrm addr:$src)>;
   def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
+            (MMX_MOVZDI2PDIrm addr:$src)>;
   def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
+            (MMX_MOVZDI2PDIrm addr:$src)>;
+}
+
+// Clear top half.
+let AddedComplexity = 15 in {
+  def : Pat<(v8i8 (X86vzmovl VR64:$src)),
+            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+  def : Pat<(v4i16 (X86vzmovl VR64:$src)),
+            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+  def : Pat<(v2i32 (X86vzmovl VR64:$src)),
+            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
 }
 
 // Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
new file mode 100644
index 00000000000..f0b5cc3d808
--- /dev/null
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep pxor
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep punpckldq
+
+        %struct.vS1024 = type { [8 x <4 x i32>] }
+        %struct.vS512 = type { [4 x <4 x i32>] }
+
+declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
+
+define void @t() nounwind {
+entry:
+        br label %bb554
+
+bb554:          ; preds = %bb554, %entry
+        %sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ]          ; <<1 x i64>> [#uses=1]
+        %0 = load <1 x i64>* null, align 8          ; <<1 x i64>> [#uses=2]
+        %1 = bitcast <1 x i64> %0 to <2 x i32>          ; <<2 x i32>> [#uses=1]
+        %tmp555 = and <2 x i32> %1, < i32 -1, i32 0 >          ; <<2 x i32>> [#uses=1]
+        %2 = bitcast <2 x i32> %tmp555 to <1 x i64>          ; <<1 x i64>> [#uses=1]
+        %3 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 32) nounwind readnone          ; <<1 x i64>> [#uses=1]
+        %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %2          ; <<1 x i64>> [#uses=1]
+        %4 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %tmp558, i32 32) nounwind readnone          ; <<1 x i64>> [#uses=1]
+        %tmp562 = add <1 x i64> %4, %3          ; <<1 x i64>> [#uses=1]
+        br label %bb554
+}