diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td index 42c58214151..11dbb25da80 100644 --- a/include/llvm/IR/IntrinsicsMips.td +++ b/include/llvm/IR/IntrinsicsMips.td @@ -1544,22 +1544,26 @@ def int_mips_shf_w : GCCBuiltin<"__builtin_msa_shf_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_sld_b : GCCBuiltin<"__builtin_msa_sld_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_sld_h : GCCBuiltin<"__builtin_msa_sld_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_sld_w : GCCBuiltin<"__builtin_msa_sld_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_sld_d : GCCBuiltin<"__builtin_msa_sld_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_sldi_b : GCCBuiltin<"__builtin_msa_sldi_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_sldi_h : GCCBuiltin<"__builtin_msa_sldi_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_sldi_w : GCCBuiltin<"__builtin_msa_sldi_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_sldi_d : GCCBuiltin<"__builtin_msa_sldi_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_sll_b : GCCBuiltin<"__builtin_msa_sll_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 7a588f6e950..59b771f70b2 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1261,13 +1261,15 @@ class MSA_COPY_DESC_BASE { +class MSA_ELM_SLD_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm4:$n); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, uimm4:$n); string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$n))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, + immZExt4:$n))]; + string Constraints = "$wd = $wd_in"; InstrItinClass Itinerary = itin; } @@ -1410,10 +1412,12 @@ class MSA_3R_SLD_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, GPR32:$rt); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, GPR32:$rt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, + GPR32:$rt))]; InstrItinClass Itinerary = itin; + string Constraints = "$wd = $wd_in"; } class MSA_3R_4R_DESC_BASE Pattern = [(set ROWD:$wd, - (OpNode ROWD:$wd_in, ROWS:$ws, ROWT:$wt))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, + ROWT:$wt))]; InstrItinClass Itinerary = itin; string Constraints = "$wd = $wd_in"; } @@ -2495,10 +2499,14 @@ class SLD_H_DESC : MSA_3R_SLD_DESC_BASE<"sld.h", int_mips_sld_h, MSA128HOpnd>; class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>; class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>; -class SLDI_B_DESC : MSA_ELM_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd>; -class SLDI_H_DESC : MSA_ELM_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd>; -class SLDI_W_DESC : MSA_ELM_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128WOpnd>; -class SLDI_D_DESC : MSA_ELM_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128DOpnd>; +class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, + MSA128BOpnd>; +class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, + MSA128HOpnd>; +class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, + MSA128WOpnd>; +class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, + MSA128DOpnd>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; diff --git a/test/CodeGen/Mips/msa/3r-s.ll b/test/CodeGen/Mips/msa/3r-s.ll index 30cf265233e..581c3bfd78a 100644 --- a/test/CodeGen/Mips/msa/3r-s.ll +++ b/test/CodeGen/Mips/msa/3r-s.ll @@ -5,98 +5,114 @@ ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s @llvm_mips_sld_b_ARG1 = global <16 x i8> , align 16 -@llvm_mips_sld_b_ARG2 = global i32 10, align 16 +@llvm_mips_sld_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_sld_b_ARG3 = global i32 10, align 16 @llvm_mips_sld_b_RES = global <16 x i8> , align 16 define void @llvm_mips_sld_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_sld_b_ARG1 - %1 = load i32* @llvm_mips_sld_b_ARG2 - %2 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, i32 %1) - store <16 x i8> %2, <16 x i8>* @llvm_mips_sld_b_RES + %1 = load <16 x i8>* @llvm_mips_sld_b_ARG2 + %2 = load i32* @llvm_mips_sld_b_ARG3 + %3 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, <16 x i8> %1, i32 %2) + store <16 x i8> %3, <16 x i8>* @llvm_mips_sld_b_RES ret void } -declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_sld_b_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_b_ARG1) ; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_b_ARG2) -; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.b [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_b_ARG3) +; CHECK-DAG: ld.b [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.b [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.b [[WD]] ; CHECK: .size llvm_mips_sld_b_test ; @llvm_mips_sld_h_ARG1 = global <8 x i16> , align 16 -@llvm_mips_sld_h_ARG2 = global i32 10, align 16 +@llvm_mips_sld_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_sld_h_ARG3 = global i32 10, align 16 @llvm_mips_sld_h_RES = global <8 x i16> , align 16 define void @llvm_mips_sld_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_sld_h_ARG1 - %1 = load i32* @llvm_mips_sld_h_ARG2 - %2 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, i32 %1) - store <8 x i16> %2, <8 x i16>* @llvm_mips_sld_h_RES + %1 = load <8 x i16>* @llvm_mips_sld_h_ARG2 + %2 = load i32* @llvm_mips_sld_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, <8 x i16> %1, i32 %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_sld_h_RES ret void } -declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, i32) nounwind +declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, <8 x i16>, i32) nounwind ; CHECK: llvm_mips_sld_h_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_h_ARG1) -; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_h_ARG2) -; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.h [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_h_ARG2) +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_h_ARG3) +; CHECK-DAG: ld.h [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.h [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.h [[WD]] ; CHECK: .size llvm_mips_sld_h_test ; @llvm_mips_sld_w_ARG1 = global <4 x i32> , align 16 -@llvm_mips_sld_w_ARG2 = global i32 10, align 16 +@llvm_mips_sld_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_sld_w_ARG3 = global i32 10, align 16 @llvm_mips_sld_w_RES = global <4 x i32> , align 16 define void @llvm_mips_sld_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_sld_w_ARG1 - %1 = load i32* @llvm_mips_sld_w_ARG2 - %2 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, i32 %1) - store <4 x i32> %2, <4 x i32>* @llvm_mips_sld_w_RES + %1 = load <4 x i32>* @llvm_mips_sld_w_ARG2 + %2 = load i32* @llvm_mips_sld_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, <4 x i32> %1, i32 %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_sld_w_RES ret void } -declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, i32) nounwind +declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, <4 x i32>, i32) nounwind ; CHECK: llvm_mips_sld_w_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_w_ARG1) -; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2) -; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.w [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2) +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_w_ARG3) +; CHECK-DAG: ld.w [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.w [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.w [[WD]] ; CHECK: .size llvm_mips_sld_w_test ; @llvm_mips_sld_d_ARG1 = global <2 x i64> , align 16 -@llvm_mips_sld_d_ARG2 = global i32 10, align 16 +@llvm_mips_sld_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_sld_d_ARG3 = global i32 10, align 16 @llvm_mips_sld_d_RES = global <2 x i64> , align 16 define void @llvm_mips_sld_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_sld_d_ARG1 - %1 = load i32* @llvm_mips_sld_d_ARG2 - %2 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, i32 %1) - store <2 x i64> %2, <2 x i64>* @llvm_mips_sld_d_RES + %1 = load <2 x i64>* @llvm_mips_sld_d_ARG2 + %2 = load i32* @llvm_mips_sld_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, <2 x i64> %1, i32 %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_sld_d_RES ret void } -declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, i32) nounwind +declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, <2 x i64>, i32) nounwind ; CHECK: llvm_mips_sld_d_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_d_ARG1) -; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2) -; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.d [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2) +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_d_ARG3) +; CHECK-DAG: ld.d [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.d [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.d [[WD]] ; CHECK: .size llvm_mips_sld_d_test ; diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll index 39d670dac84..00a6544b120 100644 --- a/test/CodeGen/Mips/msa/elm_shift_slide.ll +++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll @@ -5,17 +5,19 @@ ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s @llvm_mips_sldi_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_sldi_b_ARG2 = global <16 x i8> , align 16 @llvm_mips_sldi_b_RES = global <16 x i8> , align 16 define void @llvm_mips_sldi_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_sldi_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, i32 1) - store <16 x i8> %1, <16 x i8>* @llvm_mips_sldi_b_RES + %1 = load <16 x i8>* @llvm_mips_sldi_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, <16 x i8> %1, i32 1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_sldi_b_RES ret void } -declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_sldi_b_test: ; CHECK: ld.b @@ -24,17 +26,19 @@ declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind ; CHECK: .size llvm_mips_sldi_b_test ; @llvm_mips_sldi_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_sldi_h_ARG2 = global <8 x i16> , align 16 @llvm_mips_sldi_h_RES = global <8 x i16> , align 16 define void @llvm_mips_sldi_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_sldi_h_ARG1 - %1 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, i32 1) - store <8 x i16> %1, <8 x i16>* @llvm_mips_sldi_h_RES + %1 = load <8 x i16>* @llvm_mips_sldi_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, <8 x i16> %1, i32 1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_sldi_h_RES ret void } -declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind +declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, <8 x i16>, i32) nounwind ; CHECK: llvm_mips_sldi_h_test: ; CHECK: ld.h @@ -43,17 +47,19 @@ declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind ; CHECK: .size llvm_mips_sldi_h_test ; @llvm_mips_sldi_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_sldi_w_ARG2 = global <4 x i32> , align 16 @llvm_mips_sldi_w_RES = global <4 x i32> , align 16 define void @llvm_mips_sldi_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_sldi_w_ARG1 - %1 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, i32 1) - store <4 x i32> %1, <4 x i32>* @llvm_mips_sldi_w_RES + %1 = load <4 x i32>* @llvm_mips_sldi_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, <4 x i32> %1, i32 1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_sldi_w_RES ret void } -declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind +declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, <4 x i32>, i32) nounwind ; CHECK: llvm_mips_sldi_w_test: ; CHECK: ld.w @@ -62,17 +68,19 @@ declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind ; CHECK: .size llvm_mips_sldi_w_test ; @llvm_mips_sldi_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_sldi_d_ARG2 = global <2 x i64> , align 16 @llvm_mips_sldi_d_RES = global <2 x i64> , align 16 define void @llvm_mips_sldi_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_sldi_d_ARG1 - %1 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, i32 1) - store <2 x i64> %1, <2 x i64>* @llvm_mips_sldi_d_RES + %1 = load <2 x i64>* @llvm_mips_sldi_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, <2 x i64> %1, i32 1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_sldi_d_RES ret void } -declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, i32) nounwind +declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, <2 x i64>, i32) nounwind ; CHECK: llvm_mips_sldi_d_test: ; CHECK: ld.d