diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 6998383f08e..7c70a7e3d0a 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -2198,6 +2198,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr())) Inst.setOpcode(Hexagon::S2_storerinewgp); break; + case Hexagon::A2_zxtb: { + Inst.setOpcode(Hexagon::A2_andir); + Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, Context))); + break; + } } // switch return Match_Success; diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 54db5ad4374..01ba1ccd37f 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -282,6 +282,36 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, break; } + case Hexagon::A2_tfrf: { + Inst.setOpcode(Hexagon::A2_paddif); + Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + break; + } + + case Hexagon::A2_tfrt: { + Inst.setOpcode(Hexagon::A2_paddit); + Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + break; + } + + case Hexagon::A2_tfrfnew: { + Inst.setOpcode(Hexagon::A2_paddifnew); + Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + break; + } + + case Hexagon::A2_tfrtnew: { + Inst.setOpcode(Hexagon::A2_padditnew); + Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + break; + } + + case Hexagon::A2_zxtb: { + Inst.setOpcode(Hexagon::A2_andir); + Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, OutContext))); + break; + } + // "$dst = CONST64(#$src1)", case Hexagon::CONST64: if (!OutStreamer->hasRawTextSupport()) { @@ -376,6 +406,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, 
Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI)); return; } + case Hexagon::PS_call_nr: + Inst.setOpcode(Hexagon::J2_call); + break; case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { MCOperand &MO = MappedInst.getOperand(2); int64_t Imm; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index c5719ad5b6d..16298cae083 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -401,13 +401,12 @@ def A2_tfril: T_tfr16<0>; def A2_tfrih: T_tfr16<1>; // Conditional transfer is an alias to conditional "Rd = add(Rs, #0)". -let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in +let isPredicated = 1, hasNewValue = 1, isCodeGenOnly = 1, opNewValue = 0, + isPseudo = 1 in class T_tfr_pred : ALU32Inst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), - "if ("#!if(isPredNot, "!", "")# - "$src1"#!if(isPredNew, ".new", "")# - ") $dst = $src2"> { + ""> { bits<5> dst; bits<2> src1; bits<5> src2; @@ -487,6 +486,11 @@ multiclass TFR64_base { } } +def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32=$Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; +def A2_tfrfnewAlias : InstAlias<"if (!$Pu4.new) $Rd32=$Rs32", (A2_paddifnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; +def A2_tfrtAlias : InstAlias<"if ($Pu4) $Rd32=$Rs32", (A2_paddit IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; +def A2_tfrtnewAlias : InstAlias<"if ($Pu4.new) $Rd32=$Rs32", (A2_padditnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; + let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12, isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR", hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in @@ -699,19 +703,7 @@ defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel; let hasNewValue = 1, opNewValue = 0 in class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), - "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255) - bits<5> Rd; - bits<5> Rs; - bits<10> 
s10 = 255; - - let IClass = 0b0111; - - let Inst{27-22} = 0b011000; - let Inst{4-0} = Rd; - let Inst{20-16} = Rs; - let Inst{21} = s10{9}; - let Inst{13-5} = s10{8-0}; -} + "$Rd=zxtb($Rs)", [] >; //Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255)" multiclass ZXTB_base minOp> { diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td index 225f9440507..5b7610a68af 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -61,7 +61,7 @@ multiclass T_Calls { defm J2_call: T_Calls<1, "">, PredRel; let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, - Defs = VolatileV3.Regs in + Defs = VolatileV3.Regs, isPseudo = 1 in def PS_call_nr : T_Call<1, "">, PredRel; let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, diff --git a/test/CodeGen/Hexagon/swp-stages4.ll b/test/CodeGen/Hexagon/swp-stages4.ll new file mode 100644 index 00000000000..cdd09845ba5 --- /dev/null +++ b/test/CodeGen/Hexagon/swp-stages4.ll @@ -0,0 +1,94 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner -pipeliner-max-stages=2 -disable-block-placement=0 -hexagon-bit=0 < %s | FileCheck %s + +; Test that we rename registers correctly for multiple stages when there is a +; Phi that depends upon another Phi. 
+ +; CHECK: = and +; CHECK: = and +; CHECK: = and +; CHECK: [[REG0:(r[0-9]+)]] = and([[REG1:(r[0-9]+)]], #255) +; CHECK-NOT: [[REG0]] = and([[REG1]], #255) +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: [[REG0]] += add +; CHECK: = and +; CHECK: = and +; CHECK: [[REG0]] = and +; CHECK: endloop + +; Function Attrs: nounwind +define void @test(i8* noalias nocapture %src, i32 %srcWidth, i32 %srcHeight, i32 %srcStride, i8* noalias nocapture %dst, i32 %dstStride) #0 { +entry: + %sub = add i32 %srcWidth, -1 + %sub1 = add i32 %srcHeight, -1 + %add.ptr = getelementptr inbounds i8, i8* %src, i32 %srcStride + %add.ptr.sum = mul i32 %srcStride, 2 + %add.ptr2 = getelementptr inbounds i8, i8* %src, i32 %add.ptr.sum + br label %for.body.lr.ph + +for.body.lr.ph: + %0 = add i32 %srcHeight, -2 + %1 = mul i32 %0, %dstStride + %2 = mul i32 %0, %srcStride + %3 = mul i32 %sub1, %srcStride + br label %for.cond + +for.cond: + %scevgep = getelementptr i8, i8* %dst, i32 %1 + %scevgep220 = getelementptr i8, i8* %src, i32 %2 + %scevgep221 = getelementptr i8, i8* %src, i32 %3 + %arrayidx6 = getelementptr inbounds i8, i8* %src, i32 1 + %add11 = add i32 %srcStride, 1 + %arrayidx12 = getelementptr inbounds i8, i8* %src, i32 %add11 + br label %for.body75.preheader + +for.body75.preheader: + %sri = load i8, i8* %arrayidx6, align 1 + %sri224 = load i8, i8* %src, align 1 + %sri227 = load i8, i8* %arrayidx12, align 1 + %sri229 = load i8, i8* %add.ptr, align 1 + br label %for.body75 + +for.body75: + %j.0211 = phi i32 [ %add82, %for.body75 ], [ 1, %for.body75.preheader ] + %sr = phi i8 [ %4, %for.body75 ], [ %sri, %for.body75.preheader ] + %sr225 = phi i8 [ %sr, %for.body75 ], [ %sri224, %for.body75.preheader ] + %sr230 = phi i8 [ %5, %for.body75 ], [ %sri227, %for.body75.preheader ] + %sr231 = phi i8 [ %sr230, %for.body75 ], [ %sri229, %for.body75.preheader ] + %conv78 = zext i8 %sr225 to i32 + %conv80 = zext i8 %sr to i32 + %add81 = add nsw i32 %conv80, %conv78 + %add82 = add 
i32 %j.0211, 1 + %arrayidx83 = getelementptr inbounds i8, i8* %src, i32 %add82 + %4 = load i8, i8* %arrayidx83, align 1, !tbaa !0 + %conv84 = zext i8 %4 to i32 + %add85 = add nsw i32 %add81, %conv84 + %conv88 = zext i8 %sr231 to i32 + %add89 = add nsw i32 %add85, %conv88 + %conv91 = zext i8 %sr230 to i32 + %add92 = add nsw i32 %add89, %conv91 + %add.ptr.sum208 = add i32 %add82, %srcStride + %arrayidx94 = getelementptr inbounds i8, i8* %src, i32 %add.ptr.sum208 + %5 = load i8, i8* %arrayidx94, align 1, !tbaa !0 + %conv95 = zext i8 %5 to i32 + %add96 = add nsw i32 %add92, %conv95 + %mul97 = mul nsw i32 %add96, 7282 + %add98 = add nsw i32 %mul97, 32768 + %shr99209 = lshr i32 %add98, 16 + %conv100 = trunc i32 %shr99209 to i8 + %arrayidx101 = getelementptr inbounds i8, i8* %dst, i32 %j.0211 + store i8 %conv100, i8* %arrayidx101, align 1, !tbaa !0 + %exitcond = icmp eq i32 %add82, %sub + br i1 %exitcond, label %for.end104.loopexit, label %for.body75 + +for.end104.loopexit: + br label %for.end104 + +for.end104: + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = !{!"omnipotent char", !1} +!1 = !{!"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/swp-stages5.ll b/test/CodeGen/Hexagon/swp-stages5.ll new file mode 100644 index 00000000000..f83aa32ae0a --- /dev/null +++ b/test/CodeGen/Hexagon/swp-stages5.ll @@ -0,0 +1,78 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner -pipeliner-max-stages=2 -hexagon-bit=0 < %s | FileCheck %s + +; Very similar to swp-stages4.ll, but the pipelined schedule is a little +; different. 
+ +; CHECK: = memub(r{{[0-9]+}}++#1) +; CHECK-DAG: [[REG0:(r[0-9]+)]] = memub(r{{[0-9]+}}++#1) +; CHECK-DAG: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: = and([[REG0]], #255) +; CHECK: [[REG0]]{{[:0-9]*}} = +; CHECK: endloop + +define void @fred(i8* noalias nocapture %src, i32 %srcWidth, i32 %srcHeight, i32 %srcStride, i8* noalias nocapture %dst, i32 %dstStride) #0 { +entry: + %sub = add i32 %srcWidth, -1 + %sub1 = add i32 %srcHeight, -1 + %add.ptr = getelementptr inbounds i8, i8* %src, i32 %srcStride + %add.ptr.sum = mul i32 %srcStride, 2 + %add.ptr2 = getelementptr inbounds i8, i8* %src, i32 %add.ptr.sum + %cmp212 = icmp ugt i32 %sub1, 1 + br i1 %cmp212, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + br label %for.body74.preheader + +for.body74.preheader: + %0 = load i8, i8* %add.ptr, align 1, !tbaa !0 + %arrayidx40 = getelementptr inbounds i8, i8* %add.ptr, i32 1 + %1 = load i8, i8* %arrayidx40, align 1, !tbaa !0 + %2 = load i8, i8* %add.ptr, align 1, !tbaa !0 + %arrayidx46 = getelementptr inbounds i8, i8* %add.ptr, i32 1 + %3 = load i8, i8* %arrayidx46, align 1, !tbaa !0 + br label %for.body74 + +for.body74: + %4 = phi i8 [ %9, %for.body74 ], [ %3, %for.body74.preheader ] + %5 = phi i8 [ %4, %for.body74 ], [ %2, %for.body74.preheader ] + %6 = phi i8 [ %8, %for.body74 ], [ %1, %for.body74.preheader ] + %7 = phi i8 [ %6, %for.body74 ], [ %0, %for.body74.preheader ] + %j.0211 = phi i32 [ %add81, %for.body74 ], [ 1, %for.body74.preheader ] + %conv77 = zext i8 %7 to i32 + %conv79 = zext i8 %6 to i32 + %add80 = add nsw i32 %conv79, %conv77 + %add81 = add i32 %j.0211, 1 + %arrayidx82 = getelementptr inbounds i8, i8* %src, i32 %add81 + %8 = load i8, i8* %arrayidx82, align 1, !tbaa !0 + %conv83 = zext i8 %8 to i32 + %add84 = add nsw i32 %add80, %conv83 + %conv87 = zext i8 %5 to i32 + %add88 = add nsw i32 %add84, %conv87 + %conv90 = zext i8 %4 to i32 + %add91 = add nsw i32 %add88, %conv90 + %arrayidx93 = getelementptr inbounds i8, i8* 
%add.ptr, i32 %add81 + %9 = load i8, i8* %arrayidx93, align 1, !tbaa !0 + %conv94 = zext i8 %9 to i32 + %add95 = add nsw i32 %add91, %conv94 + %mul96 = mul nsw i32 %add95, 7282 + %add97 = add nsw i32 %mul96, 32768 + %shr98208 = lshr i32 %add97, 16 + %conv99 = trunc i32 %shr98208 to i8 + %add.ptr5.sum209 = add i32 %j.0211, %dstStride + %arrayidx100 = getelementptr inbounds i8, i8* %dst, i32 %add.ptr5.sum209 + store i8 %conv99, i8* %arrayidx100, align 1, !tbaa !0 + %exitcond = icmp eq i32 %add81, %sub + br i1 %exitcond, label %for.end103.loopexit, label %for.body74 + +for.end103.loopexit: + br label %for.end + +for.end: + ret void +} + +attributes #0 = { nounwind } + +!0 = !{!"omnipotent char", !1} +!1 = !{!"Simple C/C++ TBAA"} diff --git a/test/MC/Hexagon/asmMap.s b/test/MC/Hexagon/asmMap.s index 81bb8f31f02..f9dc0afc47c 100644 --- a/test/MC/Hexagon/asmMap.s +++ b/test/MC/Hexagon/asmMap.s @@ -540,10 +540,10 @@ r5=zxtb(r20) p0=cmp.eq(r0,##179976360) } -#CHECK: 74f9c00f { if (!p3) r15{{ *}}={{ *}}r25 +#CHECK: 74f9c00f { if (!p3) r15{{ *}} ={{ *}}add(r25, #0) if (!p3) r15=r25 -#CHECK: 7425c005 { if (p1) r5{{ *}}={{ *}}r5 +#CHECK: 7425c005 { if (p1) r5{{ *}}={{ *}}add(r5, #0) if (p1) r5=r5 #CHECK: e9badae2 { r2{{ *}}={{ *}}vrcmpys(r27:26, r27:26):<<1:rnd:sat:raw:lo @@ -561,7 +561,7 @@ if (!p2) r3:2=r7:6 #CHECK: fd0dcc7e { if (p3) r31:30{{ *}}={{ *}}{{r13:12|combine\(r13, *r12\)}} if (p3) r31:30=r13:12 -#CHECK: 748ae015 if (!p0.new) r21{{ *}}={{ *}}r10 +#CHECK: 748ae015 if (!p0.new) r21{{ *}}={{ *}}add(r10, #0) { p0=cmp.eq(r23,##805633208) if (!p0.new) r21=r10 @@ -605,4 +605,4 @@ r19:18=vrcmpys(r11:10,r16):<<1:sat r21:20=memb_fifo(r2) #CHECK: 9056c01c { r29:28{{ *}}={{ *}}memh_fifo(r22{{ *}}+{{ *}}#0) -r29:28=memh_fifo(r22) \ No newline at end of file +r29:28=memh_fifo(r22)