[MIPS] Add cpu octeon and some instructions

The Octeon CPU from Cavium Networks is mips64r2-based and has an extended instruction set. To make use of it in LLVM, a new CPU "octeon" and a subtarget feature "cnmips" are added. A small set of new instructions (baddu, dmul, pop, dpop, seq, sne) is also added. LLVM generates the dmul, pop and dpop instructions when given -mcpu=octeon or -mattr=+cnmips. llvm-svn: 204337
2025-02-20 04:11:13 +00:00 · 2014-03-20 11:51:58 +00:00 · 2014-03-20 11:51:58 +00:00 · fd99c3fbd8
commit fd99c3fbd8
parent 7f1c389c71
13 changed files with 203 additions and 6 deletions
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@ -83,6 +83,10 @@ def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">;
 def FeatureMicroMips  : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
                                         "microMips mode">;

+def FeatureCnMips     : SubtargetFeature<"cnmips", "HasCnMips",
+                                "true", "Octeon cnMIPS Support",
+                                [FeatureMips64r2]>;
+
 //===----------------------------------------------------------------------===//
 // Mips processors supported.
 //===----------------------------------------------------------------------===//
@ -95,6 +99,7 @@ def : Proc<"mips32r2", [FeatureMips32r2, FeatureO32]>;
 def : Proc<"mips64", [FeatureMips64, FeatureN64]>;
 def : Proc<"mips64r2", [FeatureMips64r2, FeatureN64]>;
 def : Proc<"mips16", [FeatureMips16, FeatureO32]>;
+def : Proc<"octeon", [FeatureMips64r2, FeatureN64, FeatureCnMips]>;

 def MipsAsmParser : AsmParser {
  let ShouldEmitMatchRegisterName = 0;
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@ -227,6 +227,44 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
  def SLL64_64 : FR<0x0, 0x00, (outs GPR64:$rd), (ins GPR64:$rt),
                    "sll\t$rd, $rt, 0", [], II_SLL>;
 }
+
+// Cavium Octeon cnMIPS instructions
+let Predicates = [HasCnMips] in {  // scope Predicates over the records below
+
+class Count1s<string opstr, RegisterOperand RO>:  // rd = ctpop(rs) on RO
+  InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+         [(set RO:$rd, (ctpop RO:$rs))], II_POP, FrmR, opstr>;
+
+class SetCC64_R<string opstr, PatFrag cond_op> :  // rd = cond_op(rs, rt)
+  InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+         !strconcat(opstr, "\t$rd, $rs, $rt"),
+         [(set GPR64Opnd:$rd, (cond_op GPR64Opnd:$rs, GPR64Opnd:$rt))],
+         II_SEQ_SNE, FrmR, opstr>;
+
+// Unsigned Byte Add: selected for the pattern (rs + rt) & 255
+def BADDu  : InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+                    "baddu\t$rd, $rs, $rt",
+                    [(set GPR64Opnd:$rd, (and (add GPR64Opnd:$rs,
+                                                   GPR64Opnd:$rt), 255))],
+                    II_BADDU, FrmR, "baddu">, ADD_FM<0x1c, 0x28> {
+  let isCommutable = 1;
+  let isReMaterializable = 1;
+}
+
+// Multiply Doubleword to GPR: selected for a 64-bit `mul`
+let Defs = [HI0, LO0, P0, P1, P2] in  // registers dmul implicitly defines
+def DMUL  : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>,
+                              ADD_FM<0x1c, 0x03>;
+
+// Count Ones in a Word (GPR32Opnd) / Doubleword (GPR64Opnd)
+def POP   : Count1s<"pop", GPR32Opnd>, POP_FM<0x2c>;
+def DPOP  : Count1s<"dpop", GPR64Opnd>, POP_FM<0x2d>;
+
+// Set on equal/not equal (64-bit register operands, via SetCC64_R)
+def SEQ   : SetCC64_R<"seq", seteq>, SEQ_FM<0x2a>;
+def SNE   : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>;
+}  // end Predicates = [HasCnMips]
+
 }
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@ -295,8 +295,13 @@ MipsTargetLowering(MipsTargetMachine &TM)
  setOperationAction(ISD::FP_TO_UINT,        MVT::i32,   Expand);
  setOperationAction(ISD::FP_TO_UINT,        MVT::i64,   Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1,    Expand);
+  if (Subtarget->hasCnMips()) {
+    setOperationAction(ISD::CTPOP,           MVT::i32,   Legal);
+    setOperationAction(ISD::CTPOP,           MVT::i64,   Legal);
+  } else {
    setOperationAction(ISD::CTPOP,           MVT::i32,   Expand);
    setOperationAction(ISD::CTPOP,           MVT::i64,   Expand);
+  }
  setOperationAction(ISD::CTTZ,              MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ,              MVT::i64,   Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::i32,   Expand);
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@ -489,6 +489,35 @@ class WAIT_FM : StdArch {
  let Inst{5-0}   = 0x20;
 }

+class POP_FM<bits<6> funct> : StdArch {  // two-register (rd, rs) encoding
+  bits<5> rd;
+  bits<5> rs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x1c;   // fixed major opcode
+  let Inst{25-21} = rs;     // source register
+  let Inst{20-16} = 0;      // no rt operand in this format; always zero
+  let Inst{15-11} = rd;     // destination register
+  let Inst{10-6}  = 0;      // always zero
+  let Inst{5-0}   = funct;  // function code supplied by the instruction def
+}
+
+class SEQ_FM<bits<6> funct> : StdArch {  // three-register (rd, rs, rt) encoding
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x1c;   // fixed major opcode
+  let Inst{25-21} = rs;     // first source register
+  let Inst{20-16} = rt;     // second source register
+  let Inst{15-11} = rd;     // destination register
+  let Inst{10-6}  = 0;      // always zero
+  let Inst{5-0}   = funct;  // function code supplied by the instruction def
+}
+
 //===----------------------------------------------------------------------===//
 //  System calls format <op|code_|funct>
 //===----------------------------------------------------------------------===//
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@ -170,6 +170,8 @@ def IsN64       :     Predicate<"Subtarget.isABI_N64()">,
                      AssemblerPredicate<"FeatureN64">;
 def InMips16Mode :    Predicate<"Subtarget.inMips16Mode()">,
                      AssemblerPredicate<"FeatureMips16">;
+def HasCnMips    :    Predicate<"Subtarget.hasCnMips()">,
+                      AssemblerPredicate<"FeatureCnMips">;
 def RelocStatic :     Predicate<"TM.getRelocationModel() == Reloc::Static">,
                      AssemblerPredicate<"FeatureMips32">;
 def RelocPIC    :     Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
@ -772,7 +774,6 @@ class CountLeading1<string opstr, RegisterOperand RO>:
         [(set RO:$rd, (ctlz (not RO:$rs)))], II_CLO, FrmR, opstr>,
  Requires<[HasBitCount, HasStdEnc]>;

-
 // Sign Extend in Register.
 class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO,
                   InstrItinClass itin> :
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@ -245,6 +245,15 @@ let Namespace = "Mips" in {
  def MSARequest : MipsReg<5, "5">;
  def MSAMap     : MipsReg<6, "6">;
  def MSAUnmap   : MipsReg<7, "7">;
+
+  // Octeon multiplier and product registers
+  def MPL0 : MipsReg<0, "mpl0">;
+  def MPL1 : MipsReg<1, "mpl1">;
+  def MPL2 : MipsReg<2, "mpl2">;
+  def P0 : MipsReg<0, "p0">;
+  def P1 : MipsReg<1, "p1">;
+  def P2 : MipsReg<2, "p2">;
+
 }

 //===----------------------------------------------------------------------===//
@ -376,6 +385,12 @@ def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
 def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
           Unallocatable;

+// Octeon multiplier and product registers
+def OCTEON_MPL : RegisterClass<"Mips", [i64], 64, (add MPL0, MPL1, MPL2)>,
+                 Unallocatable;
+def OCTEON_P : RegisterClass<"Mips", [i64], 64, (add P0, P1, P2)>,
+               Unallocatable;
+
 // Register Operands.

 class MipsAsmRegOperand : AsmOperandClass {
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@ -117,10 +117,14 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
  setOperationAction(ISD::MULHS,              MVT::i32, Custom);
  setOperationAction(ISD::MULHU,              MVT::i32, Custom);

+  if (Subtarget->hasCnMips())
+    setOperationAction(ISD::MUL,              MVT::i64, Legal);
+  else if (HasMips64)
+    setOperationAction(ISD::MUL,              MVT::i64, Custom);
+
  if (HasMips64) {
    setOperationAction(ISD::MULHS,            MVT::i64, Custom);
    setOperationAction(ISD::MULHU,            MVT::i64, Custom);
-    setOperationAction(ISD::MUL,              MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@ -28,6 +28,7 @@ def II_ADD_D            : InstrItinClass;
 def II_ADD_S            : InstrItinClass;
 def II_AND              : InstrItinClass;
 def II_ANDI             : InstrItinClass;
+def II_BADDU            : InstrItinClass;
 def II_CEIL             : InstrItinClass;
 def II_CFC1             : InstrItinClass;
 def II_CLO              : InstrItinClass;
@ -46,6 +47,7 @@ def II_DIV_D            : InstrItinClass;
 def II_DIV_S            : InstrItinClass;
 def II_DMFC1            : InstrItinClass;
 def II_DMTC1            : InstrItinClass;
+def II_DMUL             : InstrItinClass;
 def II_DMULT            : InstrItinClass;
 def II_DMULTU           : InstrItinClass;
 def II_DROTR            : InstrItinClass;
@ -120,6 +122,7 @@ def II_NMSUB_S          : InstrItinClass;
 def II_NOR              : InstrItinClass;
 def II_OR               : InstrItinClass;
 def II_ORI              : InstrItinClass;
+def II_POP              : InstrItinClass;
 def II_RDHWR            : InstrItinClass;
 def II_RESTORE          : InstrItinClass;
 def II_ROTR             : InstrItinClass;
@ -134,6 +137,8 @@ def II_SDR              : InstrItinClass;
 def II_SDXC1            : InstrItinClass;
 def II_SEB              : InstrItinClass;
 def II_SEH              : InstrItinClass;
+def II_SEQ_SNE          : InstrItinClass; // seq and sne
+def II_SEQI_SNEI        : InstrItinClass; // seqi and snei
 def II_SH               : InstrItinClass;
 def II_SLL              : InstrItinClass;
 def II_SLLV             : InstrItinClass;
@ -167,6 +172,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
  InstrItinData<II_ADDIU           , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_ADDU            , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_AND             , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_BADDU           , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_SLL             , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_SRA             , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_SRL             , [InstrStage<1,  [ALU]>]>,
@ -197,6 +203,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
  InstrItinData<II_MOVZ            , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_NOR             , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_OR              , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_POP             , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_RDHWR           , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_SUBU            , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_XOR             , [InstrStage<1,  [ALU]>]>,
@ -223,7 +230,10 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
  InstrItinData<II_SDR             , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_SD              , [InstrStage<1,  [ALU]>]>,
  InstrItinData<II_SAVE            , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_SEQ_SNE         , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_SEQI_SNEI       , [InstrStage<1,  [ALU]>]>,
  InstrItinData<IIBranch           , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_DMUL            , [InstrStage<17, [IMULDIV]>]>,
  InstrItinData<II_DMULT           , [InstrStage<17, [IMULDIV]>]>,
  InstrItinData<II_DMULTU          , [InstrStage<17, [IMULDIV]>]>,
  InstrItinData<II_MADD            , [InstrStage<17, [IMULDIV]>]>,
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@ -81,8 +81,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
  MipsGenSubtargetInfo(TT, CPU, FS),
  MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
  IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
-  IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
-  HasBitCount(false), HasFPIdx(false),
+  HasCnMips(false), IsLinux(true), HasSEInReg(false), HasCondMov(false),
+  HasSwap(false), HasBitCount(false), HasFPIdx(false),
  InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
  InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
  AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@ -65,6 +65,9 @@ protected:
  // HasVFPU - Processor has a vector floating point unit.
  bool HasVFPU;

+  // CPU supports cnMIPS (Cavium Networks Octeon CPU).
+  bool HasCnMips;
+
  // isLinux - Target system is Linux. Is false we consider ELFOS for now.
  bool IsLinux;

@ -154,6 +157,8 @@ public:
  bool hasMips64() const { return MipsArchVersion >= Mips64; }
  bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }

+  bool hasCnMips() const { return HasCnMips; }
+
  bool isLittle() const { return IsLittle; }
  bool isFP64bit() const { return IsFP64bit; }
  bool isNotFP64bit() const { return !IsFP64bit; }
--- a/test/CodeGen/Mips/octeon.ll
+++ b/test/CodeGen/Mips/octeon.ll
@ -0,0 +1,15 @@
+; RUN: llc -O1 < %s -march=mips64 -mcpu=octeon | FileCheck %s -check-prefix=OCTEON
+; RUN: llc -O1 < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=MIPS64
+
+define i64 @mul(i64 %a, i64 %b) nounwind {
+entry:
+; OCTEON-LABEL: mul:
+; OCTEON: jr    $ra
+; OCTEON: dmul  $2, $4, $5
+; MIPS64-LABEL: mul:
+; MIPS64: dmult
+; MIPS64: jr
+; MIPS64: mflo
+  %res = mul i64 %a, %b
+  ret i64 %res
+}
--- a/test/CodeGen/Mips/octeon_popcnt.ll
+++ b/test/CodeGen/Mips/octeon_popcnt.ll
@ -0,0 +1,47 @@
+; RUN: llc -O1 -march=mips64 -mcpu=octeon < %s | FileCheck %s -check-prefix=OCTEON
+; RUN: llc -O1 -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=MIPS64
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+  ret i8 %cnt
+; OCTEON-LABEL: cnt8:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $1
+; MIPS64-LABEL: cnt8:
+; MIPS64-NOT: pop
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+  ret i16 %cnt
+; OCTEON-LABEL: cnt16:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $1
+; MIPS64-LABEL: cnt16:
+; MIPS64-NOT: pop
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %cnt
+; OCTEON-LABEL: cnt32:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $4
+; MIPS64-LABEL: cnt32:
+; MIPS64-NOT: pop
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %cnt
+; OCTEON-LABEL: cnt64:
+; OCTEON: jr   $ra
+; OCTEON: dpop $2, $4
+; MIPS64-LABEL: cnt64:
+; MIPS64-NOT: dpop
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
--- a/test/MC/Mips/octeon-instructions.s
+++ b/test/MC/Mips/octeon-instructions.s
@ -0,0 +1,23 @@
+# RUN: llvm-mc  %s -triple=mips64-unknown-linux -show-encoding -mcpu=octeon | FileCheck %s
+
+# CHECK: baddu $9, $6, $7             # encoding: [0x70,0xc7,0x48,0x28]
+# CHECK: baddu $17, $18, $19          # encoding: [0x72,0x53,0x88,0x28]
+# CHECK: dmul  $9, $6, $7             # encoding: [0x70,0xc7,0x48,0x03]
+# CHECK: dmul  $19, $24, $25          # encoding: [0x73,0x19,0x98,0x03]
+# CHECK: dpop  $9, $6                 # encoding: [0x70,0xc0,0x48,0x2d]
+# CHECK: dpop  $15, $22               # encoding: [0x72,0xc0,0x78,0x2d]
+# CHECK: pop   $9, $6                 # encoding: [0x70,0xc0,0x48,0x2c]
+# CHECK: pop   $8, $19                # encoding: [0x72,0x60,0x40,0x2c]
+# CHECK: seq   $25, $23, $24          # encoding: [0x72,0xf8,0xc8,0x2a]
+# CHECK: sne   $25, $23, $24          # encoding: [0x72,0xf8,0xc8,0x2b]
+
+  baddu $9, $6, $7
+  baddu $17, $18, $19
+  dmul  $9, $6, $7
+  dmul  $19, $24, $25
+  dpop  $9, $6
+  dpop  $15, $22
+  pop   $9, $6
+  pop   $8, $19
+  seq   $25, $23, $24
+  sne   $25, $23, $24