Re-instate MOV64r0 and MOV16r0, with adjustments to work with the

new AsmPrinter. This is perhaps less elegant than describing them in terms of MOV32r0 and subreg operations, but it allows the current register to rematerialize them. llvm-svn: 93158
2024-11-29 00:21:14 +00:00 · 2010-01-11 17:37:57 +00:00 · 2010-01-11 17:37:57 +00:00 · 3a55686345
commit 3a55686345
parent 31e8637ac2
6 changed files with 57 additions and 37 deletions
--- a/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@ -399,6 +399,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
    OutMI.setOpcode(X86::MOVZX32rm16);
    lower_subreg32(&OutMI, 0);
    break;
+  case X86::MOV16r0:
+    OutMI.setOpcode(X86::MOV32r0);
+    lower_subreg32(&OutMI, 0);
+    break;
+  case X86::MOV64r0:
+    OutMI.setOpcode(X86::MOV32r0);
+    lower_subreg32(&OutMI, 0);
+    break;
  }
 }

--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -1873,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {

    unsigned LoReg, HiReg, ClrReg;
    unsigned ClrOpcode, SExtOpcode;
-    EVT ClrVT = NVT;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:
@ -1883,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
-      ClrOpcode  = X86::MOV32r0;  ClrReg = X86::EDX;  ClrVT = MVT::i32;
+      ClrOpcode  = X86::MOV16r0; ClrReg = X86::DX;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
@ -1893,7 +1892,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
      break;
    case MVT::i64:
      LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
-      ClrOpcode  = ~0U; // NOT USED.
+      ClrOpcode  = X86::MOV64r0;
      SExtOpcode = X86::CQO;
      break;
    }
@ -1932,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
      } else {
        // Zero out the high part, effectively zero extending the input.
-        SDValue ClrNode;
-
-        if (NVT.getSimpleVT() == MVT::i64) {
-          ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
-                            0);
-          // We just did a 32-bit clear, insert it into a 64-bit register to
-          // clear the whole 64-bit reg.
-          SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
-          SDValue SubRegNo =
-            CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
-          ClrNode =
-            SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
-                                           MVT::i64, Zero, ClrNode, SubRegNo),
-                    0);
-        } else {
-          ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0);
-        }
-
+        SDValue ClrNode =
+          SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
                                      ClrNode, InFlag).getValue(1);
      }
--- a/llvm/lib/Target/X86/X86Instr64bit.td
+++ b/llvm/lib/Target/X86/X86Instr64bit.td
@ -1598,17 +1598,20 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
 // Alias Instructions
 //===----------------------------------------------------------------------===//

-// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
-// equivalent due to implicit zero-extending, and it sometimes has a smaller
-// encoding.
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
 // FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
 // when we have a better way to specify isel priority.
-let AddedComplexity = 1 in
-def : Pat<(i64 0),
-          (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+                 "",
+                 [(set GR64:$dst, 0)]>;

-
-// Materialize i64 constant where top 32-bits are zero.
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
 let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
                        "", [(set GR64:$dst, i64immZExt32:$src)]>;
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@ -1019,12 +1019,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
  switch (Opc) {
  default: break;
  case X86::MOV8r0:
-  case X86::MOV32r0: {
+  case X86::MOV16r0:
+  case X86::MOV32r0:
+  case X86::MOV64r0: {
    if (!isSafeToClobberEFLAGS(MBB, I)) {
      switch (Opc) {
      default: break;
      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
+      case X86::MOV16r0: Opc = X86::MOV16ri; break;
      case X86::MOV32r0: Opc = X86::MOV32ri; break;
+      case X86::MOV64r0: Opc = X86::MOV64ri; break;
      }
      Clone = false;
    }
@ -2291,8 +2295,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
    isTwoAddrFold = true;
  } else if (i == 0) { // If operand 0
-    if (MI->getOpcode() == X86::MOV32r0)
+    if (MI->getOpcode() == X86::MOV64r0)
+      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV32r0)
      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV16r0)
+      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV8r0)
      NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
    if (NewMI)
@ -2560,7 +2568,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
  } else if (OpNum == 0) { // If operand 0
    switch (Opc) {
    case X86::MOV8r0:
+    case X86::MOV16r0:
    case X86::MOV32r0:
+    case X86::MOV64r0:
      return true;
    default: break;
    }
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@ -3718,18 +3718,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
 def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
                 "xor{b}\t$dst, $dst",
                 [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+                 "",
+                 [(set GR16:$dst, 0)]>, OpSize;
                 
 def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
                 "xor{l}\t$dst, $dst",
                 [(set GR32:$dst, 0)]>;
 }

-// Use xorl instead of xorw since we don't care about the high 16 bits,
-// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in
-def : Pat<(i16 0),
-          (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;
-
 //===----------------------------------------------------------------------===//
 // Thread Local Storage Instructions
 //
--- a/llvm/test/CodeGen/X86/remat-mov-0.ll
+++ b/llvm/test/CodeGen/X86/remat-mov-0.ll
@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep {xorl	%edi, %edi} | count 4
+
+; CodeGen should remat the zero instead of spilling it.
+
+declare void @foo(i64 %p)
+
+define void @bar() nounwind {
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  ret void
+}