mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-14 13:57:51 +00:00
Add support for the rep movs[bwd] instructions, and emit them when generating
code for the llvm.memcpy intrinsic.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11351 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
33aec9efa9
commit
915e5e56d7
@ -1157,6 +1157,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
|
|||||||
case Intrinsic::va_start:
|
case Intrinsic::va_start:
|
||||||
case Intrinsic::va_copy:
|
case Intrinsic::va_copy:
|
||||||
case Intrinsic::va_end:
|
case Intrinsic::va_end:
|
||||||
|
case Intrinsic::memcpy:
|
||||||
// We directly implement these intrinsics
|
// We directly implement these intrinsics
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -1188,6 +1189,58 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
|
|||||||
return;
|
return;
|
||||||
case Intrinsic::va_end: return; // Noop on X86
|
case Intrinsic::va_end: return; // Noop on X86
|
||||||
|
|
||||||
|
case Intrinsic::memcpy: {
|
||||||
|
assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
|
||||||
|
unsigned Align = 1;
|
||||||
|
if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
|
||||||
|
Align = AlignC->getRawValue();
|
||||||
|
if (Align == 0) Align = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turn the byte count into the number of iterations
|
||||||
|
unsigned ByteReg = getReg(CI.getOperand(3));
|
||||||
|
unsigned CountReg;
|
||||||
|
|
||||||
|
switch (Align & 3) {
|
||||||
|
case 2: // WORD aligned
|
||||||
|
CountReg = makeAnotherReg(Type::IntTy);
|
||||||
|
BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
|
||||||
|
break;
|
||||||
|
case 0: // DWORD aligned
|
||||||
|
CountReg = makeAnotherReg(Type::IntTy);
|
||||||
|
BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
|
||||||
|
break;
|
||||||
|
case 1: // BYTE aligned
|
||||||
|
case 3: // BYTE aligned
|
||||||
|
CountReg = ByteReg;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No matter what the alignment is, we put the source in ESI, the
|
||||||
|
// destination in EDI, and the count in ECX.
|
||||||
|
TmpReg1 = getReg(CI.getOperand(1));
|
||||||
|
TmpReg2 = getReg(CI.getOperand(2));
|
||||||
|
BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
|
||||||
|
BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
|
||||||
|
BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
|
||||||
|
|
||||||
|
unsigned Bytes = getReg(CI.getOperand(3));
|
||||||
|
switch (Align & 3) {
|
||||||
|
case 1: // BYTE aligned
|
||||||
|
case 3: // BYTE aligned
|
||||||
|
BuildMI(BB, X86::REP_MOVSB, 0);
|
||||||
|
break;
|
||||||
|
case 2: // WORD aligned
|
||||||
|
BuildMI(BB, X86::REP_MOVSW, 0);
|
||||||
|
break;
|
||||||
|
case 0: // DWORD aligned
|
||||||
|
BuildMI(BB, X86::REP_MOVSD, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
|
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -470,6 +470,9 @@ void Emitter::emitInstruction(MachineInstr &MI) {
|
|||||||
unsigned Opcode = MI.getOpcode();
|
unsigned Opcode = MI.getOpcode();
|
||||||
const TargetInstrDescriptor &Desc = II->get(Opcode);
|
const TargetInstrDescriptor &Desc = II->get(Opcode);
|
||||||
|
|
||||||
|
// Emit the repeat opcode prefix as needed.
|
||||||
|
if ((Desc.TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
|
||||||
|
|
||||||
// Emit instruction prefixes if necessary
|
// Emit instruction prefixes if necessary
|
||||||
if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);// Operand size...
|
if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);// Operand size...
|
||||||
|
|
||||||
@ -477,6 +480,7 @@ void Emitter::emitInstruction(MachineInstr &MI) {
|
|||||||
case X86II::TB:
|
case X86II::TB:
|
||||||
MCE.emitByte(0x0F); // Two-byte opcode prefix
|
MCE.emitByte(0x0F); // Two-byte opcode prefix
|
||||||
break;
|
break;
|
||||||
|
case X86II::REP: break; // already handled.
|
||||||
case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
|
case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
|
||||||
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
|
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
|
||||||
MCE.emitByte(0xD8+
|
MCE.emitByte(0xD8+
|
||||||
|
@ -1157,6 +1157,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
|
|||||||
case Intrinsic::va_start:
|
case Intrinsic::va_start:
|
||||||
case Intrinsic::va_copy:
|
case Intrinsic::va_copy:
|
||||||
case Intrinsic::va_end:
|
case Intrinsic::va_end:
|
||||||
|
case Intrinsic::memcpy:
|
||||||
// We directly implement these intrinsics
|
// We directly implement these intrinsics
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -1188,6 +1189,58 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
|
|||||||
return;
|
return;
|
||||||
case Intrinsic::va_end: return; // Noop on X86
|
case Intrinsic::va_end: return; // Noop on X86
|
||||||
|
|
||||||
|
case Intrinsic::memcpy: {
|
||||||
|
assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
|
||||||
|
unsigned Align = 1;
|
||||||
|
if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
|
||||||
|
Align = AlignC->getRawValue();
|
||||||
|
if (Align == 0) Align = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turn the byte count into the number of iterations
|
||||||
|
unsigned ByteReg = getReg(CI.getOperand(3));
|
||||||
|
unsigned CountReg;
|
||||||
|
|
||||||
|
switch (Align & 3) {
|
||||||
|
case 2: // WORD aligned
|
||||||
|
CountReg = makeAnotherReg(Type::IntTy);
|
||||||
|
BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
|
||||||
|
break;
|
||||||
|
case 0: // DWORD aligned
|
||||||
|
CountReg = makeAnotherReg(Type::IntTy);
|
||||||
|
BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
|
||||||
|
break;
|
||||||
|
case 1: // BYTE aligned
|
||||||
|
case 3: // BYTE aligned
|
||||||
|
CountReg = ByteReg;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No matter what the alignment is, we put the source in ESI, the
|
||||||
|
// destination in EDI, and the count in ECX.
|
||||||
|
TmpReg1 = getReg(CI.getOperand(1));
|
||||||
|
TmpReg2 = getReg(CI.getOperand(2));
|
||||||
|
BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
|
||||||
|
BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
|
||||||
|
BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
|
||||||
|
|
||||||
|
unsigned Bytes = getReg(CI.getOperand(3));
|
||||||
|
switch (Align & 3) {
|
||||||
|
case 1: // BYTE aligned
|
||||||
|
case 3: // BYTE aligned
|
||||||
|
BuildMI(BB, X86::REP_MOVSB, 0);
|
||||||
|
break;
|
||||||
|
case 2: // WORD aligned
|
||||||
|
BuildMI(BB, X86::REP_MOVSW, 0);
|
||||||
|
break;
|
||||||
|
case 0: // DWORD aligned
|
||||||
|
BuildMI(BB, X86::REP_MOVSD, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
|
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -86,9 +86,9 @@ namespace X86II {
|
|||||||
OpSize = 1 << 5,
|
OpSize = 1 << 5,
|
||||||
|
|
||||||
// Op0Mask - There are several prefix bytes that are used to form two byte
|
// Op0Mask - There are several prefix bytes that are used to form two byte
|
||||||
// opcodes. These are currently 0x0F, and 0xD8-0xDF. This mask is used to
|
// opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
|
||||||
// obtain the setting of this field. If no bits in this field is set, there
|
// used to obtain the setting of this field. If no bits in this field is
|
||||||
// is no prefix byte for obtaining a multibyte opcode.
|
// set, there is no prefix byte for obtaining a multibyte opcode.
|
||||||
//
|
//
|
||||||
Op0Shift = 6,
|
Op0Shift = 6,
|
||||||
Op0Mask = 0xF << Op0Shift,
|
Op0Mask = 0xF << Op0Shift,
|
||||||
@ -97,12 +97,16 @@ namespace X86II {
|
|||||||
// starts with a 0x0F byte before the real opcode.
|
// starts with a 0x0F byte before the real opcode.
|
||||||
TB = 1 << Op0Shift,
|
TB = 1 << Op0Shift,
|
||||||
|
|
||||||
|
// REP - The 0xF3 prefix byte indicating repetition of the following
|
||||||
|
// instruction.
|
||||||
|
REP = 2 << Op0Shift,
|
||||||
|
|
||||||
// D8-DF - These escape opcodes are used by the floating point unit. These
|
// D8-DF - These escape opcodes are used by the floating point unit. These
|
||||||
// values must remain sequential.
|
// values must remain sequential.
|
||||||
D8 = 2 << Op0Shift, D9 = 3 << Op0Shift,
|
D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
|
||||||
DA = 4 << Op0Shift, DB = 5 << Op0Shift,
|
DA = 5 << Op0Shift, DB = 6 << Op0Shift,
|
||||||
DC = 6 << Op0Shift, DD = 7 << Op0Shift,
|
DC = 7 << Op0Shift, DD = 8 << Op0Shift,
|
||||||
DE = 8 << Op0Shift, DF = 9 << Op0Shift,
|
DE = 9 << Op0Shift, DF = 10 << Op0Shift,
|
||||||
|
|
||||||
//===------------------------------------------------------------------===//
|
//===------------------------------------------------------------------===//
|
||||||
// This three-bit field describes the size of a memory operand. Zero is
|
// This three-bit field describes the size of a memory operand. Zero is
|
||||||
|
@ -92,14 +92,15 @@ class Pattern<dag P> {
|
|||||||
// emitter that various prefix bytes are required.
|
// emitter that various prefix bytes are required.
|
||||||
class OpSize { bit hasOpSizePrefix = 1; }
|
class OpSize { bit hasOpSizePrefix = 1; }
|
||||||
class TB { bits<4> Prefix = 1; }
|
class TB { bits<4> Prefix = 1; }
|
||||||
class D8 { bits<4> Prefix = 2; }
|
class REP { bits<4> Prefix = 2; }
|
||||||
class D9 { bits<4> Prefix = 3; }
|
class D8 { bits<4> Prefix = 3; }
|
||||||
class DA { bits<4> Prefix = 4; }
|
class D9 { bits<4> Prefix = 4; }
|
||||||
class DB { bits<4> Prefix = 5; }
|
class DA { bits<4> Prefix = 5; }
|
||||||
class DC { bits<4> Prefix = 6; }
|
class DB { bits<4> Prefix = 6; }
|
||||||
class DD { bits<4> Prefix = 7; }
|
class DC { bits<4> Prefix = 7; }
|
||||||
class DE { bits<4> Prefix = 8; }
|
class DD { bits<4> Prefix = 8; }
|
||||||
class DF { bits<4> Prefix = 9; }
|
class DE { bits<4> Prefix = 9; }
|
||||||
|
class DF { bits<4> Prefix = 10; }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -172,6 +173,14 @@ def XCHGrr32 : X86Inst<"xchg", 0x87, MRMDestReg, Arg32>; // xchg R32, R32
|
|||||||
def LEAr16 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg16>, OpSize; // R16 = lea [mem]
|
def LEAr16 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg16>, OpSize; // R16 = lea [mem]
|
||||||
def LEAr32 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg32>; // R32 = lea [mem]
|
def LEAr32 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg32>; // R32 = lea [mem]
|
||||||
|
|
||||||
|
|
||||||
|
def REP_MOVSB : X86Inst<"rep movsb", 0xA4, RawFrm, NoArg>, REP,
|
||||||
|
Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
|
||||||
|
def REP_MOVSW : X86Inst<"rep movsw", 0xA5, RawFrm, NoArg>, REP, OpSize,
|
||||||
|
Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
|
||||||
|
def REP_MOVSD : X86Inst<"rep movsd", 0xA5, RawFrm, NoArg>, REP,
|
||||||
|
Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Move Instructions...
|
// Move Instructions...
|
||||||
//
|
//
|
||||||
|
Loading…
x
Reference in New Issue
Block a user