[X86][AsmParser] Reapply "Refactor code and optimize more instructions from VEX3 to VEX2"
This was reverted in d4994d0e79 because a BOLT test failed after the
encoding changed. Relanded the patch with the updated test.
This commit is contained in:
parent 36b702901a
commit db39d47928
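For illustration, the effect of the rewrite on one commutable AVX instruction,
taken from the updated assembler test below; the byte-level annotations are
added commentary, not part of the commit:

    // Before: %ymm12 lands in ModRM.r/m and needs VEX.B, which only the
    // 3-byte (C4) VEX prefix can encode: 5 bytes in total.
    vaddps %ymm12, %ymm4, %ymm6   // encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
    // After commuting the sources, %ymm12 is carried by VEX.vvvv and %ymm4
    // by ModRM.r/m, so the 2-byte (C5) prefix suffices: 4 bytes in total.
    vaddps %ymm4, %ymm12, %ymm6   // encoding: [0xc5,0x9c,0x58,0xf4]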
@@ -11,7 +11,7 @@
# CHECK-NEXT: DW_CFA_expression: RBP DW_OP_breg6 RBP+0
# CHECK-NEXT: DW_CFA_advance_loc: 5
# CHECK-NEXT: DW_CFA_def_cfa_expression: DW_OP_breg6 RBP-8, DW_OP_deref
# CHECK-NEXT: DW_CFA_advance_loc2: 3174
# CHECK-NEXT: DW_CFA_advance_loc2: 3130
# CHECK-NEXT: DW_CFA_def_cfa: R10 +0
# CHECK-NEXT: DW_CFA_advance_loc: 5
# CHECK-NEXT: DW_CFA_def_cfa: RSP +8

@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86MCTargetDesc.h"

@@ -3633,7 +3634,12 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}

bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  if (ForcedVEXEncoding != VEXEncoding_VEX3 &&
      X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
    return true;

  switch (Inst.getOpcode()) {
  default: return false;

@@ -3657,178 +3663,13 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  }

    return false;
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't.
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
    case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
    case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
    case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
    case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
    case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
    case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
    case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
    case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
    case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
    case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
    case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
    case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't.
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
  case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
  case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
  case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
    // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
    // FIXME: It would be great if we could just do this with an InstAlias.
    if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::RCR8ri: NewOpc = X86::RCR8r1; break;
    case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
    case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
    case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
    case X86::RCL8ri: NewOpc = X86::RCL8r1; break;
    case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
    case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
    case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
    case X86::ROR8ri: NewOpc = X86::ROR8r1; break;
    case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
    case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
    case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
    case X86::ROL8ri: NewOpc = X86::ROL8r1; break;
    case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
    case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
    case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
    case X86::SAR8ri: NewOpc = X86::SAR8r1; break;
    case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
    case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
    case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
    case X86::SHR8ri: NewOpc = X86::SHR8r1; break;
    case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
    case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
    case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
    case X86::SHL8ri: NewOpc = X86::SHL8r1; break;
    case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
    case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
    case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
    }

    MCInst TmpInst;
    TmpInst.setOpcode(NewOpc);
    TmpInst.addOperand(Inst.getOperand(0));
    TmpInst.addOperand(Inst.getOperand(1));
    Inst = TmpInst;
    return true;
  }
  case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
  case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
  case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
  case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
  case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
  case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
  case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
    // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
    // FIXME: It would be great if we could just do this with an InstAlias.
    if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
        Inst.getOperand(X86::AddrNumOperands).getImm() != 1)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::RCR8mi: NewOpc = X86::RCR8m1; break;
    case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
    case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
    case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
    case X86::RCL8mi: NewOpc = X86::RCL8m1; break;
    case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
    case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
    case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
    case X86::ROR8mi: NewOpc = X86::ROR8m1; break;
    case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
    case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
    case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
    case X86::ROL8mi: NewOpc = X86::ROL8m1; break;
    case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
    case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
    case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
    case X86::SAR8mi: NewOpc = X86::SAR8m1; break;
    case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
    case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
    case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
    case X86::SHR8mi: NewOpc = X86::SHR8m1; break;
    case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
    case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
    case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
    case X86::SHL8mi: NewOpc = X86::SHL8m1; break;
    case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
    case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
    case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
    }

    MCInst TmpInst;
    TmpInst.setOpcode(NewOpc);
    for (int i = 0; i != X86::AddrNumOperands; ++i)
      TmpInst.addOperand(Inst.getOperand(i));
    Inst = TmpInst;
    return true;
  }
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization. We can't write an
    // instalias with an immediate operand yet.
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;

    MCInst TmpInst;
    TmpInst.setOpcode(X86::INT3);
    Inst = TmpInst;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
  }

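The shift/rotate rewrite above is purely an encoding-size optimization: the
x86 opcode map has dedicated shift-by-one forms (D0/D1) next to the
immediate forms (C0/C1). A minimal illustration; the opcode bytes are added
commentary based on the opcode map, not taken from the diff:

    shlb $2, %al    # encoding: [0xc0,0xe0,0x02]  (SHL r/m8, imm8: 3 bytes)
    shlb $1, %al    # encoding: [0xd0,0xe0]       (SHL r/m8, 1: 2 bytes)
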
@@ -4,6 +4,7 @@ add_llvm_component_library(LLVMX86Desc
  X86InstComments.cpp
  X86InstPrinterCommon.cpp
  X86InstrRelaxTables.cpp
  X86EncodingOptimization.cpp
  X86ShuffleDecode.cpp
  X86AsmBackend.cpp
  X86MCTargetDesc.cpp

llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp (new file, 159 lines)
@@ -0,0 +1,159 @@
//===-- X86EncodingOptimization.cpp - X86 Encoding optimization -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the X86 encoding optimization
//
//===----------------------------------------------------------------------===//

#include "X86EncodingOptimization.h"
#include "X86BaseInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

static bool shouldExchange(const MCInst &MI, unsigned OpIdx1, unsigned OpIdx2) {
  return !X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx1).getReg()) &&
         X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx2).getReg());
}

bool X86::optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc) {
  unsigned OpIdx1, OpIdx2;
  unsigned NewOpc;
  unsigned Opcode = MI.getOpcode();
#define FROM_TO(FROM, TO, IDX1, IDX2)                                          \
  case X86::FROM:                                                              \
    NewOpc = X86::TO;                                                          \
    OpIdx1 = IDX1;                                                             \
    OpIdx2 = IDX2;                                                             \
    break;
#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 1)
  switch (MI.getOpcode()) {
  default: {
    // If the instruction is a commutable arithmetic instruction we might be
    // able to commute the operands to get a 2 byte VEX prefix.
    uint64_t TSFlags = Desc.TSFlags;
    if (!Desc.isCommutable() || (TSFlags & X86II::EncodingMask) != X86II::VEX ||
        (TSFlags & X86II::OpMapMask) != X86II::TB ||
        (TSFlags & X86II::FormMask) != X86II::MRMSrcReg ||
        (TSFlags & X86II::REX_W) || !(TSFlags & X86II::VEX_4V) ||
        MI.getNumOperands() != 3)
      return false;
    // These two are not truly commutable.
    if (Opcode == X86::VMOVHLPSrr || Opcode == X86::VUNPCKHPDrr)
      return false;
    OpIdx1 = 1;
    OpIdx2 = 2;
    if (!shouldExchange(MI, OpIdx1, OpIdx2))
      return false;
    std::swap(MI.getOperand(OpIdx1), MI.getOperand(OpIdx2));
    return true;
  }
  // Commute operands to get a smaller encoding by using VEX.R instead of
  // VEX.B if one of the registers is extended, but other isn't.
  FROM_TO(VMOVZPQILo2PQIrr, VMOVPQI2QIrr, 0, 1)
  TO_REV(VMOVAPDrr)
  TO_REV(VMOVAPDYrr)
  TO_REV(VMOVAPSrr)
  TO_REV(VMOVAPSYrr)
  TO_REV(VMOVDQArr)
  TO_REV(VMOVDQAYrr)
  TO_REV(VMOVDQUrr)
  TO_REV(VMOVDQUYrr)
  TO_REV(VMOVUPDrr)
  TO_REV(VMOVUPDYrr)
  TO_REV(VMOVUPSrr)
  TO_REV(VMOVUPSYrr)
#undef TO_REV
#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 2)
  TO_REV(VMOVSDrr)
  TO_REV(VMOVSSrr)
#undef TO_REV
#undef FROM_TO
  }
  if (!shouldExchange(MI, OpIdx1, OpIdx2))
    return false;
  MI.setOpcode(NewOpc);
  return true;
}

// NOTE: We may write this as an InstAlias if it's only used by AsmParser. See
// validateTargetOperandClass.
bool X86::optimizeShiftRotateWithImmediateOne(MCInst &MI) {
  unsigned NewOpc;
#define TO_IMM1(FROM)                                                          \
  case X86::FROM##i:                                                           \
    NewOpc = X86::FROM##1;                                                     \
    break;
  switch (MI.getOpcode()) {
  default:
    return false;
  TO_IMM1(RCR8r)
  TO_IMM1(RCR16r)
  TO_IMM1(RCR32r)
  TO_IMM1(RCR64r)
  TO_IMM1(RCL8r)
  TO_IMM1(RCL16r)
  TO_IMM1(RCL32r)
  TO_IMM1(RCL64r)
  TO_IMM1(ROR8r)
  TO_IMM1(ROR16r)
  TO_IMM1(ROR32r)
  TO_IMM1(ROR64r)
  TO_IMM1(ROL8r)
  TO_IMM1(ROL16r)
  TO_IMM1(ROL32r)
  TO_IMM1(ROL64r)
  TO_IMM1(SAR8r)
  TO_IMM1(SAR16r)
  TO_IMM1(SAR32r)
  TO_IMM1(SAR64r)
  TO_IMM1(SHR8r)
  TO_IMM1(SHR16r)
  TO_IMM1(SHR32r)
  TO_IMM1(SHR64r)
  TO_IMM1(SHL8r)
  TO_IMM1(SHL16r)
  TO_IMM1(SHL32r)
  TO_IMM1(SHL64r)
  TO_IMM1(RCR8m)
  TO_IMM1(RCR16m)
  TO_IMM1(RCR32m)
  TO_IMM1(RCR64m)
  TO_IMM1(RCL8m)
  TO_IMM1(RCL16m)
  TO_IMM1(RCL32m)
  TO_IMM1(RCL64m)
  TO_IMM1(ROR8m)
  TO_IMM1(ROR16m)
  TO_IMM1(ROR32m)
  TO_IMM1(ROR64m)
  TO_IMM1(ROL8m)
  TO_IMM1(ROL16m)
  TO_IMM1(ROL32m)
  TO_IMM1(ROL64m)
  TO_IMM1(SAR8m)
  TO_IMM1(SAR16m)
  TO_IMM1(SAR32m)
  TO_IMM1(SAR64m)
  TO_IMM1(SHR8m)
  TO_IMM1(SHR16m)
  TO_IMM1(SHR32m)
  TO_IMM1(SHR64m)
  TO_IMM1(SHL8m)
  TO_IMM1(SHL16m)
  TO_IMM1(SHL32m)
  TO_IMM1(SHL64m)
  }
  MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
  if (!LastOp.isImm() || LastOp.getImm() != 1)
    return false;
  MI.setOpcode(NewOpc);
  MI.erase(&LastOp);
  return true;
}

llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h (new file, 23 lines)
@@ -0,0 +1,23 @@
//===-- X86EncodingOptimization.h - X86 Encoding optimization ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the X86 encoding optimization
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ENCODINGOPTIMIZATION_H
#define LLVM_LIB_TARGET_X86_X86ENCODINGOPTIMIZATION_H
namespace llvm {
class MCInst;
class MCInstrDesc;
namespace X86 {
bool optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc);
bool optimizeShiftRotateWithImmediateOne(MCInst &MI);
} // namespace X86
} // namespace llvm
#endif

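A minimal sketch of how a client drives the two new entry points, mirroring
the call site added to X86AsmParser::processInstruction above; the standalone
wrapper and its VEX3Forced flag are hypothetical, for illustration only:

    #include "MCTargetDesc/X86EncodingOptimization.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstrInfo.h"

    using namespace llvm;

    // Returns true if Inst was rewritten in place to a smaller encoding.
    static bool shrinkEncoding(MCInst &Inst, const MCInstrInfo &MII,
                               bool VEX3Forced) {
      // Skip the VEX3->VEX2 rewrite when a 3-byte VEX prefix was requested
      // explicitly, as the parser does when ForcedVEXEncoding is
      // VEXEncoding_VEX3.
      if (!VEX3Forced &&
          X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
        return true;
      // Both helpers mutate Inst and report whether they changed it.
      return X86::optimizeShiftRotateWithImmediateOne(Inst);
    }
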
@@ -551,34 +551,6 @@ def : InstAlias<"shrd{w}\t{$reg, $mem|$mem, $reg}", (SHRD16mrCL i16mem:$mem, GR1
def : InstAlias<"shrd{l}\t{$reg, $mem|$mem, $reg}", (SHRD32mrCL i32mem:$mem, GR32:$reg), 0>;
def : InstAlias<"shrd{q}\t{$reg, $mem|$mem, $reg}", (SHRD64mrCL i64mem:$mem, GR64:$reg), 0>;

/* FIXME: This is disabled because the asm matcher is currently incapable of
 * matching a fixed immediate like $1.
// "shl X, $1" is an alias for "shl X".
multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
  def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
  def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
  def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
  def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
  def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
  def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
  def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
  def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
                  (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
}

defm : ShiftRotateByOneAlias<"rcl", "RCL">;
defm : ShiftRotateByOneAlias<"rcr", "RCR">;
defm : ShiftRotateByOneAlias<"rol", "ROL">;
defm : ShiftRotateByOneAlias<"ror", "ROR">;
FIXME */

// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
def : InstAlias<"test{b}\t{$mem, $val|$val, $mem}",
                (TEST8mr i8mem :$mem, GR8 :$val), 0>;

@@ -13,6 +13,7 @@

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"

@@ -501,6 +502,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(*MaybeMCOp);

  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:

@@ -534,59 +538,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
    break;
  }

  // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
  // if one of the registers is extended, but other isn't.
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
      case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
      case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
      case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
      case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
      case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
      case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
      case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
      case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
      case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
      case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
      case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
      case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
      case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }

  case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik:
  case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik:
  case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik:

@@ -954,12 +905,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
    }
    break;
  }

  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
    // These are not truly commutable so hide them from the default case.
    break;

  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    if (AsmPrinter.getSubtarget().is64Bit())

@@ -967,19 +912,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
    break;

  default: {
    // If the instruction is a commutable arithmetic instruction we might be
    // able to commute the operands to get a 2 byte VEX prefix.
    uint64_t TSFlags = MI->getDesc().TSFlags;
    if (MI->getDesc().isCommutable() &&
        (TSFlags & X86II::EncodingMask) == X86II::VEX &&
        (TSFlags & X86II::OpMapMask) == X86II::TB &&
        (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
        !(TSFlags & X86II::REX_W) && (TSFlags & X86II::VEX_4V) &&
        OutMI.getNumOperands() == 3) {
      if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
          X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
    }
    // Add an REP prefix to BSF instructions so that new processors can
    // recognize as TZCNT, which has better performance than BSF.
    if (X86::isBSF(OutMI.getOpcode()) && !MF.getFunction().hasOptSize()) {

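The BSF note at the end of the hunk above relies on TZCNT sharing BSF's
opcode bytes behind an F3 (REP) prefix; a minimal illustration, with
encodings added as commentary per the x86 opcode map:

    bsfl %ecx, %eax      # encoding: [0x0f,0xbc,0xc1]
    tzcntl %ecx, %eax    # encoding: [0xf3,0x0f,0xbc,0xc1] (REP-prefixed BSF)
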
@@ -3168,20 +3168,20 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4]
vdivpd %ymm12, %ymm4, %ymm6

// CHECK: vaddps %ymm12, %ymm4, %ymm6
// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
// CHECK: vaddps %ymm4, %ymm12, %ymm6
// CHECK: encoding: [0xc5,0x9c,0x58,0xf4]
vaddps %ymm12, %ymm4, %ymm6

// CHECK: vaddpd %ymm12, %ymm4, %ymm6
// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4]
// CHECK: vaddpd %ymm4, %ymm12, %ymm6
// CHECK: encoding: [0xc5,0x9d,0x58,0xf4]
vaddpd %ymm12, %ymm4, %ymm6

// CHECK: vmulps %ymm12, %ymm4, %ymm6
// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4]
// CHECK: vmulps %ymm4, %ymm12, %ymm6
// CHECK: encoding: [0xc5,0x9c,0x59,0xf4]
vmulps %ymm12, %ymm4, %ymm6

// CHECK: vmulpd %ymm12, %ymm4, %ymm6
// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4]
// CHECK: vmulpd %ymm4, %ymm12, %ymm6
// CHECK: encoding: [0xc5,0x9d,0x59,0xf4]
vmulpd %ymm12, %ymm4, %ymm6

// CHECK: vmaxps (%rax), %ymm4, %ymm6

@@ -51,7 +51,7 @@
# NORMAL-NEXT: 1 1 0.50 vpslldq $12, %xmm3, %xmm5
# NORMAL-NEXT: 1 3 1.00 vaddps %xmm4, %xmm5, %xmm7
# NORMAL-NEXT: 1 3 1.00 vaddps %xmm6, %xmm7, %xmm8
# NORMAL-NEXT: 1 3 1.00 vaddps %xmm8, %xmm0, %xmm9
# NORMAL-NEXT: 1 3 1.00 vaddps %xmm0, %xmm8, %xmm9
# NORMAL-NEXT: 1 1 0.50 vshufps $255, %xmm9, %xmm9, %xmm0
# NORMAL-NEXT: 1 1 1.00 * vmovups %xmm9, (%r11,%r9,4)
# NORMAL-NEXT: 1 1 0.50 cmpl %r8d, %esi

@@ -70,7 +70,7 @@
# WITHENCODINGS-NEXT: 1 1 0.50 5 c5 d1 73 fb 0c vpslldq $12, %xmm3, %xmm5
# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 d0 58 fc vaddps %xmm4, %xmm5, %xmm7
# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 40 58 c6 vaddps %xmm6, %xmm7, %xmm8
# WITHENCODINGS-NEXT: 1 3 1.00 5 c4 41 78 58 c8 vaddps %xmm8, %xmm0, %xmm9
# WITHENCODINGS-NEXT: 1 3 1.00 4 c5 38 58 c8 vaddps %xmm0, %xmm8, %xmm9
# WITHENCODINGS-NEXT: 1 1 0.50 6 c4 c1 30 c6 c1 ff vshufps $255, %xmm9, %xmm9, %xmm0
# WITHENCODINGS-NEXT: 1 1 1.00 * 6 c4 01 78 11 0c 8b vmovups %xmm9, (%r11,%r9,4)
# WITHENCODINGS-NEXT: 1 1 0.50 3 44 39 c6 cmpl %r8d, %esi