[x86] invert logic for attribute 'FeatureFastUAMem'

This is a 'no functional change intended' patch. It removes one FIXME, but adds several more.

Motivation: the FeatureFastUAMem attribute may be too general. It is used to determine whether
a misaligned memory access of any size under 32 bytes is 'fast'. From the added FIXME comments,
however, you can see that we're not consistent about this. Changing the name of the attribute
makes it clearer to see the logic holes.

Changing this to a 'slow' attribute also means we don't have to add an explicit 'fast' attribute
to new chips; fast unaligned accesses have been standard for several generations of CPUs now.

Differential Revision: http://reviews.llvm.org/D12154



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245729 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2015-08-21 20:17:26 +00:00
parent 81e467d352
commit 2071d7abd9
5 changed files with 98 additions and 89 deletions

View File

@ -79,16 +79,12 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">; "Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">; "SHLD instruction is slow">;
// FIXME: This is a 16-byte (SSE/AVX) feature; we should rename it to make that def FeatureSlowUAMem : SubtargetFeature<"slow-unaligned-mem-under-32",
// explicit. Also, it seems this would be the default state for most chips "IsUAMemUnder32Slow", "true",
// going forward, so it would probably be better to negate the logic and "Slow unaligned 16-byte-or-less memory access">;
// match the 32-byte "slow mem" feature below.
def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"IsUAMemFast", "true",
"Fast unaligned memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true", "IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">; "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions", "Support SSE 4a instructions",
[FeatureSSE3]>; [FeatureSSE3]>;
@ -213,38 +209,42 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
class Proc<string Name, list<SubtargetFeature> Features> class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>; : ProcessorModel<Name, GenericModel, Features>;
def : Proc<"generic", []>; def : Proc<"generic", [FeatureSlowUAMem]>;
def : Proc<"i386", []>; def : Proc<"i386", [FeatureSlowUAMem]>;
def : Proc<"i486", []>; def : Proc<"i486", [FeatureSlowUAMem]>;
def : Proc<"i586", []>; def : Proc<"i586", [FeatureSlowUAMem]>;
def : Proc<"pentium", []>; def : Proc<"pentium", [FeatureSlowUAMem]>;
def : Proc<"pentium-mmx", [FeatureMMX]>; def : Proc<"pentium-mmx", [FeatureSlowUAMem, FeatureMMX]>;
def : Proc<"i686", []>; def : Proc<"i686", [FeatureSlowUAMem]>;
def : Proc<"pentiumpro", [FeatureCMOV]>; def : Proc<"pentiumpro", [FeatureSlowUAMem, FeatureCMOV]>;
def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>; def : Proc<"pentium2", [FeatureSlowUAMem, FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSSE1]>; def : Proc<"pentium3", [FeatureSlowUAMem, FeatureSSE1]>;
def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>; def : Proc<"pentium3m", [FeatureSlowUAMem, FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium-m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>; def : Proc<"pentium4", [FeatureSlowUAMem, FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
// Intel Core Duo. // Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel, def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureSSE3, FeatureSlowBTMem]>; [FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
// NetBurst. // NetBurst.
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; def : Proc<"prescott", [FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"nocona", [FeatureSlowUAMem, FeatureSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
// Intel Core 2 Solo/Duo. // Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel, def : ProcessorModel<"core2", SandyBridgeModel,
[FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; [FeatureSlowUAMem, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : ProcessorModel<"penryn", SandyBridgeModel, def : ProcessorModel<"penryn", SandyBridgeModel,
[FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; [FeatureSlowUAMem, FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
// Atom CPUs. // Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom, ProcIntelAtom,
FeatureSlowUAMem,
FeatureSSSE3, FeatureSSSE3,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureMOVBE, FeatureMOVBE,
@ -272,8 +272,7 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
FeaturePRFCHW, FeaturePRFCHW,
FeatureSlowLEA, FeatureSlowLEA,
FeatureSlowIncDec, FeatureSlowIncDec,
FeatureSlowBTMem, FeatureSlowBTMem
FeatureFastUAMem
]>; ]>;
def : SilvermontProc<"silvermont">; def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias. def : SilvermontProc<"slm">; // Legacy alias.
@ -283,7 +282,6 @@ class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureSSE42, FeatureSSE42,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowBTMem,
FeatureFastUAMem,
FeaturePOPCNT FeaturePOPCNT
]>; ]>;
def : NehalemProc<"nehalem">; def : NehalemProc<"nehalem">;
@ -295,7 +293,6 @@ class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureSSE42, FeatureSSE42,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowBTMem,
FeatureFastUAMem,
FeaturePOPCNT, FeaturePOPCNT,
FeatureAES, FeatureAES,
FeaturePCLMUL FeaturePCLMUL
@ -308,7 +305,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureAVX, FeatureAVX,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowBTMem,
FeatureFastUAMem,
FeatureSlowUAMem32, FeatureSlowUAMem32,
FeaturePOPCNT, FeaturePOPCNT,
FeatureAES, FeatureAES,
@ -321,7 +317,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureAVX, FeatureAVX,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowBTMem,
FeatureFastUAMem,
FeatureSlowUAMem32, FeatureSlowUAMem32,
FeaturePOPCNT, FeaturePOPCNT,
FeatureAES, FeatureAES,
@ -337,7 +332,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2, FeatureAVX2,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowBTMem,
FeatureFastUAMem,
FeaturePOPCNT, FeaturePOPCNT,
FeatureAES, FeatureAES,
FeaturePCLMUL, FeaturePCLMUL,
@ -360,7 +354,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2, FeatureAVX2,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowBTMem,
FeatureFastUAMem,
FeaturePOPCNT, FeaturePOPCNT,
FeatureAES, FeatureAES,
FeaturePCLMUL, FeaturePCLMUL,
@ -383,7 +376,7 @@ def : BroadwellProc<"broadwell">;
// FIXME: define KNL model // FIXME: define KNL model
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
[FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI, [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT, FeatureCMPXCHG16B, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
@ -394,7 +387,7 @@ def : KnightsLandingProc<"knl">;
class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
[FeatureAVX512, FeatureCDI, [FeatureAVX512, FeatureCDI,
FeatureDQI, FeatureBWI, FeatureVLX, FeatureDQI, FeatureBWI, FeatureVLX,
FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
@ -406,67 +399,77 @@ def : SkylakeProc<"skx">; // Legacy alias.
// AMD CPUs. // AMD CPUs.
def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6", [FeatureSlowUAMem, FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>; def : Proc<"k6-2", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"k6-3", [Feature3DNow]>; def : Proc<"k6-3", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem, def : Proc<"athlon", [FeatureSlowUAMem, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-tbird", [FeatureSlowUAMem, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-4", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-xp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-mp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"k8", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem, def : Proc<"opteron", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, def : Proc<"athlon64", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, def : Proc<"athlon-fx", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, def : Proc<"k8-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit, def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>; FeatureCMPXCHG16B, FeatureSlowBTMem,
def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureSlowSHLD]>;
FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowBTMem, FeatureSlowSHLD]>; FeatureSlowSHLD]>;
def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, def : Proc<"amdfam10", [FeatureSlowUAMem, FeatureSSE4A,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"amdfam10", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem, FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
def : Proc<"barcelona", [FeatureSSE4A, def : Proc<"barcelona", [FeatureSlowUAMem, FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem, FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here.
// Bobcat // Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
FeatureSlowSHLD]>; FeatureSlowSHLD, FeatureSlowUAMem]>;
// Jaguar // Jaguar
def : ProcessorModel<"btver2", BtVer2Model, def : ProcessorModel<"btver2", BtVer2Model,
[FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
FeatureBMI, FeatureF16C, FeatureMOVBE, FeatureBMI, FeatureF16C, FeatureMOVBE,
FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem, FeatureLZCNT, FeaturePOPCNT,
FeatureSlowSHLD]>; FeatureSlowSHLD]>;
// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
// Bulldozer // Bulldozer
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureLZCNT, FeatureAVX, FeatureSSE4A, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowSHLD]>; FeaturePOPCNT, FeatureSlowSHLD,
FeatureSlowUAMem]>;
// Piledriver // Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureF16C, FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD]>; FeatureTBM, FeatureFMA, FeatureSlowSHLD,
FeatureSlowUAMem]>;
// Steamroller // Steamroller
def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
@ -474,7 +477,7 @@ def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAVX, FeatureSSE4A, FeatureF16C, FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD, FeatureTBM, FeatureFMA, FeatureSlowSHLD,
FeatureFSGSBase]>; FeatureFSGSBase, FeatureSlowUAMem]>;
// Excavator // Excavator
def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
@ -482,14 +485,14 @@ def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI, FeatureBMI2, FeaturePOPCNT, FeatureBMI, FeatureBMI2,
FeatureTBM, FeatureFMA, FeatureSSE4A, FeatureTBM, FeatureFMA, FeatureSSE4A,
FeatureFSGSBase]>; FeatureFSGSBase, FeatureSlowUAMem]>;
def : Proc<"geode", [Feature3DNowA]>; def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip-c6", [FeatureSlowUAMem, FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>; def : Proc<"winchip2", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"c3", [Feature3DNow]>; def : Proc<"c3", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"c3-2", [FeatureSSE1]>; def : Proc<"c3-2", [FeatureSlowUAMem, FeatureSSE1]>;
// We also provide a generic 64-bit specific x86 processor model which tries to // We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the // be good for modern chips without enabling instruction set encodings past the
@ -502,8 +505,7 @@ def : Proc<"c3-2", [FeatureSSE1]>;
// knobs which need to be tuned differently for AMD chips, we might consider // knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them. // forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, def : ProcessorModel<"x86-64", SandyBridgeModel,
[FeatureSSE2, Feature64Bit, FeatureSlowBTMem, [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
FeatureFastUAMem]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Register File Description // Register File Description

View File

@ -1876,10 +1876,11 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
if ((!IsMemset || ZeroMemset) && if ((!IsMemset || ZeroMemset) &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) { !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 && if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() || (!Subtarget->isUnalignedMemUnder32Slow() ||
((DstAlign == 0 || DstAlign >= 16) && ((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) { (SrcAlign == 0 || SrcAlign >= 16)))) {
if (Size >= 32) { if (Size >= 32) {
// FIXME: Check if unaligned 32-byte accesses are slow.
if (Subtarget->hasInt256()) if (Subtarget->hasInt256())
return MVT::v8i32; return MVT::v8i32;
if (Subtarget->hasFp256()) if (Subtarget->hasFp256())
@ -1897,6 +1898,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::f64; return MVT::f64;
} }
} }
// This is a compromise. If we reach here, unaligned accesses may be slow on
// this target. However, creating smaller, aligned accesses could be even
// slower and would certainly be a lot more code.
if (Subtarget->is64Bit() && Size >= 8) if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64; return MVT::i64;
return MVT::i32; return MVT::i32;
@ -1916,12 +1920,10 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned, unsigned,
bool *Fast) const { bool *Fast) const {
if (Fast) { if (Fast) {
// FIXME: We should be checking 128-bit accesses separately from smaller
// accesses.
if (VT.getSizeInBits() == 256) if (VT.getSizeInBits() == 256)
*Fast = !Subtarget->isUnalignedMem32Slow(); *Fast = !Subtarget->isUnalignedMem32Slow();
else else
*Fast = Subtarget->isUnalignedMemAccessFast(); *Fast = !Subtarget->isUnalignedMemUnder32Slow();
} }
return true; return true;
} }

View File

@ -5508,9 +5508,10 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const MCInstrDesc &MCID = get(Opc); const MCInstrDesc &MCID = get(Opc);
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
// TODO: Check if 32-byte or greater accesses are slow too?
if (!MI->hasOneMemOperand() && if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass && RC == &X86::VR128RegClass &&
!Subtarget.isUnalignedMemAccessFast()) Subtarget.isUnalignedMemUnder32Slow())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for // conservatively assume the address is unaligned. That's bad for
// performance. // performance.
@ -5658,9 +5659,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end()); cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) && if (!(*MMOs.first) &&
RC == &X86::VR128RegClass && RC == &X86::VR128RegClass &&
!Subtarget.isUnalignedMemAccessFast()) Subtarget.isUnalignedMemUnder32Slow())
// Do not introduce a slow unaligned load. // Do not introduce a slow unaligned load.
return false; return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = RC->getSize() == 32 ? 32 : 16; unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) && bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment; (*MMOs.first)->getAlignment() >= Alignment;
@ -5701,9 +5704,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end()); cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) && if (!(*MMOs.first) &&
RC == &X86::VR128RegClass && RC == &X86::VR128RegClass &&
!Subtarget.isUnalignedMemAccessFast()) Subtarget.isUnalignedMemUnder32Slow())
// Do not introduce a slow unaligned store. // Do not introduce a slow unaligned store.
return false; return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = RC->getSize() == 32 ? 32 : 16; unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) && bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment; (*MMOs.first)->getAlignment() >= Alignment;

View File

@ -255,7 +255,7 @@ void X86Subtarget::initializeEnvironment() {
HasMPX = false; HasMPX = false;
IsBTMemSlow = false; IsBTMemSlow = false;
IsSHLDSlow = false; IsSHLDSlow = false;
IsUAMemFast = false; IsUAMemUnder32Slow = false;
IsUAMem32Slow = false; IsUAMem32Slow = false;
HasSSEUnalignedMem = false; HasSSEUnalignedMem = false;
HasCmpxchg16b = false; HasCmpxchg16b = false;

View File

@ -146,10 +146,10 @@ protected:
/// True if SHLD instructions are slow. /// True if SHLD instructions are slow.
bool IsSHLDSlow; bool IsSHLDSlow;
/// True if unaligned memory access is fast. /// True if unaligned memory accesses of 16-bytes or smaller are slow.
bool IsUAMemFast; bool IsUAMemUnder32Slow;
/// True if unaligned 32-byte memory accesses are slow. /// True if unaligned memory accesses of 32-bytes are slow.
bool IsUAMem32Slow; bool IsUAMem32Slow;
/// True if SSE operations can have unaligned memory operands. /// True if SSE operations can have unaligned memory operands.
@ -357,7 +357,7 @@ public:
bool hasRDSEED() const { return HasRDSEED; } bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; } bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; } bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool isUnalignedMemUnder32Slow() const { return IsUAMemUnder32Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool hasCmpxchg16b() const { return HasCmpxchg16b; }