mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-27 07:12:06 +00:00
X86: enable AVX2 under Haswell native compilation
Patch by Adam Strzelecki git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195632 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dfc615f284
commit
8a6c627fd0
@ -95,6 +95,75 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
|
||||||
|
/// 4 values in the specified arguments. If we can't run cpuid on the host,
|
||||||
|
/// return true.
|
||||||
|
bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
|
||||||
|
unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
|
||||||
|
#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
// gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
|
||||||
|
asm ("movq\t%%rbx, %%rsi\n\t"
|
||||||
|
"cpuid\n\t"
|
||||||
|
"xchgq\t%%rbx, %%rsi\n\t"
|
||||||
|
: "=a" (*rEAX),
|
||||||
|
"=S" (*rEBX),
|
||||||
|
"=c" (*rECX),
|
||||||
|
"=d" (*rEDX)
|
||||||
|
: "a" (value),
|
||||||
|
"c" (subleaf));
|
||||||
|
return false;
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
// __cpuidex was added in MSVC++ 9.0 SP1
|
||||||
|
#if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
|
||||||
|
int registers[4];
|
||||||
|
__cpuidex(registers, value, subleaf);
|
||||||
|
*rEAX = registers[0];
|
||||||
|
*rEBX = registers[1];
|
||||||
|
*rECX = registers[2];
|
||||||
|
*rEDX = registers[3];
|
||||||
|
return false;
|
||||||
|
#else
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
asm ("movl\t%%ebx, %%esi\n\t"
|
||||||
|
"cpuid\n\t"
|
||||||
|
"xchgl\t%%ebx, %%esi\n\t"
|
||||||
|
: "=a" (*rEAX),
|
||||||
|
"=S" (*rEBX),
|
||||||
|
"=c" (*rECX),
|
||||||
|
"=d" (*rEDX)
|
||||||
|
: "a" (value),
|
||||||
|
"c" (subleaf));
|
||||||
|
return false;
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
__asm {
|
||||||
|
mov eax,value
|
||||||
|
mov ecx,subleaf
|
||||||
|
cpuid
|
||||||
|
mov esi,rEAX
|
||||||
|
mov dword ptr [esi],eax
|
||||||
|
mov esi,rEBX
|
||||||
|
mov dword ptr [esi],ebx
|
||||||
|
mov esi,rECX
|
||||||
|
mov dword ptr [esi],ecx
|
||||||
|
mov esi,rEDX
|
||||||
|
mov dword ptr [esi],edx
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
#else
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static bool OSHasAVXSupport() {
|
static bool OSHasAVXSupport() {
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
// Check xgetbv; this uses a .byte sequence instead of the instruction
|
// Check xgetbv; this uses a .byte sequence instead of the instruction
|
||||||
@ -131,6 +200,14 @@ std::string sys::getHostCPUName() {
|
|||||||
unsigned Model = 0;
|
unsigned Model = 0;
|
||||||
DetectX86FamilyModel(EAX, Family, Model);
|
DetectX86FamilyModel(EAX, Family, Model);
|
||||||
|
|
||||||
|
union {
|
||||||
|
unsigned u[3];
|
||||||
|
char c[12];
|
||||||
|
} text;
|
||||||
|
|
||||||
|
GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
|
||||||
|
|
||||||
|
unsigned MaxLeaf = EAX;
|
||||||
bool HasSSE3 = (ECX & 0x1);
|
bool HasSSE3 = (ECX & 0x1);
|
||||||
bool HasSSE41 = (ECX & 0x80000);
|
bool HasSSE41 = (ECX & 0x80000);
|
||||||
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
|
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
|
||||||
@ -138,15 +215,12 @@ std::string sys::getHostCPUName() {
|
|||||||
// switch, then we have full AVX support.
|
// switch, then we have full AVX support.
|
||||||
const unsigned AVXBits = (1 << 27) | (1 << 28);
|
const unsigned AVXBits = (1 << 27) | (1 << 28);
|
||||||
bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
|
bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
|
||||||
|
bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 &&
|
||||||
|
!GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) &&
|
||||||
|
(EBX & 0x20);
|
||||||
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
|
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
|
||||||
bool Em64T = (EDX >> 29) & 0x1;
|
bool Em64T = (EDX >> 29) & 0x1;
|
||||||
|
|
||||||
union {
|
|
||||||
unsigned u[3];
|
|
||||||
char c[12];
|
|
||||||
} text;
|
|
||||||
|
|
||||||
GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
|
|
||||||
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
|
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
|
||||||
switch (Family) {
|
switch (Family) {
|
||||||
case 3:
|
case 3:
|
||||||
@ -254,10 +328,20 @@ std::string sys::getHostCPUName() {
|
|||||||
|
|
||||||
// Ivy Bridge:
|
// Ivy Bridge:
|
||||||
case 58:
|
case 58:
|
||||||
|
case 62: // Ivy Bridge EP
|
||||||
// Not all Ivy Bridge processors support AVX (such as the Pentium
|
// Not all Ivy Bridge processors support AVX (such as the Pentium
|
||||||
// versions instead of the i7 versions).
|
// versions instead of the i7 versions).
|
||||||
return HasAVX ? "core-avx-i" : "corei7";
|
return HasAVX ? "core-avx-i" : "corei7";
|
||||||
|
|
||||||
|
// Haswell:
|
||||||
|
case 60:
|
||||||
|
case 63:
|
||||||
|
case 69:
|
||||||
|
case 70:
|
||||||
|
// Not all Haswell processors support AVX too (such as the Pentium
|
||||||
|
// versions instead of the i7 versions).
|
||||||
|
return HasAVX2 ? "core-avx2" : "corei7";
|
||||||
|
|
||||||
case 28: // Most 45 nm Intel Atom processors
|
case 28: // Most 45 nm Intel Atom processors
|
||||||
case 38: // 45 nm Atom Lincroft
|
case 38: // 45 nm Atom Lincroft
|
||||||
case 39: // 32 nm Atom Medfield
|
case 39: // 32 nm Atom Medfield
|
||||||
|
@ -285,7 +285,12 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
|||||||
(Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
|
(Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
|
||||||
(Family == 6 && Model == 0x2A) || // SandyBridge
|
(Family == 6 && Model == 0x2A) || // SandyBridge
|
||||||
(Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
|
(Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
|
||||||
(Family == 6 && Model == 0x3A))) {// IvyBridge
|
(Family == 6 && Model == 0x3A) || // IvyBridge
|
||||||
|
(Family == 6 && Model == 0x3E) || // IvyBridge EP
|
||||||
|
(Family == 6 && Model == 0x3C) || // Haswell
|
||||||
|
(Family == 6 && Model == 0x3F) || // ...
|
||||||
|
(Family == 6 && Model == 0x45) || // ...
|
||||||
|
(Family == 6 && Model == 0x46))) { // ...
|
||||||
IsUAMemFast = true;
|
IsUAMemFast = true;
|
||||||
ToggleFeature(X86::FeatureFastUAMem);
|
ToggleFeature(X86::FeatureFastUAMem);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user