Fix missing _xgetbv for Visual Studio 2008 and below

This commit is contained in:
Jeffrey Walton 2018-08-19 10:19:29 -04:00
parent d41b3c8211
commit 082ad86132
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 46 additions and 1 deletions

33
cpu.cpp
View File

@ -1,3 +1,4 @@
// cpu.cpp - originally written and placed in the public domain by Wei Dai
#include "pch.h"
@ -56,6 +57,11 @@ unsigned long int getauxval(unsigned long int) { return 0; }
# include <setjmp.h>
#endif
// Visual Studio 2008 and below are missing the _xgetbv intrinsic, so 64-bit
// MSVC builds call this external assembly routine instead. See x64dll.asm for
// the body. The argument selects the extended control register to read (the
// caller passes 0), and the 64-bit return value is that register's contents.
#if defined(_MSC_VER) && defined(_M_X64)
extern "C" unsigned long long __fastcall ExtendedControlRegister(unsigned int);
#endif
ANONYMOUS_NAMESPACE_BEGIN
#if defined(__APPLE__)
@ -309,19 +315,44 @@ void DetectX86Features()
CRYPTOPP_CONSTANT(AVX_FLAG = (3 << 27))
if ((cpuid1[2] & AVX_FLAG) == AVX_FLAG)
{
// GCC 4.1/Binutils 2.17 cannot consume xgetbv
#if defined(__GNUC__) || defined(__SUNPRO_CC) || defined(__BORLANDC__)
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 and
// http://www.agner.org/optimize/vectorclass/read.php?i=65
word32 a=0, d=0;
__asm __volatile
(
// GCC 4.1/Binutils 2.17 cannot consume xgetbv
// "xgetbv" : "=a"(a), "=d"(d) : "c"(0) :
".byte 0x0f, 0x01, 0xd0" "\n\t"
: "=a"(a), "=d"(d) : "c"(0) :
);
word64 xcr0 = a | static_cast<word64>(d) << 32;
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
// Visual Studio 2008 and below lack xgetbv
#elif defined(_MSC_VER) && defined(_M_IX86)
word32 a=0, d=0;
__asm {
push eax
push edx
push ecx
mov ecx, 0
_emit 0x0f
_emit 0x01
_emit 0xd0
mov a, eax
mov d, edx
pop ecx
pop edx
pop eax
}
word64 xcr0 = a | static_cast<word64>(d) << 32;
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
// Visual Studio 2008 and below lack xgetbv
#elif defined(_MSC_VER) && defined(_M_X64)
word64 xcr0 = ExtendedControlRegister(0);
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
#else
word64 xcr0 = _xgetbv(0);
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;

View File

@ -1964,5 +1964,19 @@ pop rsi
ret
SHA256_HashMultipleBlocks_SSE2 ENDP
ALIGN 8
ExtendedControlRegister PROC
;; Reads the extended control register selected by the first argument and
;; returns its 64-bit value in RAX.
;; The first parameter arrives in RCX, and xgetbv expects the XCR index in ECX,
;; so no register shuffling is needed before the instruction.
;; http://www.agner.org/optimize/vectorclass/read.php?i=65
;; xgetbv is emitted as raw opcode bytes (0F 01 D0); presumably the assemblers
;; shipped with older Visual Studio releases do not know the mnemonic.
DB 0fh
DB 01h
DB 0d0h
;; xcr = (EDX << 32) | EAX
;; A 32-bit register write zero-extends into the full 64-bit register, so this
;; clears RAX[63:32]. A 64-bit AND cannot do it: its immediate is a
;; sign-extended imm32, so 0ffffffffh would become an all-ones mask.
mov eax, eax
shl rdx, 32
or rax, rdx
ret
ExtendedControlRegister ENDP
_TEXT ENDS
END