Add separate ASM file for XGETBV64 and CPUID64 (GH #1240)

This will allow us to define CRYPTOPP_DISABLE_ASM and completely avoid building x64dll.asm and x64masm.asm
This commit is contained in:
Jeffrey Walton 2023-10-06 00:33:33 -04:00
parent 3405dcee96
commit 0432085157
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
5 changed files with 131 additions and 104 deletions

63
cpuid64.asm Normal file
View File

@ -0,0 +1,63 @@
;; https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
;; The first four integer arguments are passed in registers.
;; Integer values are passed in left-to-right order in RCX,
;; RDX, R8, and R9, respectively. Arguments five and higher
;; are passed on the stack.
;; The registers RAX, RCX, RDX, R8, R9, R10, R11, XMM0-5,
;; and the upper portions of YMM0-15 and ZMM0-15 are
;; considered volatile and must be considered destroyed on
;; function calls.
;; Begin the code segment; everything up to the closing ENDS is assembled into it.
.CODE
;; TITLE and SUBTITLE only set the headings used in the assembler listing.
TITLE CPU features source file
SUBTITLE Microsoft specific ASM code to utilize CPUID and XGETBV64 for down level Microsoft toolchains
;; http://www.agner.org/optimize/vectorclass/read.php?i=65
;; word64 Xgetbv(word32 ctrl)
;; ctrl = rcx
ALIGN 8
XGETBV64 PROC FRAME
    ;; Leaf routine: no registers are saved and no stack is allocated,
    ;; so the prologue is empty.
    .endprolog
    ;; query: XGETBV emitted as raw opcode bytes (0F 01 D0) so that down
    ;; level assemblers without the mnemonic can still build this file.
    ;; The instruction reads the extended control register selected by
    ;; ECX (ctrl) and returns it in EDX:EAX.
    DB 0fh, 01h, 0d0h
    ;; xcr = (EDX << 32) | EAX
    ;; Zero-extend EAX into RAX with a 32-bit move. A 64-bit AND only takes
    ;; a sign-extended 32-bit immediate, so 0ffffffffh cannot serve as a
    ;; mask for the low half; writing a 32-bit register always clears the
    ;; upper 32 bits of the full register.
    mov eax, eax
    shl rdx, 32
    or rax, rdx
    ;; word64 result is returned in RAX
    ret
XGETBV64 ENDP
;; word64 CpuId(word32 func, word32 subfunc, word32 output[4])
;; func = rcx
;; subfunc = rdx
;; output = r8
ALIGN 8
CPUID64 PROC FRAME
    ;; RBX is non-volatile in the Microsoft x64 ABI but CPUID overwrites it.
    ;; Park it in the stack slot at [rsp+8] (the first home/shadow slot the
    ;; caller reserves under that ABI) and describe the save to the unwinder.
    mov qword ptr [rsp+8], rbx
    .savereg rbx, 8
    .endprolog

    ;; Load the CPUID selectors: leaf in EAX, sub-leaf in ECX.
    ;; Both arguments are word32, and CPUID only reads the 32-bit registers.
    mov eax, ecx                    ;; eax = func
    mov ecx, edx                    ;; ecx = subfunc
    cpuid

    ;; output[0..3] = { EAX, EBX, ECX, EDX }
    mov dword ptr [r8], eax
    mov dword ptr [r8+4], ebx
    mov dword ptr [r8+8], ecx
    mov dword ptr [r8+12], edx

    ;; Put the caller's RBX back, then report success (the return is always 1).
    mov rbx, qword ptr [rsp+8]
    mov eax, 1                      ;; 32-bit write zero-extends: rax = 1
    ret
CPUID64 ENDP
;; Close the _TEXT segment and mark the end of the source file.
_TEXT ENDS
END

View File

@ -57,68 +57,69 @@
LIB_SRCS = \ LIB_SRCS = \
cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp \ cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp \
algparam.cpp allocate.cpp arc4.cpp aria.cpp ariatab.cpp \ algparam.cpp allocate.cpp arc4.cpp aria.cpp ariatab.cpp asn.cpp \
asn.cpp authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp \ authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2.cpp \
blake2.cpp blake2b_simd.cpp blake2s_simd.cpp blowfish.cpp blumshub.cpp \ blake2b_simd.cpp blake2s_simd.cpp blowfish.cpp blumshub.cpp camellia.cpp \
camellia.cpp cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp \ cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp chacha_avx.cpp \
chacha_avx.cpp chacha_simd.cpp chachapoly.cpp cham.cpp cham_simd.cpp \ chacha_simd.cpp chachapoly.cpp cham.cpp cham_simd.cpp channels.cpp \
channels.cpp cmac.cpp crc.cpp crc_simd.cpp darn.cpp default.cpp des.cpp \ cmac.cpp crc.cpp crc_simd.cpp darn.cpp default.cpp des.cpp dessp.cpp \
dessp.cpp dh.cpp dh2.cpp dll.cpp donna_32.cpp donna_64.cpp donna_sse.cpp \ dh.cpp dh2.cpp dll.cpp donna_32.cpp donna_64.cpp donna_sse.cpp dsa.cpp \
dsa.cpp eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp \ eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp eprecomp.cpp \
eprecomp.cpp esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp \ esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp gcm.cpp \
gcm.cpp gcm_simd.cpp gf256.cpp gf2_32.cpp gf2n.cpp gf2n_simd.cpp \ gcm_simd.cpp gf256.cpp gf2_32.cpp gf2n.cpp gf2n_simd.cpp gfpcrypt.cpp \
gfpcrypt.cpp gost.cpp gzip.cpp hc128.cpp hc256.cpp hex.cpp hight.cpp \ gost.cpp gzip.cpp hc128.cpp hc256.cpp hex.cpp hight.cpp hmac.cpp \
hmac.cpp hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp \ hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp kalynatab.cpp \
kalynatab.cpp keccak.cpp keccak_core.cpp keccak_simd.cpp lea.cpp \ keccak.cpp keccak_core.cpp keccak_simd.cpp lea.cpp lea_simd.cpp \
lea_simd.cpp lsh256.cpp lsh256_avx.cpp lsh256_sse.cpp lsh512.cpp \ lsh256.cpp lsh256_avx.cpp lsh256_sse.cpp lsh512.cpp lsh512_avx.cpp \
lsh512_avx.cpp lsh512_sse.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp \ lsh512_sse.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp md5.cpp \
md5.cpp misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp oaep.cpp \ misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp oaep.cpp osrng.cpp \
osrng.cpp padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp \ padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp \
pssr.cpp pubkey.cpp queue.cpp rabbit.cpp rabin.cpp randpool.cpp rc2.cpp \ ppc_simd.cpp primetab.cpp pssr.cpp pubkey.cpp queue.cpp rabbit.cpp \
rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp rijndael.cpp rijndael_simd.cpp \ rabin.cpp randpool.cpp rc2.cpp rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp \
ripemd.cpp rng.cpp rsa.cpp rw.cpp safer.cpp salsa.cpp scrypt.cpp \ rijndael.cpp rijndael_simd.cpp ripemd.cpp rng.cpp rsa.cpp rw.cpp \
seal.cpp seed.cpp serpent.cpp sha.cpp sha3.cpp sha_simd.cpp shacal2.cpp \ safer.cpp salsa.cpp scrypt.cpp seal.cpp seed.cpp serpent.cpp sha.cpp \
shacal2_simd.cpp shake.cpp shark.cpp sharkbox.cpp simeck.cpp simon.cpp \ sha3.cpp sha_simd.cpp shacal2.cpp shacal2_simd.cpp shake.cpp shark.cpp \
simon128_simd.cpp skipjack.cpp sm3.cpp sm4.cpp sm4_simd.cpp \ sharkbox.cpp simeck.cpp simon.cpp simon128_simd.cpp skipjack.cpp sm3.cpp \
sosemanuk.cpp speck.cpp speck128_simd.cpp square.cpp squaretb.cpp \ sm4.cpp sm4_simd.cpp sosemanuk.cpp speck.cpp speck128_simd.cpp \
sse_simd.cpp strciphr.cpp tea.cpp tftables.cpp threefish.cpp tiger.cpp \ square.cpp squaretb.cpp sse_simd.cpp strciphr.cpp tea.cpp tftables.cpp \
tigertab.cpp ttmac.cpp tweetnacl.cpp twofish.cpp vmac.cpp wake.cpp \ threefish.cpp tiger.cpp tigertab.cpp ttmac.cpp tweetnacl.cpp twofish.cpp \
whrlpool.cpp xed25519.cpp xtr.cpp xtrcrypt.cpp xts.cpp zdeflate.cpp \ vmac.cpp wake.cpp whrlpool.cpp xed25519.cpp xtr.cpp xtrcrypt.cpp xts.cpp \
zinflate.cpp zlib.cpp zdeflate.cpp zinflate.cpp zlib.cpp
LIB_OBJS = \ LIB_OBJS = \
cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj \ cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj \
algparam.obj allocate.obj arc4.obj aria.obj ariatab.obj \ algparam.obj allocate.obj arc4.obj aria.obj ariatab.obj asn.obj \
asn.obj authenc.obj base32.obj base64.obj basecode.obj bfinit.obj \ authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2.obj \
blake2.obj blake2b_simd.obj blake2s_simd.obj blowfish.obj blumshub.obj \ blake2b_simd.obj blake2s_simd.obj blowfish.obj blumshub.obj camellia.obj \
camellia.obj cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj \ cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj chacha_avx.obj \
chacha_avx.obj chacha_simd.obj chachapoly.obj cham.obj cham_simd.obj \ chacha_simd.obj chachapoly.obj cham.obj cham_simd.obj channels.obj \
channels.obj cmac.obj crc.obj crc_simd.obj darn.obj default.obj des.obj \ cmac.obj crc.obj crc_simd.obj darn.obj default.obj des.obj dessp.obj \
dessp.obj dh.obj dh2.obj dll.obj donna_32.obj donna_64.obj donna_sse.obj \ dh.obj dh2.obj dll.obj donna_32.obj donna_64.obj donna_sse.obj dsa.obj \
dsa.obj eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj emsa2.obj \ eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj emsa2.obj eprecomp.obj \
eprecomp.obj esign.obj files.obj filters.obj fips140.obj fipstest.obj \ esign.obj files.obj filters.obj fips140.obj fipstest.obj gcm.obj \
gcm.obj gcm_simd.obj gf256.obj gf2_32.obj gf2n.obj gf2n_simd.obj \ gcm_simd.obj gf256.obj gf2_32.obj gf2n.obj gf2n_simd.obj gfpcrypt.obj \
gfpcrypt.obj gost.obj gzip.obj hc128.obj hc256.obj hex.obj hight.obj \ gost.obj gzip.obj hc128.obj hc256.obj hex.obj hight.obj hmac.obj \
hmac.obj hrtimer.obj ida.obj idea.obj iterhash.obj kalyna.obj \ hrtimer.obj ida.obj idea.obj iterhash.obj kalyna.obj kalynatab.obj \
kalynatab.obj keccak.obj keccak_core.obj keccak_simd.obj lea.obj \ keccak.obj keccak_core.obj keccak_simd.obj lea.obj lea_simd.obj \
lea_simd.obj lsh256.obj lsh256_avx.obj lsh256_sse.obj lsh512.obj \ lsh256.obj lsh256_avx.obj lsh256_sse.obj lsh512.obj lsh512_avx.obj \
lsh512_avx.obj lsh512_sse.obj luc.obj mars.obj marss.obj md2.obj md4.obj \ lsh512_sse.obj luc.obj mars.obj marss.obj md2.obj md4.obj md5.obj \
md5.obj misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj oaep.obj \ misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj oaep.obj osrng.obj \
osrng.obj padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj \ padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj \
pssr.obj pubkey.obj queue.obj rabbit.obj rabin.obj randpool.obj rc2.obj \ ppc_simd.obj primetab.obj pssr.obj pubkey.obj queue.obj rabbit.obj \
rc5.obj rc6.obj rdrand.obj rdtables.obj rijndael.obj rijndael_simd.obj \ rabin.obj randpool.obj rc2.obj rc5.obj rc6.obj rdrand.obj rdtables.obj \
ripemd.obj rng.obj rsa.obj rw.obj safer.obj salsa.obj scrypt.obj \ rijndael.obj rijndael_simd.obj ripemd.obj rng.obj rsa.obj rw.obj \
seal.obj seed.obj serpent.obj sha.obj sha3.obj sha_simd.obj shacal2.obj \ safer.obj salsa.obj scrypt.obj seal.obj seed.obj serpent.obj sha.obj \
shacal2_simd.obj shake.obj shark.obj sharkbox.obj simeck.obj simon.obj \ sha3.obj sha_simd.obj shacal2.obj shacal2_simd.obj shake.obj shark.obj \
simon128_simd.obj skipjack.obj sm3.obj sm4.obj sm4_simd.obj \ sharkbox.obj simeck.obj simon.obj simon128_simd.obj skipjack.obj sm3.obj \
sosemanuk.obj speck.obj speck128_simd.obj square.obj squaretb.obj \ sm4.obj sm4_simd.obj sosemanuk.obj speck.obj speck128_simd.obj \
sse_simd.obj strciphr.obj tea.obj tftables.obj threefish.obj tiger.obj \ square.obj squaretb.obj sse_simd.obj strciphr.obj tea.obj tftables.obj \
tigertab.obj ttmac.obj tweetnacl.obj twofish.obj vmac.obj wake.obj \ threefish.obj tiger.obj tigertab.obj ttmac.obj tweetnacl.obj twofish.obj \
whrlpool.obj xed25519.obj xtr.obj xtrcrypt.obj xts.obj zdeflate.obj \ vmac.obj wake.obj whrlpool.obj xed25519.obj xtr.obj xtrcrypt.obj xts.obj \
zinflate.obj zlib.obj zdeflate.obj zinflate.obj zlib.obj
ASM_OBJS = \ ASM_OBJS = \
rdrand-x86.obj rdrand-x64.obj rdseed-x86.obj rdseed-x64.obj x64masm.obj x64dll.obj cpuid64.obj rdrand-x86.obj rdrand-x64.obj rdseed-x86.obj rdseed-x64.obj \
x64masm.obj x64dll.obj
TEST_SRCS = \ TEST_SRCS = \
test.cpp bench1.cpp bench2.cpp bench3.cpp datatest.cpp \ test.cpp bench1.cpp bench2.cpp bench3.cpp datatest.cpp \
@ -211,8 +212,8 @@ RDSEED_OBJ = rdseed-x86.obj
# CXXFLAGS = $(CXXFLAGS) /DWINAPI_FAMILY=WINAPI_FAMILY_APP # CXXFLAGS = $(CXXFLAGS) /DWINAPI_FAMILY=WINAPI_FAMILY_APP
AS = ml64.exe AS = ml64.exe
ASFLAGS = /nologo /D_M_X64 /W3 /Cx /Zi ASFLAGS = /nologo /D_M_X64 /W3 /Cx /Zi
LIB_SRCS = $(LIB_SRCS) rdrand.cpp rdrand.asm rdseed.asm LIB_SRCS = $(LIB_SRCS) cpuid64.asm rdrand.cpp rdrand.asm rdseed.asm
LIB_OBJS = $(LIB_OBJS) rdrand-x64.obj rdseed-x64.obj x64masm.obj x64dll.obj LIB_OBJS = $(LIB_OBJS) cpuid64.obj rdrand-x64.obj rdseed-x64.obj x64masm.obj x64dll.obj
LDFLAGS = $(LDFLAGS) /MACHINE:X64 LDFLAGS = $(LDFLAGS) /MACHINE:X64
LDLIBS = $(LDLIBS) kernel32.lib LDLIBS = $(LDLIBS) kernel32.lib
RDRAND_OBJ = rdrand-x64.obj RDRAND_OBJ = rdrand-x64.obj

View File

@ -368,6 +368,11 @@
<None Include="Readme.txt" /> <None Include="Readme.txt" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Condition="'$(Platform)'=='x64'" Include="cpuid64.asm">
<Message>Building and Assembling cpuid64.asm</Message>
<Command>ml64.exe /c /nologo /D_M_X64 /W3 /Zi /Fo"$(IntDir)cpuid64.obj" "%(FullPath)"</Command>
<Outputs>$(IntDir)cpuid64.obj;%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Condition="'$(Platform)'=='Win32'" Include="rdrand.asm"> <CustomBuild Condition="'$(Platform)'=='Win32'" Include="rdrand.asm">
<Message>Building and assembling rdrand.asm</Message> <Message>Building and assembling rdrand.asm</Message>
<Command>ml.exe /c /nologo /D_M_X86 /W3 /Cx /Zi /safeseh /Fo"$(IntDir)rdrand-x86.obj" "%(FullPath)"</Command> <Command>ml.exe /c /nologo /D_M_X86 /W3 /Cx /Zi /safeseh /Fo"$(IntDir)rdrand-x86.obj" "%(FullPath)"</Command>

View File

@ -1088,6 +1088,9 @@
</ClInclude> </ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Include="cpuid64.asm">
<Filter>Source Files</Filter>
</CustomBuild>
<CustomBuild Include="rdrand.asm"> <CustomBuild Include="rdrand.asm">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</CustomBuild> </CustomBuild>

View File

@ -1975,50 +1975,5 @@ pop rsi
ret ret
SHA256_HashMultipleBlocks_SSE2 ENDP SHA256_HashMultipleBlocks_SSE2 ENDP
;; http://www.agner.org/optimize/vectorclass/read.php?i=65
;; word64 Xgetbv(word32 ctrl)
;; ctrl = rcx
ALIGN 8
XGETBV64 PROC FRAME
;; leaf routine: no prologue instructions, so the prologue ends immediately
.endprolog
;; query: raw opcode bytes 0F 01 D0 (XGETBV); the register index is taken from ECX (ctrl)
DB 0fh, 01h, 0d0h
;; xcr = (EDX << 32) | EAX
and rax, 0ffffffffh
shl rdx, 32
or rax, rdx
;; the combined 64-bit value is returned in RAX
ret
XGETBV64 ENDP
;; word64 CpuId(word32 func, word32 subfunc, word32 output[4])
;; func = rcx
;; subfunc = rdx
;; output = r8
ALIGN 8
CPUID64 PROC FRAME
;; preserve per ABI: cpuid overwrites RBX, so keep the caller's value in the stack slot at [rsp+8]
mov [rsp+8], rbx
;; record where RBX was saved so the unwinder can recover it
.savereg rbx, 8
.endprolog
;; eax = func
mov rax, rcx
;; ecx = subfunc
mov rcx, rdx
;; query
cpuid
;; save: output[0..3] = EAX, EBX, ECX, EDX
mov [r8+0], eax
mov [r8+4], ebx
mov [r8+8], ecx
mov [r8+12], edx
;; return value: always 1
mov rax, 1
;; restore the caller's RBX from the slot written in the prologue
mov rbx, [rsp+8]
ret
CPUID64 ENDP
_TEXT ENDS _TEXT ENDS
END END