mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-23 09:59:42 +00:00
Add separate ASM file for XGETBV64 and CPUID64 (GH #1240)
This will allow us to define CRYPTOPP_DISABLE_ASM and completely avoid building x64dll.asm and x64masm.asm
This commit is contained in:
parent
3405dcee96
commit
0432085157
63
cpuid64.asm
Normal file
63
cpuid64.asm
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
;; https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
|
||||||
|
;; The first four integer arguments are passed in registers.
|
||||||
|
;; Integer values are passed in left-to-right order in RCX,
|
||||||
|
;; RDX, R8, and R9, respectively. Arguments five and higher
|
||||||
|
;; are passed on the stack.
|
||||||
|
|
||||||
|
;; The registers RAX, RCX, RDX, R8, R9, R10, R11, XMM0-5,
|
||||||
|
;; and the upper portions of YMM0-15 and ZMM0-15 are
|
||||||
|
;; considered volatile and must be considered destroyed on
|
||||||
|
;; function calls.
|
||||||
|
|
||||||
|
.CODE
|
||||||
|
|
||||||
|
TITLE CPU features source file
|
||||||
|
SUBTITLE Microsoft specific ASM code to utilize CPUID and XGETBV64 for down level Microsoft toolchains
|
||||||
|
|
||||||
|
;; http://www.agner.org/optimize/vectorclass/read.php?i=65
;; word64 Xgetbv(word32 ctrl)
;;   ABI:   Microsoft x64
;;   In:    rcx = ctrl (left untouched; the query opcode reads it from ECX)
;;   Out:   rax = (EDX << 32) | EAX, as produced by the query
;;   Uses:  rax, rdx and flags only, all volatile under this ABI
;;   Stack: none; the prolog is empty

    ALIGN   8
XGETBV64 PROC FRAME
.endprolog
    ;; query: XGETBV emitted as raw opcode bytes so that down level
    ;; assemblers without the mnemonic can still build this file
    DB      0fh, 01h, 0d0h
    ;; xcr = (EDX << 32) | EAX
    ;; Writing a 32-bit register clears bits 63:32 of the full register,
    ;; so this zero-extends EAX into RAX. An "and rax, 0ffffffffh" is not
    ;; used because a 64-bit AND sign-extends its 32-bit immediate and
    ;; therefore cannot express a low-dword mask.
    mov     eax, eax
    shl     rdx, 32
    or      rax, rdx
    ret
XGETBV64 ENDP
|
||||||
|
|
||||||
|
;; word64 CpuId(word32 func, word32 subfunc, word32 output[4])
;;   ABI:   Microsoft x64
;;   In:    rcx = func     (copied to EAX for the query)
;;          rdx = subfunc  (copied to ECX for the query)
;;          r8  = output   (four consecutive 32-bit slots)
;;   Out:   rax = 1, unconditionally
;;          output[0..3] = EAX, EBX, ECX, EDX after the query
;;   Uses:  rax, rcx, rdx (volatile); rbx is saved and restored
;;   Stack: no adjustment; rbx is parked at [rsp+8], the first home
;;          slot the caller reserves above the return address

    ALIGN   8
CPUID64 PROC FRAME
    ;; rbx is non-volatile, yet cpuid overwrites it: stash it first
    ;; and describe the save to the unwinder
    mov     [rsp+8], rbx
.savereg rbx, 8
.endprolog
    ;; both arguments are 32 bits wide, so dword moves are sufficient
    mov     eax, ecx                    ; eax = func
    mov     ecx, edx                    ; ecx = subfunc
    cpuid
    ;; hand the four result registers back through the output array
    mov     dword ptr [r8],    eax      ; output[0]
    mov     dword ptr [r8+4],  ebx      ; output[1]
    mov     dword ptr [r8+8],  ecx      ; output[2]
    mov     dword ptr [r8+12], edx      ; output[3]
    ;; put back the caller's rbx, then report success
    mov     rbx, [rsp+8]
    mov     eax, 1                      ; dword write zero-extends: rax = 1
    ret
CPUID64 ENDP
|
||||||
|
|
||||||
|
_TEXT ENDS
|
||||||
|
END
|
119
cryptest.nmake
119
cryptest.nmake
@ -57,68 +57,69 @@
|
|||||||
|
|
||||||
LIB_SRCS = \
|
LIB_SRCS = \
|
||||||
cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp \
|
cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp \
|
||||||
algparam.cpp allocate.cpp arc4.cpp aria.cpp ariatab.cpp \
|
algparam.cpp allocate.cpp arc4.cpp aria.cpp ariatab.cpp asn.cpp \
|
||||||
asn.cpp authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp \
|
authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2.cpp \
|
||||||
blake2.cpp blake2b_simd.cpp blake2s_simd.cpp blowfish.cpp blumshub.cpp \
|
blake2b_simd.cpp blake2s_simd.cpp blowfish.cpp blumshub.cpp camellia.cpp \
|
||||||
camellia.cpp cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp \
|
cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp chacha_avx.cpp \
|
||||||
chacha_avx.cpp chacha_simd.cpp chachapoly.cpp cham.cpp cham_simd.cpp \
|
chacha_simd.cpp chachapoly.cpp cham.cpp cham_simd.cpp channels.cpp \
|
||||||
channels.cpp cmac.cpp crc.cpp crc_simd.cpp darn.cpp default.cpp des.cpp \
|
cmac.cpp crc.cpp crc_simd.cpp darn.cpp default.cpp des.cpp dessp.cpp \
|
||||||
dessp.cpp dh.cpp dh2.cpp dll.cpp donna_32.cpp donna_64.cpp donna_sse.cpp \
|
dh.cpp dh2.cpp dll.cpp donna_32.cpp donna_64.cpp donna_sse.cpp dsa.cpp \
|
||||||
dsa.cpp eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp \
|
eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp eprecomp.cpp \
|
||||||
eprecomp.cpp esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp \
|
esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp gcm.cpp \
|
||||||
gcm.cpp gcm_simd.cpp gf256.cpp gf2_32.cpp gf2n.cpp gf2n_simd.cpp \
|
gcm_simd.cpp gf256.cpp gf2_32.cpp gf2n.cpp gf2n_simd.cpp gfpcrypt.cpp \
|
||||||
gfpcrypt.cpp gost.cpp gzip.cpp hc128.cpp hc256.cpp hex.cpp hight.cpp \
|
gost.cpp gzip.cpp hc128.cpp hc256.cpp hex.cpp hight.cpp hmac.cpp \
|
||||||
hmac.cpp hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp \
|
hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp kalynatab.cpp \
|
||||||
kalynatab.cpp keccak.cpp keccak_core.cpp keccak_simd.cpp lea.cpp \
|
keccak.cpp keccak_core.cpp keccak_simd.cpp lea.cpp lea_simd.cpp \
|
||||||
lea_simd.cpp lsh256.cpp lsh256_avx.cpp lsh256_sse.cpp lsh512.cpp \
|
lsh256.cpp lsh256_avx.cpp lsh256_sse.cpp lsh512.cpp lsh512_avx.cpp \
|
||||||
lsh512_avx.cpp lsh512_sse.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp \
|
lsh512_sse.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp md5.cpp \
|
||||||
md5.cpp misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp oaep.cpp \
|
misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp oaep.cpp osrng.cpp \
|
||||||
osrng.cpp padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp \
|
padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp \
|
||||||
pssr.cpp pubkey.cpp queue.cpp rabbit.cpp rabin.cpp randpool.cpp rc2.cpp \
|
ppc_simd.cpp primetab.cpp pssr.cpp pubkey.cpp queue.cpp rabbit.cpp \
|
||||||
rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp rijndael.cpp rijndael_simd.cpp \
|
rabin.cpp randpool.cpp rc2.cpp rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp \
|
||||||
ripemd.cpp rng.cpp rsa.cpp rw.cpp safer.cpp salsa.cpp scrypt.cpp \
|
rijndael.cpp rijndael_simd.cpp ripemd.cpp rng.cpp rsa.cpp rw.cpp \
|
||||||
seal.cpp seed.cpp serpent.cpp sha.cpp sha3.cpp sha_simd.cpp shacal2.cpp \
|
safer.cpp salsa.cpp scrypt.cpp seal.cpp seed.cpp serpent.cpp sha.cpp \
|
||||||
shacal2_simd.cpp shake.cpp shark.cpp sharkbox.cpp simeck.cpp simon.cpp \
|
sha3.cpp sha_simd.cpp shacal2.cpp shacal2_simd.cpp shake.cpp shark.cpp \
|
||||||
simon128_simd.cpp skipjack.cpp sm3.cpp sm4.cpp sm4_simd.cpp \
|
sharkbox.cpp simeck.cpp simon.cpp simon128_simd.cpp skipjack.cpp sm3.cpp \
|
||||||
sosemanuk.cpp speck.cpp speck128_simd.cpp square.cpp squaretb.cpp \
|
sm4.cpp sm4_simd.cpp sosemanuk.cpp speck.cpp speck128_simd.cpp \
|
||||||
sse_simd.cpp strciphr.cpp tea.cpp tftables.cpp threefish.cpp tiger.cpp \
|
square.cpp squaretb.cpp sse_simd.cpp strciphr.cpp tea.cpp tftables.cpp \
|
||||||
tigertab.cpp ttmac.cpp tweetnacl.cpp twofish.cpp vmac.cpp wake.cpp \
|
threefish.cpp tiger.cpp tigertab.cpp ttmac.cpp tweetnacl.cpp twofish.cpp \
|
||||||
whrlpool.cpp xed25519.cpp xtr.cpp xtrcrypt.cpp xts.cpp zdeflate.cpp \
|
vmac.cpp wake.cpp whrlpool.cpp xed25519.cpp xtr.cpp xtrcrypt.cpp xts.cpp \
|
||||||
zinflate.cpp zlib.cpp
|
zdeflate.cpp zinflate.cpp zlib.cpp
|
||||||
|
|
||||||
LIB_OBJS = \
|
LIB_OBJS = \
|
||||||
cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj \
|
cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj \
|
||||||
algparam.obj allocate.obj arc4.obj aria.obj ariatab.obj \
|
algparam.obj allocate.obj arc4.obj aria.obj ariatab.obj asn.obj \
|
||||||
asn.obj authenc.obj base32.obj base64.obj basecode.obj bfinit.obj \
|
authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2.obj \
|
||||||
blake2.obj blake2b_simd.obj blake2s_simd.obj blowfish.obj blumshub.obj \
|
blake2b_simd.obj blake2s_simd.obj blowfish.obj blumshub.obj camellia.obj \
|
||||||
camellia.obj cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj \
|
cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj chacha_avx.obj \
|
||||||
chacha_avx.obj chacha_simd.obj chachapoly.obj cham.obj cham_simd.obj \
|
chacha_simd.obj chachapoly.obj cham.obj cham_simd.obj channels.obj \
|
||||||
channels.obj cmac.obj crc.obj crc_simd.obj darn.obj default.obj des.obj \
|
cmac.obj crc.obj crc_simd.obj darn.obj default.obj des.obj dessp.obj \
|
||||||
dessp.obj dh.obj dh2.obj dll.obj donna_32.obj donna_64.obj donna_sse.obj \
|
dh.obj dh2.obj dll.obj donna_32.obj donna_64.obj donna_sse.obj dsa.obj \
|
||||||
dsa.obj eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj emsa2.obj \
|
eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj emsa2.obj eprecomp.obj \
|
||||||
eprecomp.obj esign.obj files.obj filters.obj fips140.obj fipstest.obj \
|
esign.obj files.obj filters.obj fips140.obj fipstest.obj gcm.obj \
|
||||||
gcm.obj gcm_simd.obj gf256.obj gf2_32.obj gf2n.obj gf2n_simd.obj \
|
gcm_simd.obj gf256.obj gf2_32.obj gf2n.obj gf2n_simd.obj gfpcrypt.obj \
|
||||||
gfpcrypt.obj gost.obj gzip.obj hc128.obj hc256.obj hex.obj hight.obj \
|
gost.obj gzip.obj hc128.obj hc256.obj hex.obj hight.obj hmac.obj \
|
||||||
hmac.obj hrtimer.obj ida.obj idea.obj iterhash.obj kalyna.obj \
|
hrtimer.obj ida.obj idea.obj iterhash.obj kalyna.obj kalynatab.obj \
|
||||||
kalynatab.obj keccak.obj keccak_core.obj keccak_simd.obj lea.obj \
|
keccak.obj keccak_core.obj keccak_simd.obj lea.obj lea_simd.obj \
|
||||||
lea_simd.obj lsh256.obj lsh256_avx.obj lsh256_sse.obj lsh512.obj \
|
lsh256.obj lsh256_avx.obj lsh256_sse.obj lsh512.obj lsh512_avx.obj \
|
||||||
lsh512_avx.obj lsh512_sse.obj luc.obj mars.obj marss.obj md2.obj md4.obj \
|
lsh512_sse.obj luc.obj mars.obj marss.obj md2.obj md4.obj md5.obj \
|
||||||
md5.obj misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj oaep.obj \
|
misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj oaep.obj osrng.obj \
|
||||||
osrng.obj padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj \
|
padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj \
|
||||||
pssr.obj pubkey.obj queue.obj rabbit.obj rabin.obj randpool.obj rc2.obj \
|
ppc_simd.obj primetab.obj pssr.obj pubkey.obj queue.obj rabbit.obj \
|
||||||
rc5.obj rc6.obj rdrand.obj rdtables.obj rijndael.obj rijndael_simd.obj \
|
rabin.obj randpool.obj rc2.obj rc5.obj rc6.obj rdrand.obj rdtables.obj \
|
||||||
ripemd.obj rng.obj rsa.obj rw.obj safer.obj salsa.obj scrypt.obj \
|
rijndael.obj rijndael_simd.obj ripemd.obj rng.obj rsa.obj rw.obj \
|
||||||
seal.obj seed.obj serpent.obj sha.obj sha3.obj sha_simd.obj shacal2.obj \
|
safer.obj salsa.obj scrypt.obj seal.obj seed.obj serpent.obj sha.obj \
|
||||||
shacal2_simd.obj shake.obj shark.obj sharkbox.obj simeck.obj simon.obj \
|
sha3.obj sha_simd.obj shacal2.obj shacal2_simd.obj shake.obj shark.obj \
|
||||||
simon128_simd.obj skipjack.obj sm3.obj sm4.obj sm4_simd.obj \
|
sharkbox.obj simeck.obj simon.obj simon128_simd.obj skipjack.obj sm3.obj \
|
||||||
sosemanuk.obj speck.obj speck128_simd.obj square.obj squaretb.obj \
|
sm4.obj sm4_simd.obj sosemanuk.obj speck.obj speck128_simd.obj \
|
||||||
sse_simd.obj strciphr.obj tea.obj tftables.obj threefish.obj tiger.obj \
|
square.obj squaretb.obj sse_simd.obj strciphr.obj tea.obj tftables.obj \
|
||||||
tigertab.obj ttmac.obj tweetnacl.obj twofish.obj vmac.obj wake.obj \
|
threefish.obj tiger.obj tigertab.obj ttmac.obj tweetnacl.obj twofish.obj \
|
||||||
whrlpool.obj xed25519.obj xtr.obj xtrcrypt.obj xts.obj zdeflate.obj \
|
vmac.obj wake.obj whrlpool.obj xed25519.obj xtr.obj xtrcrypt.obj xts.obj \
|
||||||
zinflate.obj zlib.obj
|
zdeflate.obj zinflate.obj zlib.obj
|
||||||
|
|
||||||
ASM_OBJS = \
|
ASM_OBJS = \
|
||||||
rdrand-x86.obj rdrand-x64.obj rdseed-x86.obj rdseed-x64.obj x64masm.obj x64dll.obj
|
cpuid64.obj rdrand-x86.obj rdrand-x64.obj rdseed-x86.obj rdseed-x64.obj \
|
||||||
|
x64masm.obj x64dll.obj
|
||||||
|
|
||||||
TEST_SRCS = \
|
TEST_SRCS = \
|
||||||
test.cpp bench1.cpp bench2.cpp bench3.cpp datatest.cpp \
|
test.cpp bench1.cpp bench2.cpp bench3.cpp datatest.cpp \
|
||||||
@ -211,8 +212,8 @@ RDSEED_OBJ = rdseed-x86.obj
|
|||||||
# CXXFLAGS = $(CXXFLAGS) /DWINAPI_FAMILY=WINAPI_FAMILY_APP
|
# CXXFLAGS = $(CXXFLAGS) /DWINAPI_FAMILY=WINAPI_FAMILY_APP
|
||||||
AS = ml64.exe
|
AS = ml64.exe
|
||||||
ASFLAGS = /nologo /D_M_X64 /W3 /Cx /Zi
|
ASFLAGS = /nologo /D_M_X64 /W3 /Cx /Zi
|
||||||
LIB_SRCS = $(LIB_SRCS) rdrand.cpp rdrand.asm rdseed.asm
|
LIB_SRCS = $(LIB_SRCS) cpuid64.asm rdrand.cpp rdrand.asm rdseed.asm
|
||||||
LIB_OBJS = $(LIB_OBJS) rdrand-x64.obj rdseed-x64.obj x64masm.obj x64dll.obj
|
LIB_OBJS = $(LIB_OBJS) cpuid64.obj rdrand-x64.obj rdseed-x64.obj x64masm.obj x64dll.obj
|
||||||
LDFLAGS = $(LDFLAGS) /MACHINE:X64
|
LDFLAGS = $(LDFLAGS) /MACHINE:X64
|
||||||
LDLIBS = $(LDLIBS) kernel32.lib
|
LDLIBS = $(LDLIBS) kernel32.lib
|
||||||
RDRAND_OBJ = rdrand-x64.obj
|
RDRAND_OBJ = rdrand-x64.obj
|
||||||
|
@ -368,6 +368,11 @@
|
|||||||
<None Include="Readme.txt" />
|
<None Include="Readme.txt" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<CustomBuild Condition="'$(Platform)'=='x64'" Include="cpuid64.asm">
|
||||||
|
<Message>Building and Assembling cpuid64.asm</Message>
|
||||||
|
<Command>ml64.exe /c /nologo /D_M_X64 /W3 /Zi /Fo"$(IntDir)cpuid64.obj" "%(FullPath)"</Command>
|
||||||
|
<Outputs>$(IntDir)cpuid64.obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
<CustomBuild Condition="'$(Platform)'=='Win32'" Include="rdrand.asm">
|
<CustomBuild Condition="'$(Platform)'=='Win32'" Include="rdrand.asm">
|
||||||
<Message>Building and assembling rdrand.asm</Message>
|
<Message>Building and assembling rdrand.asm</Message>
|
||||||
<Command>ml.exe /c /nologo /D_M_X86 /W3 /Cx /Zi /safeseh /Fo"$(IntDir)rdrand-x86.obj" "%(FullPath)"</Command>
|
<Command>ml.exe /c /nologo /D_M_X86 /W3 /Cx /Zi /safeseh /Fo"$(IntDir)rdrand-x86.obj" "%(FullPath)"</Command>
|
||||||
|
@ -1088,6 +1088,9 @@
|
|||||||
</ClInclude>
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="cpuid64.asm">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</CustomBuild>
|
||||||
<CustomBuild Include="rdrand.asm">
|
<CustomBuild Include="rdrand.asm">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
|
45
x64dll.asm
45
x64dll.asm
@ -1975,50 +1975,5 @@ pop rsi
|
|||||||
ret
|
ret
|
||||||
SHA256_HashMultipleBlocks_SSE2 ENDP
|
SHA256_HashMultipleBlocks_SSE2 ENDP
|
||||||
|
|
||||||
;; http://www.agner.org/optimize/vectorclass/read.php?i=65
|
|
||||||
;; word64 Xgetbv(word32 ctrl)
|
|
||||||
;; ctrl = rcx
|
|
||||||
|
|
||||||
ALIGN 8
|
|
||||||
XGETBV64 PROC FRAME
|
|
||||||
.endprolog
|
|
||||||
;; query
|
|
||||||
DB 0fh, 01h, 0d0h
|
|
||||||
;; xcr = (EDX << 32) | EAX
|
|
||||||
and rax, 0ffffffffh
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
ret
|
|
||||||
XGETBV64 ENDP
|
|
||||||
|
|
||||||
;; word64 CpuId(word32 func, word32 subfunc, word32 output[4])
|
|
||||||
;; func = rcx
|
|
||||||
;; subfunc = rdx
|
|
||||||
;; output = r8
|
|
||||||
|
|
||||||
ALIGN 8
|
|
||||||
CPUID64 PROC FRAME
|
|
||||||
;; preserve per ABI
|
|
||||||
mov [rsp+8], rbx
|
|
||||||
.savereg rbx, 8
|
|
||||||
.endprolog
|
|
||||||
;; eax = func
|
|
||||||
mov rax, rcx
|
|
||||||
;; ecx = subfunc
|
|
||||||
mov rcx, rdx
|
|
||||||
;; query
|
|
||||||
cpuid
|
|
||||||
;; save
|
|
||||||
mov [r8+0], eax
|
|
||||||
mov [r8+4], ebx
|
|
||||||
mov [r8+8], ecx
|
|
||||||
mov [r8+12], edx
|
|
||||||
;; return value
|
|
||||||
mov rax, 1
|
|
||||||
;; restore
|
|
||||||
mov rbx, [rsp+8]
|
|
||||||
ret
|
|
||||||
CPUID64 ENDP
|
|
||||||
|
|
||||||
_TEXT ENDS
|
_TEXT ENDS
|
||||||
END
|
END
|
||||||
|
Loading…
Reference in New Issue
Block a user