mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2025-01-21 00:34:53 +00:00
597 lines
11 KiB
ArmAsm
597 lines
11 KiB
ArmAsm
;; rdrand.asm - written and placed in public domain by Jeffrey Walton and Uri Blumenthal.
|
|
;; Copyright assigned to the Crypto++ project.
|
|
|
|
;; This ASM file provides RDRAND and RDSEED to downlevel Unix and Linux tool chains.
|
|
;; Additionally, the inline assembly code produced by GCC and Clang is not that
|
|
;; impressive. However, using this code requires NASM and an edit to the GNUmakefile.
|
|
|
|
;; nasm -f elf32 rdrand.S -DX86 -g -o rdrand-x86.o
|
|
;; nasm -f elfx32 rdrand.S -DX32 -g -o rdrand-x32.o
|
|
;; nasm -f elf64 rdrand.S -DX64 -g -o rdrand-x64.o
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Naming convention used in rdrand.{h|cpp|asm|S}
|
|
;; MSC = Microsoft Compiler (and compatibles)
|
|
;; GCC = GNU Compiler (and compatibles)
|
|
;; ALL = MSC and GCC (and compatibles)
|
|
;; RRA = RDRAND, Assembly
|
|
;; RSA = RDSEED, Assembly
|
|
;; RRI = RDRAND, Intrinsic
|
|
;; RSI = RDSEED, Intrinsic
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; C/C++ Function prototypes
|
|
;; X86, X32 and X64:
|
|
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety);
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Return values
|
|
;; Status codes for the RDRAND routines. The GenerateBlock routines load one
;; of these into AL (after zeroing the accumulator) immediately before `ret`.
%define RDRAND_SUCCESS 1
|
|
%define RDRAND_FAILURE 0
|
|
|
|
;; Status codes for the RDSEED routines; same 1/0 convention as RDRAND.
%define RDSEED_SUCCESS 1
|
|
%define RDSEED_FAILURE 0
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
%ifdef X86 or X32                       ;; Set via the command line

;;---------------------------------------------------------------------
;; int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety)
;;
;; Fills `ptr` with `size` bytes drawn from RDRAND, one machine word at
;; a time, finishing with a 4/2/1-byte tail when `size` is not a
;; multiple of the machine word. Each failed RDRAND attempt (CF=0)
;; decrements `safety`; a failure while `safety` is already zero ends
;; the request with RDRAND_FAILURE.
;;
;; A null `ptr` succeeds only when `size` is zero.
;;
;; In:    X86 - arguments are read from the stack
;;        X32 - EDI = ptr, ESI = size, EDX = safety
;; Out:   EAX = RDRAND_SUCCESS (1) or RDRAND_FAILURE (0)
;; Clobb: EAX (RAX on X32), EDX, flags; on X32 also EDI and ESI.
;;        On X86 EBP, EDI and ESI are saved and restored.
;;---------------------------------------------------------------------

global  NASM_RRA_GenerateBlock
section .text

%ifdef X86
align   8
cpu     486
%else
align   16
%endif

NASM_RRA_GenerateBlock:

%ifdef X86
    ;; Offsets are relative to the frame pointer established below:
    ;; [ebp] = saved EBP, [ebp+4] = return address, [ebp+8] = first argument.
    %define arg1    [ebp+08h]
    %define arg2    [ebp+0Ch]
    %define arg3    [ebp+10h]
    %define MWSIZE  04h                 ;; machine word size
%else
    %define MWSIZE  08h                 ;; machine word size
%endif

    %define buffer  edi
    %define bsize   esi
    %define safety  edx

%ifdef X86
.Load_Arguments:

    ;; Build a frame so the argN offsets are valid, and preserve the
    ;; callee-saved registers that are used as working registers.
    push    ebp
    mov     ebp, esp
    push    edi
    push    esi

    mov     buffer, arg1
    mov     bsize,  arg2
    mov     safety, arg3
%endif

.Validate_Pointer:

    test    buffer, buffer
    jz      .GenerateBlock_PreRet

    ;; Top of While loop
.GenerateBlock_Top:

    ;; Check remaining size
    test    bsize, bsize
    jz      .GenerateBlock_Success

%ifdef X86
.Call_RDRAND_EAX:
%else
.Call_RDRAND_RAX:
    DB      48h                         ;; X32 can use the full register, issue the REX.w prefix
%endif
    ;; Emit the instruction bytes directly so assemblers without RDRAND
    ;; support can still build this file. 0F C7 F0 is `rdrand eax`
    ;; (`rdrand rax` when preceded by REX.W).
    DB      0Fh, 0C7h, 0F0h

    ;; If CF=1, the number returned by RDRAND is valid.
    ;; If CF=0, a random number was not available.
    jc      .RDRAND_succeeded

.RDRAND_failed:

    ;; Exit if we've reached the limit
    test    safety, safety
    jz      .GenerateBlock_Failure

    dec     safety
    jmp     .GenerateBlock_Top

.RDRAND_succeeded:

    cmp     bsize, MWSIZE
    jb      .Partial_Machine_Word

.Full_Machine_Word:

%ifdef X32
    mov     [buffer+4], eax             ;; We can only move 4 at a time
    DB      048h                        ;; Combined, these result in
    shr     eax, 32                     ;;   `shr rax, 32`
%endif

    mov     [buffer], eax
    add     buffer, MWSIZE
    sub     bsize, MWSIZE

    ;; Continue
    jmp     .GenerateBlock_Top

    ;; 1,2,3 bytes remain for X86
    ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:

%ifdef X32
    ;; Bit 2 of the remaining size set: emit 4 bytes
    test    bsize, 4
    jz      .Bit_2_Not_Set

    mov     [buffer], eax
    add     buffer, 4

    DB      048h                        ;; Combined, these result in
    shr     eax, 32                     ;;   `shr rax, 32`

.Bit_2_Not_Set:
%endif

    ;; Bit 1 of the remaining size set: emit 2 bytes
    test    bsize, 2
    jz      .Bit_1_Not_Set

    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.Bit_1_Not_Set:

    ;; Bit 0 of the remaining size set: emit the final byte
    test    bsize, 1
    jz      .GenerateBlock_Success

    mov     [buffer], al

    ;; We've hit all the bits
    jmp     .GenerateBlock_Success

.GenerateBlock_PreRet:

    ;; Null buffer: only an empty request counts as fulfilled
    test    bsize, bsize
    jz      .GenerateBlock_Success

.GenerateBlock_Failure:

    mov     eax, RDRAND_FAILURE
    jmp     .GenerateBlock_Return

.GenerateBlock_Success:

    mov     eax, RDRAND_SUCCESS

.GenerateBlock_Return:

%ifdef X86
    ;; Undo .Load_Arguments in reverse order
    pop     esi
    pop     edi
    pop     ebp
%endif
    ret

%endif  ;; X86 and X32
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
%ifdef X64                              ;; Set via the command line

global  NASM_RRA_GenerateBlock
section .text
align   16

;;---------------------------------------------------------------------
;; int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety)
;;
;; Fills `ptr` with `size` bytes drawn from RDRAND, 8 bytes per draw,
;; finishing with a 4/2/1-byte tail. Each failed draw (CF=0) decrements
;; `safety`; a failure while `safety` is already zero ends the request.
;; A null `ptr` succeeds only when `size` is zero.
;;
;; In:    RDI = ptr, RSI = size, EDX = safety
;; Out:   RAX = RDRAND_SUCCESS (1) or RDRAND_FAILURE (0)
;; Clobb: RAX, RDI, RSI, EDX, flags
;;---------------------------------------------------------------------

NASM_RRA_GenerateBlock:

    %define MWSIZE  08h                 ;; machine word size
    %define buffer  rdi
    %define bsize   rsi
    %define safety  edx

    ;; Arguments are consumed straight from their registers; nothing to load.

.check_pointer:
    test    buffer, buffer
    jz      .null_buffer

.next_word:
    ;; Invariant: bsize = bytes still to be written
    test    bsize, bsize
    jz      .finished

.draw:
    ;; Raw bytes for `rdrand rax`, so older assemblers can build this.
    DB      048h, 0Fh, 0C7h, 0F0h

    ;; CF=1: RAX holds a valid value. CF=0: nothing was available.
    jc      .have_word

.draw_failed:
    ;; Give up once the retry budget is exhausted
    test    safety, safety
    jz      .failed

    dec     safety
    jmp     .next_word

.have_word:
    cmp     bsize, MWSIZE
    jb      .tail

.store_full:
    mov     [buffer], rax
    add     buffer, MWSIZE
    sub     bsize, MWSIZE
    jmp     .next_word

    ;; 1..7 bytes remain; peel them off by the bits of bsize
.tail:
    test    bsize, 4
    jz      .tail_2

    mov     [buffer], eax
    shr     rax, 32
    add     buffer, 4

.tail_2:
    test    bsize, 2
    jz      .tail_1

    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.tail_1:
    test    bsize, 1
    jz      .finished

    mov     [buffer], al
    jmp     .finished

.null_buffer:
    ;; No buffer: an empty request is still a success
    test    bsize, bsize
    jz      .finished

.failed:
    mov     eax, RDRAND_FAILURE         ;; 32-bit write also clears the upper half of RAX
    ret

.finished:
    mov     eax, RDRAND_SUCCESS         ;; 32-bit write also clears the upper half of RAX
    ret

%endif  ;; X64
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
%ifdef X86 or X32                       ;; Set via the command line

;;---------------------------------------------------------------------
;; int NASM_RSA_GenerateBlock(byte* ptr, size_t size, unsigned int safety)
;;
;; Fills `ptr` with `size` bytes drawn from RDSEED, one machine word at
;; a time, finishing with a 4/2/1-byte tail when `size` is not a
;; multiple of the machine word. Each failed RDSEED attempt (CF=0)
;; decrements `safety`; a failure while `safety` is already zero ends
;; the request with RDSEED_FAILURE.
;;
;; A null `ptr` succeeds only when `size` is zero.
;;
;; In:    X86 - arguments are read from the stack
;;        X32 - EDI = ptr, ESI = size, EDX = safety
;; Out:   EAX = RDSEED_SUCCESS (1) or RDSEED_FAILURE (0)
;; Clobb: EAX (RAX on X32), EDX, flags; on X32 also EDI and ESI.
;;        On X86 EBP, EDI and ESI are saved and restored.
;;---------------------------------------------------------------------

global  NASM_RSA_GenerateBlock
section .text

%ifdef X86
align   8
cpu     486
%else
align   16
%endif

NASM_RSA_GenerateBlock:

%ifdef X86
    ;; Offsets are relative to the frame pointer established below:
    ;; [ebp] = saved EBP, [ebp+4] = return address, [ebp+8] = first argument.
    %define arg1    [ebp+08h]
    %define arg2    [ebp+0Ch]
    %define arg3    [ebp+10h]
    %define MWSIZE  04h                 ;; machine word size
%else
    %define MWSIZE  08h                 ;; machine word size
%endif

    %define buffer  edi
    %define bsize   esi
    %define safety  edx

%ifdef X86
.Load_Arguments:

    ;; Build a frame so the argN offsets are valid, and preserve the
    ;; callee-saved registers that are used as working registers.
    push    ebp
    mov     ebp, esp
    push    edi
    push    esi

    mov     buffer, arg1
    mov     bsize,  arg2
    mov     safety, arg3
%endif

.Validate_Pointer:

    test    buffer, buffer
    jz      .GenerateBlock_PreRet

    ;; Top of While loop
.GenerateBlock_Top:

    ;; Check remaining size
    test    bsize, bsize
    jz      .GenerateBlock_Success

%ifdef X86
.Call_RDSEED_EAX:
%else
.Call_RDSEED_RAX:
    DB      48h                         ;; X32 can use the full register, issue the REX.w prefix
%endif
    ;; Emit the instruction bytes directly so assemblers without RDSEED
    ;; support can still build this file. 0F C7 F8 is `rdseed eax`
    ;; (`rdseed rax` when preceded by REX.W).
    DB      0Fh, 0C7h, 0F8h

    ;; If CF=1, the number returned by RDSEED is valid.
    ;; If CF=0, a random number was not available.
    jc      .RDSEED_succeeded

.RDSEED_failed:

    ;; Exit if we've reached the limit
    test    safety, safety
    jz      .GenerateBlock_Failure

    dec     safety
    jmp     .GenerateBlock_Top

.RDSEED_succeeded:

    cmp     bsize, MWSIZE
    jb      .Partial_Machine_Word

.Full_Machine_Word:

%ifdef X32
    ;; MWSIZE is 8 here, so both halves of RAX must be written before
    ;; the pointer advances; otherwise 4 of every 8 bytes stay unfilled.
    mov     [buffer+4], eax             ;; We can only move 4 at a time
    DB      048h                        ;; Combined, these result in
    shr     eax, 32                     ;;   `shr rax, 32`
%endif

    mov     [buffer], eax
    add     buffer, MWSIZE
    sub     bsize, MWSIZE

    ;; Continue
    jmp     .GenerateBlock_Top

    ;; 1,2,3 bytes remain for X86
    ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:

%ifdef X32
    ;; Bit 2 of the remaining size set: emit 4 bytes
    test    bsize, 4
    jz      .Bit_2_Not_Set

    mov     [buffer], eax
    add     buffer, 4

    DB      048h                        ;; Combined, these result in
    shr     eax, 32                     ;;   `shr rax, 32`

.Bit_2_Not_Set:
%endif

    ;; Bit 1 of the remaining size set: emit 2 bytes
    test    bsize, 2
    jz      .Bit_1_Not_Set

    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.Bit_1_Not_Set:

    ;; Bit 0 of the remaining size set: emit the final byte
    test    bsize, 1
    jz      .GenerateBlock_Success

    mov     [buffer], al

    ;; We've hit all the bits
    jmp     .GenerateBlock_Success

.GenerateBlock_PreRet:

    ;; Null buffer: only an empty request counts as fulfilled
    test    bsize, bsize
    jz      .GenerateBlock_Success

.GenerateBlock_Failure:

    mov     eax, RDSEED_FAILURE
    jmp     .GenerateBlock_Return

.GenerateBlock_Success:

    mov     eax, RDSEED_SUCCESS

.GenerateBlock_Return:

%ifdef X86
    ;; Undo .Load_Arguments in reverse order
    pop     esi
    pop     edi
    pop     ebp
%endif
    ret

%endif  ;; X86 and X32
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
%ifdef X64                              ;; Set via the command line

global  NASM_RSA_GenerateBlock
section .text
align   16

;;---------------------------------------------------------------------
;; int NASM_RSA_GenerateBlock(byte* ptr, size_t size, unsigned int safety)
;;
;; Fills `ptr` with `size` bytes drawn from RDSEED, 8 bytes per draw,
;; finishing with a 4/2/1-byte tail. Each failed draw (CF=0) decrements
;; `safety`; a failure while `safety` is already zero ends the request.
;; A null `ptr` succeeds only when `size` is zero.
;;
;; In:    RDI = ptr, RSI = size, EDX = safety
;; Out:   RAX = RDSEED_SUCCESS (1) or RDSEED_FAILURE (0)
;; Clobb: RAX, RDI, RSI, EDX, flags
;;---------------------------------------------------------------------

NASM_RSA_GenerateBlock:

    %define MWSIZE  08h                 ;; machine word size
    %define buffer  rdi
    %define bsize   rsi
    %define safety  edx

    ;; Arguments are consumed straight from their registers; nothing to load.

.check_pointer:
    test    buffer, buffer
    jz      .null_buffer

.next_word:
    ;; Invariant: bsize = bytes still to be written
    test    bsize, bsize
    jz      .finished

.draw:
    ;; Raw bytes for `rdseed rax`, so older assemblers can build this.
    DB      048h, 0Fh, 0C7h, 0F8h

    ;; CF=1: RAX holds a valid value. CF=0: nothing was available.
    jc      .have_word

.draw_failed:
    ;; Give up once the retry budget is exhausted
    test    safety, safety
    jz      .failed

    dec     safety
    jmp     .next_word

.have_word:
    cmp     bsize, MWSIZE
    jb      .tail

.store_full:
    mov     [buffer], rax
    add     buffer, MWSIZE
    sub     bsize, MWSIZE
    jmp     .next_word

    ;; 1..7 bytes remain; peel them off by the bits of bsize
.tail:
    test    bsize, 4
    jz      .tail_2

    mov     [buffer], eax
    shr     rax, 32
    add     buffer, 4

.tail_2:
    test    bsize, 2
    jz      .tail_1

    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.tail_1:
    test    bsize, 1
    jz      .finished

    mov     [buffer], al
    jmp     .finished

.null_buffer:
    ;; No buffer: an empty request is still a success
    test    bsize, bsize
    jz      .finished

.failed:
    mov     eax, RDSEED_FAILURE         ;; 32-bit write also clears the upper half of RAX
    ret

.finished:
    mov     eax, RDSEED_SUCCESS         ;; 32-bit write also clears the upper half of RAX
    ret

%endif  ;; X64
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|