mirror of
https://github.com/ptitSeb/box64.git
synced 2024-11-26 16:21:17 +00:00
Added full support for the SHA cpu extension
This commit is contained in:
parent
39568bff2f
commit
afe6fbe3b3
@ -303,6 +303,7 @@ set(ELFLOADER_SRC
|
||||
"${BOX64_ROOT}/src/emu/x87emu_private.c"
|
||||
"${BOX64_ROOT}/src/emu/x64primop.c"
|
||||
"${BOX64_ROOT}/src/emu/x64run_private.c"
|
||||
"${BOX64_ROOT}/src/emu/x64shaext.c"
|
||||
"${BOX64_ROOT}/src/emu/x64syscall.c"
|
||||
"${BOX64_ROOT}/src/emu/x86syscall.c"
|
||||
"${BOX64_ROOT}/src/emu/x64tls.c"
|
||||
@ -1020,18 +1021,23 @@ add_test(x87cache ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
|
||||
-D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test25 -D TEST_OUTPUT=tmpfile25.txt
|
||||
-D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref25.txt
|
||||
-P ${CMAKE_SOURCE_DIR}/runTest.cmake )
|
||||
|
||||
|
||||
add_test(feround ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
|
||||
-D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test26 -D TEST_OUTPUT=tmpfile26.txt
|
||||
-D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref26.txt
|
||||
-P ${CMAKE_SOURCE_DIR}/runTest.cmake )
|
||||
|
||||
set_tests_properties(feround PROPERTIES ENVIRONMENT "BOX64_SYNC_ROUNDING=1")
|
||||
|
||||
add_test(sse4_2 ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
|
||||
-D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test27 -D TEST_OUTPUT=tmpfile27.txt
|
||||
-D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref27.txt
|
||||
-P ${CMAKE_SOURCE_DIR}/runTest.cmake )
|
||||
|
||||
set_tests_properties(feround PROPERTIES ENVIRONMENT "BOX64_SYNC_ROUNDING=1")
|
||||
|
||||
add_test(shaext ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
|
||||
-D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test28 -D TEST_OUTPUT=tmpfile28.txt
|
||||
-D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref28.txt
|
||||
-P ${CMAKE_SOURCE_DIR}/runTest.cmake )
|
||||
|
||||
|
||||
file(GLOB extension_tests "${CMAKE_SOURCE_DIR}/tests/extensions/*.c")
|
||||
foreach(file ${extension_tests})
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "my_cpuid.h"
|
||||
#include "bridge.h"
|
||||
#include "signals.h"
|
||||
#include "x64shaext.h"
|
||||
#ifdef DYNAREC
|
||||
#include "custommem.h"
|
||||
#include "../dynarec/native_lock.h"
|
||||
@ -340,6 +341,43 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
|
||||
}
|
||||
break;
|
||||
|
||||
case 0xC8: /* SHA1NEXTE Gx, Ex */
|
||||
nextop = F8;
|
||||
GETGX;
|
||||
GETEX(0);
|
||||
sha1nexte(emu, GX, EX);
|
||||
break;
|
||||
case 0xC9: /* SHA1MSG1 Gx, Ex */
|
||||
nextop = F8;
|
||||
GETGX;
|
||||
GETEX(0);
|
||||
sha1msg1(emu, GX, EX);
|
||||
break;
|
||||
case 0xCA: /* SHA1MSG2 Gx, Ex */
|
||||
nextop = F8;
|
||||
GETGX;
|
||||
GETEX(0);
|
||||
sha1msg2(emu, GX, EX);
|
||||
break;
|
||||
case 0xCB: /* SHA256RNDS2 Gx, Ex (, XMM0) */
|
||||
nextop = F8;
|
||||
GETGX;
|
||||
GETEX(0);
|
||||
sha256rnds2(emu, GX, EX);
|
||||
break;
|
||||
case 0xCC: /* SHA256MSG1 Gx, Ex */
|
||||
nextop = F8;
|
||||
GETGX;
|
||||
GETEX(0);
|
||||
sha256msg1(emu, GX, EX);
|
||||
break;
|
||||
case 0xCD: /* SHA256MSG2 Gx, Ex */
|
||||
nextop = F8;
|
||||
GETGX;
|
||||
GETEX(0);
|
||||
sha256msg2(emu, GX, EX);
|
||||
break;
|
||||
|
||||
case 0xF0: /* MOVBE Gd, Ed*/
|
||||
nextop = F8;
|
||||
GETGD;
|
||||
@ -389,6 +427,14 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
|
||||
}
|
||||
break;
|
||||
|
||||
case 0xCC: /* SHA1RNDS4 Gx, Ex, Ib */
|
||||
nextop = F8;
|
||||
GETGX;
|
||||
GETEX(1);
|
||||
tmp8u = F8;
|
||||
sha1rnds4(emu, GX, EX, tmp8u);
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
194
src/emu/x64shaext.c
Normal file
194
src/emu/x64shaext.c
Normal file
@ -0,0 +1,194 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include "box64stack.h"
|
||||
#include "x64emu.h"
|
||||
#include "x64run_private.h"
|
||||
#include "x64emu_private.h"
|
||||
#include "x64shaext.h"
|
||||
|
||||
static uint32_t rol(uint32_t a, int n)
|
||||
{
|
||||
n = n&31;
|
||||
if(!n)
|
||||
return a;
|
||||
return (a<<n) | (a>>(32-n));
|
||||
}
|
||||
|
||||
static uint32_t ror(uint32_t a, int n)
|
||||
{
|
||||
n = n&31;
|
||||
if(!n)
|
||||
return a;
|
||||
return (a>>n) | (a<<(32-n));
|
||||
}
|
||||
|
||||
static uint32_t f0(uint32_t B, uint32_t C, uint32_t D)
|
||||
{
|
||||
return (B & C) ^ ((~B) & D);
|
||||
}
|
||||
|
||||
static uint32_t f1(uint32_t B, uint32_t C, uint32_t D)
|
||||
{
|
||||
return B ^ C ^ D;
|
||||
}
|
||||
|
||||
static uint32_t f2(uint32_t B, uint32_t C, uint32_t D)
|
||||
{
|
||||
return (B & C) ^ (B & D) ^ (C & D);
|
||||
}
|
||||
|
||||
static uint32_t f3(uint32_t B, uint32_t C, uint32_t D)
|
||||
{
|
||||
return B ^ C ^ D;
|
||||
}
|
||||
|
||||
static uint32_t Ch(uint32_t E, uint32_t F, uint32_t G)
|
||||
{
|
||||
return (E & F) ^ ((~E) & G);
|
||||
}
|
||||
|
||||
static uint32_t Maj(uint32_t A, uint32_t B, uint32_t C)
|
||||
{
|
||||
return (A & B) ^ (A & C) ^ (B & C);
|
||||
}
|
||||
|
||||
static uint32_t sigma0(uint32_t A)
|
||||
{
|
||||
return ror(A, 2) ^ ror(A, 13) ^ ror(A, 22);
|
||||
}
|
||||
static uint32_t sigma1(uint32_t E)
|
||||
{
|
||||
return ror(E, 6) ^ ror(E, 11) ^ ror(E, 25);
|
||||
}
|
||||
static uint32_t tho0(uint32_t W)
|
||||
{
|
||||
return ror(W, 7) ^ ror(W, 18) ^ (W>>3);
|
||||
}
|
||||
static uint32_t tho1(uint32_t W)
|
||||
{
|
||||
return ror(W, 17) ^ ror(W, 19) ^ (W>>10);
|
||||
}
|
||||
|
||||
static const uint32_t Ks[] = { 0x5A827999, 0x6ED9EBA1, 0X8F1BBCDC, 0xCA62C1D6 };
|
||||
|
||||
void sha1nexte(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)
|
||||
{
|
||||
uint32_t tmp = rol(xmm1->ud[3], 30);
|
||||
xmm1->ud[3] = xmm2->ud[3] + tmp;
|
||||
xmm1->ud[2] = xmm2->ud[2];
|
||||
xmm1->ud[1] = xmm2->ud[1];
|
||||
xmm1->ud[0] = xmm2->ud[0];
|
||||
}
|
||||
|
||||
void sha1msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)
|
||||
{
|
||||
uint32_t w0 = xmm1->ud[3];
|
||||
uint32_t w1 = xmm1->ud[2];
|
||||
uint32_t w2 = xmm1->ud[1];
|
||||
uint32_t w3 = xmm1->ud[0];
|
||||
uint32_t w4 = xmm2->ud[3];
|
||||
uint32_t w5 = xmm2->ud[2];
|
||||
xmm1->ud[3] = w2 ^ w0;
|
||||
xmm1->ud[2] = w3 ^ w1;
|
||||
xmm1->ud[1] = w4 ^ w2;
|
||||
xmm1->ud[0] = w5 ^ w3;
|
||||
}
|
||||
|
||||
void sha1msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)
|
||||
{
|
||||
uint32_t w13 = xmm2->ud[2];
|
||||
uint32_t w14 = xmm2->ud[1];
|
||||
uint32_t w15 = xmm2->ud[0];
|
||||
uint32_t w16 = rol(xmm1->ud[3] ^ w13, 1);
|
||||
uint32_t w17 = rol(xmm1->ud[2] ^ w14, 1);
|
||||
uint32_t w18 = rol(xmm1->ud[1] ^ w15, 1);
|
||||
uint32_t w19 = rol(xmm1->ud[0] ^ w16, 1);
|
||||
xmm1->ud[3] = w16;
|
||||
xmm1->ud[2] = w17;
|
||||
xmm1->ud[1] = w18;
|
||||
xmm1->ud[0] = w19;
|
||||
}
|
||||
|
||||
void sha256msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)
|
||||
{
|
||||
uint32_t w4 = xmm2->ud[0];
|
||||
uint32_t w3 = xmm1->ud[3];
|
||||
uint32_t w2 = xmm1->ud[2];
|
||||
uint32_t w1 = xmm1->ud[1];
|
||||
uint32_t w0 = xmm1->ud[0];
|
||||
xmm1->ud[3] = w3 + tho0(w4);
|
||||
xmm1->ud[2] = w2 + tho0(w3);
|
||||
xmm1->ud[1] = w1 + tho0(w2);
|
||||
xmm1->ud[0] = w0 + tho0(w1);
|
||||
}
|
||||
|
||||
void sha256msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)
|
||||
{
|
||||
uint32_t w14 = xmm2->ud[2];
|
||||
uint32_t w15 = xmm2->ud[3];
|
||||
uint32_t w16 = xmm1->ud[0] + tho1(w14);
|
||||
uint32_t w17 = xmm1->ud[1] + tho1(w15);
|
||||
uint32_t w18 = xmm1->ud[2] + tho1(w16);
|
||||
uint32_t w19 = xmm1->ud[3] + tho1(w17);
|
||||
xmm1->ud[3] = w19;
|
||||
xmm1->ud[2] = w18;
|
||||
xmm1->ud[1] = w17;
|
||||
xmm1->ud[0] = w16;
|
||||
}
|
||||
|
||||
void sha1rnds4(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2, uint8_t ib)
|
||||
{
|
||||
uint32_t K = Ks[ib&3];
|
||||
uint32_t(*f)(uint32_t , uint32_t , uint32_t) = NULL;
|
||||
switch (ib&3) {
|
||||
case 0: f = f0; break;
|
||||
case 1: f = f1; break;
|
||||
case 2: f = f2; break;
|
||||
case 3: f = f3; break;
|
||||
}
|
||||
uint32_t A = xmm1->ud[3];
|
||||
uint32_t B = xmm1->ud[2];
|
||||
uint32_t C = xmm1->ud[1];
|
||||
uint32_t D = xmm1->ud[0];
|
||||
uint32_t E = 0;
|
||||
for(int i=0; i<4; ++i) {
|
||||
uint32_t new_A = f(B, C, D) + rol(A, 5) + xmm2->ud[3-i] + E + K;
|
||||
E = D;
|
||||
D = C;
|
||||
C = rol(B, 30);
|
||||
B = A;
|
||||
A = new_A;
|
||||
}
|
||||
xmm1->ud[3] = A;
|
||||
xmm1->ud[2] = B;
|
||||
xmm1->ud[1] = C;
|
||||
xmm1->ud[0] = D;
|
||||
}
|
||||
|
||||
void sha256rnds2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)
|
||||
{
|
||||
uint32_t A = xmm2->ud[3];
|
||||
uint32_t B = xmm2->ud[2];
|
||||
uint32_t C = xmm1->ud[3];
|
||||
uint32_t D = xmm1->ud[2];
|
||||
uint32_t E = xmm2->ud[1];
|
||||
uint32_t F = xmm2->ud[0];
|
||||
uint32_t G = xmm1->ud[1];
|
||||
uint32_t H = xmm1->ud[0];
|
||||
for(int i=0; i<2; ++i) {
|
||||
uint32_t new_A = Ch(E, F, G) + sigma1(E) + emu->xmm[0].ud[i] + H + Maj(A, B, C) + sigma0(A);
|
||||
uint32_t new_E = Ch(E, F, G) + sigma1(E) + emu->xmm[0].ud[i] + H + D;
|
||||
H = G;
|
||||
G = F;
|
||||
F = E;
|
||||
E = new_E;
|
||||
D = C;
|
||||
C = B;
|
||||
B = A;
|
||||
A = new_A;
|
||||
}
|
||||
xmm1->ud[3] = A;
|
||||
xmm1->ud[2] = B;
|
||||
xmm1->ud[1] = E;
|
||||
xmm1->ud[0] = F;
|
||||
}
|
18
src/emu/x64shaext.h
Normal file
18
src/emu/x64shaext.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef __X64_SHAEXT_H__
|
||||
#define __X64_SHAEXT_H__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "regs.h"
|
||||
|
||||
typedef struct x64emu_s x64emu_t;
|
||||
|
||||
void sha1nexte(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);
|
||||
void sha1msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);
|
||||
void sha1msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);
|
||||
void sha256msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);
|
||||
void sha256msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);
|
||||
void sha1rnds4(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2, uint8_t ib);
|
||||
void sha256rnds2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);
|
||||
|
||||
#endif //__X64_SHAEXT_H__
|
@ -37,6 +37,8 @@ extern int arm64_aes;
|
||||
extern int arm64_pmull;
|
||||
extern int arm64_crc32;
|
||||
extern int arm64_atomics;
|
||||
extern int arm64_sha1;
|
||||
extern int arm64_sha2;
|
||||
extern int arm64_flagm;
|
||||
extern int arm64_flagm2;
|
||||
#elif defined(RV64)
|
||||
|
14
src/main.c
14
src/main.c
@ -77,6 +77,8 @@ int arm64_aes = 0;
|
||||
int arm64_pmull = 0;
|
||||
int arm64_crc32 = 0;
|
||||
int arm64_atomics = 0;
|
||||
int arm64_sha1 = 0;
|
||||
int arm64_sha2 = 0;
|
||||
int arm64_uscat = 0;
|
||||
int arm64_flagm = 0;
|
||||
int arm64_flagm2 = 0;
|
||||
@ -377,6 +379,14 @@ HWCAP2_ECV
|
||||
arm64_aes = 1;
|
||||
if(hwcap&HWCAP_ATOMICS)
|
||||
arm64_atomics = 1;
|
||||
#ifdef HWCAP_SHA1
|
||||
if(hwcap&HWCAP_SHA1)
|
||||
arm64_sha1 = 1;
|
||||
#endif
|
||||
#ifdef HWCAP_SHA2
|
||||
if(hwcap&HWCAP_SHA2)
|
||||
arm64_sha2 = 1;
|
||||
#endif
|
||||
#ifdef HWCAP_USCAT
|
||||
if(hwcap&HWCAP_USCAT)
|
||||
arm64_uscat = 1;
|
||||
@ -407,6 +417,10 @@ HWCAP2_ECV
|
||||
printf_log(LOG_INFO, " PMULL");
|
||||
if(arm64_atomics)
|
||||
printf_log(LOG_INFO, " ATOMICS");
|
||||
if(arm64_sha1)
|
||||
printf_log(LOG_INFO, " SHA1");
|
||||
if(arm64_sha2)
|
||||
printf_log(LOG_INFO, " SHA2");
|
||||
if(arm64_uscat)
|
||||
printf_log(LOG_INFO, " USCAT");
|
||||
if(arm64_flagm)
|
||||
|
@ -270,10 +270,11 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
|
||||
R_EDX = 0;
|
||||
break;
|
||||
case 0x7: // extended bits...
|
||||
if(R_ECX==1) {
|
||||
R_EAX = 0; // Bit 5 is avx512_bf16
|
||||
} else
|
||||
R_EAX = R_ECX = R_EBX = R_EDX = 0; // TODO
|
||||
if(R_ECX==0) {
|
||||
R_EAX = 0;
|
||||
R_EBX = 0 |
|
||||
1<<29; // SHA extension
|
||||
} else {R_EAX = R_ECX = R_EBX = R_EDX = 0;}
|
||||
break;
|
||||
case 0xB: // Extended Topology Enumeration Leaf
|
||||
//TODO!
|
||||
|
31
tests/ref28.txt
Normal file
31
tests/ref28.txt
Normal file
@ -0,0 +1,31 @@
|
||||
test SHA Ext
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001, 0x0) => f40757f7-b4b82290-eab46b51-56a09e67
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001, 0x1) => 9477347e-209fe171-5285d814-5bb67ae9
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001, 0x2) => e27962a9-e186daec-1ca4d63c-23c6ef38
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001, 0x3) => f32a4da8-6c2529f4-85aebf4c-b298b076
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000, 0x0) => 986d7873-b5db5395-2ab46b29-16a09e66
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000, 0x1) => 78dd5f0b-21c31271-9285d7eb-1bb67ae8
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000, 0x2) => c6df831c-e2aa0bf1-5ca4d613-e3c6ef36
|
||||
sha1rnds4(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000, 0x3) => 57906d5d-6d485af4-c5aebf23-7298b075
|
||||
sha1rnds4(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000, 0x0) => d8ae9505-35dd5bf7-eab46f39-96a09e86
|
||||
sha1rnds4(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000, 0x1) => f91d79e9-61c513cc-4285dbed-1bb67b08
|
||||
sha1rnds4(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000, 0x2) => 4721274f-62ac1813-9ca4da2b-a3c6ef57
|
||||
sha1rnds4(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000, 0x3) => 57d20b8d-ad4a5c6d-b5aec325-7298b095
|
||||
sha1msg1(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001) => 00000000-00000000-00000004-00000003
|
||||
sha1msg1(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000) => 00000000-00000000-ffffffff-00000000
|
||||
sha1msg1(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000) => 00000006-00000002-fffffffd-00000001
|
||||
sha1msg2(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001) => 00000006-00000004-00000002-0000000c
|
||||
sha1msg2(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000) => 00000000-02468ace-00000001-00000000
|
||||
sha1msg2(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000) => 00000008-02468ac8-00000005-00000012
|
||||
sha1nexte(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001) => 00000004-00000003-00000002-00000001
|
||||
sha1nexte(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000) => ffffffff-00000000-01234567-80000000
|
||||
sha1nexte(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000) => 00000000-00000000-01234567-80000000
|
||||
sha256msg1(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001) => 02004000-00000000-00000000-00000000
|
||||
sha256msg1(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000) => 11002000-00000000-00000000-00000000
|
||||
sha256msg1(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000) => 11002004-08010003-0600c002-04008001
|
||||
sha256msg2(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001) => 100000a1-cc000078-00028000-0001e000
|
||||
sha256msg2(00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000) => 00006fe7-00000000-003fffff-00000000
|
||||
sha256msg2(00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000) => 0000b02c-0000a003-00400001-00000001
|
||||
sha256rnds2(00000000-00000000-00000000-00000000 ,00000004-00000003-00000002-00000001 ,ffffffff-00000000-01234567-80000000) => 6da40f0a-88601101-0353cda8-88400100
|
||||
sha256rnds2(00000004-00000003-00000002-00000001 ,00000000-00000000-00000000-00000000 ,ffffffff-00000000-01234567-80000000) => fc0f56a9-80000003-1bf348ad-80000006
|
||||
sha256rnds2(ffffffff-00000000-01234567-80000000 ,00000004-00000003-00000002-00000001 ,00000000-00000000-00000000-00000000) => c8b008c8-8983566d-ec5ad91b-89634665
|
BIN
tests/test28
Executable file
BIN
tests/test28
Executable file
Binary file not shown.
134
tests/test28.c
Normal file
134
tests/test28.c
Normal file
@ -0,0 +1,134 @@
|
||||
// build with gcc -O0 -g -msha -msse4.2 test28.c -o test28
|
||||
// and -m32 for 32bits version
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <pmmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
typedef unsigned char u8x16 __attribute__ ((vector_size (16)));
|
||||
typedef unsigned short u16x8 __attribute__ ((vector_size (16)));
|
||||
typedef unsigned int u32x4 __attribute__ ((vector_size (16)));
|
||||
typedef unsigned long int u64x2 __attribute__ ((vector_size (16)));
|
||||
typedef float f32x4 __attribute__ ((vector_size (16)));
|
||||
typedef double d64x2 __attribute__ ((vector_size (16)));
|
||||
|
||||
typedef union {
|
||||
__m128i mm;
|
||||
__m128 mf;
|
||||
__m128d md;
|
||||
u8x16 u8;
|
||||
u16x8 u16;
|
||||
u32x4 u32;
|
||||
u64x2 u64;
|
||||
f32x4 f32;
|
||||
d64x2 d64;
|
||||
} v128;
|
||||
|
||||
static const uint32_t A[] = {0, 0, 0, 0};
|
||||
static const uint32_t B[] = {1, 2, 3, 4};
|
||||
static const uint32_t C[] = {0x80000000, 0x1234567, 0, 0xffffffff};
|
||||
|
||||
static void print_u32(v128 a) {
|
||||
printf("%08x-%08x-%08x-%08x", a.u32[3], a.u32[2], a.u32[1], a.u32[0]);
|
||||
}
|
||||
|
||||
static void print_u32_res(v128 a, v128 b, const char* op, v128 res) {
|
||||
printf("%s(", op);
|
||||
print_u32(a);
|
||||
printf(" ,");
|
||||
print_u32(b);
|
||||
printf(") => ");
|
||||
print_u32(res);
|
||||
printf("\n");
|
||||
}
|
||||
static void print_u32_u8_res(v128 a, v128 b, const char* op, uint8_t ib, v128 res) {
|
||||
printf("%s(", op);
|
||||
print_u32(a);
|
||||
printf(" ,");
|
||||
print_u32(b);
|
||||
printf(", 0x%x) => ", ib);
|
||||
print_u32(res);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void print_3u32_res(v128 a, v128 b, v128 c, const char* op, v128 res) {
|
||||
printf("%s(", op);
|
||||
print_u32(a);
|
||||
printf(" ,");
|
||||
print_u32(b);
|
||||
printf(" ,");
|
||||
print_u32(c);
|
||||
printf(") => ");
|
||||
print_u32(res);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
printf("test SHA Ext\n");
|
||||
|
||||
v128 a, b, c, d;
|
||||
int ret;
|
||||
#define LOAD(a, A) a.u32[0] = A[0]; a.u32[1] = A[1]; a.u32[2] = A[2]; a.u32[3] = A[3]
|
||||
|
||||
#define GO2I_(A, B, C, I) \
|
||||
LOAD(a, A); \
|
||||
LOAD(b, B); \
|
||||
c.mm = _mm_##C##_epu32(a.mm, b.mm, I); \
|
||||
print_u32_u8_res(a, b, #C, I, c)
|
||||
|
||||
#define GO2(A, B, C) \
|
||||
LOAD(a, A); \
|
||||
LOAD(b, B); \
|
||||
c.mm = _mm_##C##_epu32(a.mm, b.mm); \
|
||||
print_u32_res(a, b, #C, c)
|
||||
|
||||
#define GO3(A, B, C, D) \
|
||||
LOAD(a, A); \
|
||||
LOAD(b, B); \
|
||||
LOAD(c, C); \
|
||||
d.mm = _mm_##D##_epu32(a.mm, b.mm, c.mm); \
|
||||
print_3u32_res(a, b, c, #D, d)
|
||||
|
||||
|
||||
#define GO2I(A, B, C) \
|
||||
GO2I_(A, B, C, 0x00); \
|
||||
GO2I_(A, B, C, 0x01); \
|
||||
GO2I_(A, B, C, 0x02); \
|
||||
GO2I_(A, B, C, 0x03)
|
||||
|
||||
GO2I(A, B, sha1rnds4);
|
||||
GO2I(A, C, sha1rnds4);
|
||||
GO2I(B, C, sha1rnds4);
|
||||
|
||||
GO2(A, B, sha1msg1);
|
||||
GO2(A, C, sha1msg1);
|
||||
GO2(B, C, sha1msg1);
|
||||
|
||||
GO2(A, B, sha1msg2);
|
||||
GO2(A, C, sha1msg2);
|
||||
GO2(B, C, sha1msg2);
|
||||
|
||||
GO2(A, B, sha1nexte);
|
||||
GO2(A, C, sha1nexte);
|
||||
GO2(B, C, sha1nexte);
|
||||
|
||||
GO2(A, B, sha256msg1);
|
||||
GO2(A, C, sha256msg1);
|
||||
GO2(B, C, sha256msg1);
|
||||
|
||||
GO2(A, B, sha256msg2);
|
||||
GO2(A, C, sha256msg2);
|
||||
GO2(B, C, sha256msg2);
|
||||
|
||||
GO3(A, B, C, sha256rnds2);
|
||||
GO3(B, A, C, sha256rnds2);
|
||||
GO3(C, B, A, sha256rnds2);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user