Added x86_64 tests (based on box86)

This commit is contained in:
ptitSeb 2021-02-28 13:19:23 +01:00
parent 80a8bf0ce1
commit abee0fb560
47 changed files with 2280 additions and 0 deletions

BIN
tests/benchfloat Executable file

Binary file not shown.

888
tests/benchfloat.c Executable file
View File

@ -0,0 +1,888 @@
/*
**
** LINPACK.C Linpack benchmark, calculates FLOPS.
** (FLoating Point Operations Per Second)
**
** Translated to C by Bonnie Toy 5/88
**
** Modified by Will Menninger, 10/93, with these features:
** (modified on 2/25/94 to fix a problem with daxpy for
** unequal increments or equal increments not equal to 1.
** Jack Dongarra)
**
** - Defaults to double precision.
** - Averages ROLLed and UNROLLed performance.
** - User selectable array sizes.
** - Automatically does enough repetitions to take at least 10 CPU seconds.
** - Prints machine precision.
** - ANSI prototyping.
**
** To compile: cc -O -o linpack linpack.c -lm
**
**
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <float.h>
#define DP
#ifdef SP
#define ZERO 0.0
#define ONE 1.0
#define PREC "Single"
#define BASE10DIG FLT_DIG
typedef float REAL;
#endif
#ifdef DP
#define ZERO 0.0e0
#define ONE 1.0e0
#define PREC "Double"
#define BASE10DIG DBL_DIG
typedef double REAL;
#endif
static REAL linpack (long nreps,int arsize);
static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma);
static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_r (int n,REAL da,REAL *dx,int incx);
static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_ur (int n,REAL da,REAL *dx,int incx);
static int idamax (int n,REAL *dx,int incx);
static REAL second (void);
static void *mempool;
void main(int argc, const char** argv)
{
char buf[80];
int arsize;
long arsize2d,memreq,nreps;
size_t malloc_arg;
if(argc>1)
strcpy(buf, argv[1]);
while (1)
{
/*printf("Enter array size (q to quit) [200]: ");
fgets(buf,79,stdin);*/
if (buf[0]=='q' || buf[0]=='Q')
break;
if (buf[0]=='\0' || buf[0]=='\n')
arsize=200;
else
arsize=atoi(buf);
arsize/=2;
arsize*=2;
if (arsize<10)
{
printf("Too small.\n");
continue;
}
arsize2d = (long)arsize*(long)arsize;
memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
printf("Memory required: %ldK.\n",(memreq+512L)>>10);
malloc_arg=(size_t)memreq;
if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
{
printf("Not enough memory available for given array size.\n\n");
continue;
}
printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
printf("Machine precision: %d digits.\n",BASE10DIG);
printf("Array size %d X %d.\n",arsize,arsize);
printf("Average rolled and unrolled performance:\n\n");
printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
printf("----------------------------------------------------\n");
nreps=1;
while (linpack(nreps,arsize)<10.)
nreps*=2;
free(mempool);
printf("\n");
strcpy(buf, "q");
}
}
static REAL linpack(long nreps,int arsize)
{
REAL *a,*b;
REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
int *ipvt,n,info,lda;
long i,arsize2d;
lda = arsize;
n = arsize/2;
arsize2d = (long)arsize*(long)arsize;
ops=((2.0*n*n*n)/3.0+2.0*n*n);
a=(REAL *)mempool;
b=a+arsize2d;
ipvt=(int *)&b[arsize];
tdgesl=0;
tdgefa=0;
totalt=second();
for (i=0;i<nreps;i++)
{
matgen(a,lda,n,b,&norma);
t1 = second();
dgefa(a,lda,n,ipvt,&info,1);
tdgefa += second()-t1;
t1 = second();
dgesl(a,lda,n,ipvt,b,0,1);
tdgesl += second()-t1;
}
for (i=0;i<nreps;i++)
{
matgen(a,lda,n,b,&norma);
t1 = second();
dgefa(a,lda,n,ipvt,&info,0);
tdgefa += second()-t1;
t1 = second();
dgesl(a,lda,n,ipvt,b,0,0);
tdgesl += second()-t1;
}
totalt=second()-totalt;
if (totalt<0.5 || tdgefa+tdgesl<0.2)
return(0.);
kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
toverhead=totalt-tdgefa-tdgesl;
if (tdgefa<0.)
tdgefa=0.;
if (tdgesl<0.)
tdgesl=0.;
if (toverhead<0.)
toverhead=0.;
printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
nreps,totalt,100.*tdgefa/totalt,
100.*tdgesl/totalt,100.*toverhead/totalt,
kflops);
return(totalt);
}
/*
** For matgen,
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
*/
static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
{
int init,i,j;
init = 1325;
*norma = 0.0;
for (j = 0; j < n; j++)
for (i = 0; i < n; i++)
{
init = (int)((long)3125*(long)init % 65536L);
a[lda*j+i] = (init - 32768.0)/16384.0;
*norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
}
for (i = 0; i < n; i++)
b[i] = 0.0;
for (j = 0; j < n; j++)
for (i = 0; i < n; i++)
b[i] = b[i] + a[lda*j+i];
}
/*
**
** DGEFA benchmark
**
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
**
** dgefa factors a double precision matrix by gaussian elimination.
**
** dgefa is usually called by dgeco, but it can be called
** directly with a saving in time if rcond is not needed.
** (time for dgeco) = (1 + 9/n)*(time for dgefa) .
**
** on entry
**
** a REAL precision[n][lda]
** the matrix to be factored.
**
** lda integer
** the leading dimension of the array a .
**
** n integer
** the order of the matrix a .
**
** on return
**
** a an upper triangular matrix and the multipliers
** which were used to obtain it.
** the factorization can be written a = l*u where
** l is a product of permutation and unit lower
** triangular matrices and u is upper triangular.
**
** ipvt integer[n]
** an integer vector of pivot indices.
**
** info integer
** = 0 normal value.
** = k if u[k][k] .eq. 0.0 . this is not an error
** condition for this subroutine, but it does
** indicate that dgesl or dgedi will divide by zero
** if called. use rcond in dgeco for a reliable
** indication of singularity.
**
** linpack. this version dated 08/14/78 .
** cleve moler, university of New Mexico, argonne national lab.
**
** functions
**
** blas daxpy,dscal,idamax
**
*/
static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
{
REAL t;
int idamax(),j,k,kp1,l,nm1;
/* gaussian elimination with partial pivoting */
if (roll)
{
*info = 0;
nm1 = n - 1;
if (nm1 >= 0)
for (k = 0; k < nm1; k++)
{
kp1 = k + 1;
/* find l = pivot index */
l = idamax(n-k,&a[lda*k+k],1) + k;
ipvt[k] = l;
/* zero pivot implies this column already
triangularized */
if (a[lda*k+l] != ZERO)
{
/* interchange if necessary */
if (l != k)
{
t = a[lda*k+l];
a[lda*k+l] = a[lda*k+k];
a[lda*k+k] = t;
}
/* compute multipliers */
t = -ONE/a[lda*k+k];
dscal_r(n-(k+1),t,&a[lda*k+k+1],1);
/* row elimination with column indexing */
for (j = kp1; j < n; j++)
{
t = a[lda*j+l];
if (l != k)
{
a[lda*j+l] = a[lda*j+k];
a[lda*j+k] = t;
}
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
}
}
else
(*info) = k;
}
ipvt[n-1] = n-1;
if (a[lda*(n-1)+(n-1)] == ZERO)
(*info) = n-1;
}
else
{
*info = 0;
nm1 = n - 1;
if (nm1 >= 0)
for (k = 0; k < nm1; k++)
{
kp1 = k + 1;
/* find l = pivot index */
l = idamax(n-k,&a[lda*k+k],1) + k;
ipvt[k] = l;
/* zero pivot implies this column already
triangularized */
if (a[lda*k+l] != ZERO)
{
/* interchange if necessary */
if (l != k)
{
t = a[lda*k+l];
a[lda*k+l] = a[lda*k+k];
a[lda*k+k] = t;
}
/* compute multipliers */
t = -ONE/a[lda*k+k];
dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);
/* row elimination with column indexing */
for (j = kp1; j < n; j++)
{
t = a[lda*j+l];
if (l != k)
{
a[lda*j+l] = a[lda*j+k];
a[lda*j+k] = t;
}
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
}
}
else
(*info) = k;
}
ipvt[n-1] = n-1;
if (a[lda*(n-1)+(n-1)] == ZERO)
(*info) = n-1;
}
}
/*
**
** DGESL benchmark
**
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
**
** dgesl solves the double precision system
** a * x = b or trans(a) * x = b
** using the factors computed by dgeco or dgefa.
**
** on entry
**
** a double precision[n][lda]
** the output from dgeco or dgefa.
**
** lda integer
** the leading dimension of the array a .
**
** n integer
** the order of the matrix a .
**
** ipvt integer[n]
** the pivot vector from dgeco or dgefa.
**
** b double precision[n]
** the right hand side vector.
**
** job integer
** = 0 to solve a*x = b ,
** = nonzero to solve trans(a)*x = b where
** trans(a) is the transpose.
**
** on return
**
** b the solution vector x .
**
** error condition
**
** a division by zero will occur if the input factor contains a
** zero on the diagonal. technically this indicates singularity
** but it is often caused by improper arguments or improper
** setting of lda . it will not occur if the subroutines are
** called correctly and if dgeco has set rcond .gt. 0.0
** or dgefa has set info .eq. 0 .
**
** to compute inverse(a) * c where c is a matrix
** with p columns
** dgeco(a,lda,n,ipvt,rcond,z)
** if (!rcond is too small){
** for (j=0,j<p,j++)
** dgesl(a,lda,n,ipvt,c[j][0],0);
** }
**
** linpack. this version dated 08/14/78 .
** cleve moler, university of new mexico, argonne national lab.
**
** functions
**
** blas daxpy,ddot
*/
static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)
{
REAL t;
int k,kb,l,nm1;
if (roll)
{
nm1 = n - 1;
if (job == 0)
{
/* job = 0 , solve a * x = b */
/* first solve l*y = b */
if (nm1 >= 1)
for (k = 0; k < nm1; k++)
{
l = ipvt[k];
t = b[l];
if (l != k)
{
b[l] = b[k];
b[k] = t;
}
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
}
/* now solve u*x = y */
for (kb = 0; kb < n; kb++)
{
k = n - (kb + 1);
b[k] = b[k]/a[lda*k+k];
t = -b[k];
daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
}
}
else
{
/* job = nonzero, solve trans(a) * x = b */
/* first solve trans(u)*y = b */
for (k = 0; k < n; k++)
{
t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
b[k] = (b[k] - t)/a[lda*k+k];
}
/* now solve trans(l)*x = y */
if (nm1 >= 1)
for (kb = 1; kb < nm1; kb++)
{
k = n - (kb+1);
b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
l = ipvt[k];
if (l != k)
{
t = b[l];
b[l] = b[k];
b[k] = t;
}
}
}
}
else
{
nm1 = n - 1;
if (job == 0)
{
/* job = 0 , solve a * x = b */
/* first solve l*y = b */
if (nm1 >= 1)
for (k = 0; k < nm1; k++)
{
l = ipvt[k];
t = b[l];
if (l != k)
{
b[l] = b[k];
b[k] = t;
}
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
}
/* now solve u*x = y */
for (kb = 0; kb < n; kb++)
{
k = n - (kb + 1);
b[k] = b[k]/a[lda*k+k];
t = -b[k];
daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
}
}
else
{
/* job = nonzero, solve trans(a) * x = b */
/* first solve trans(u)*y = b */
for (k = 0; k < n; k++)
{
t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
b[k] = (b[k] - t)/a[lda*k+k];
}
/* now solve trans(l)*x = y */
if (nm1 >= 1)
for (kb = 1; kb < nm1; kb++)
{
k = n - (kb+1);
b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
l = ipvt[k];
if (l != k)
{
t = b[l];
b[l] = b[k];
b[k] = t;
}
}
}
}
}
/*
** Constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
{
int i,ix,iy;
if (n <= 0)
return;
if (da == ZERO)
return;
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 1;
iy = 1;
if(incx < 0) ix = (-n+1)*incx + 1;
if(incy < 0)iy = (-n+1)*incy + 1;
for (i = 0;i < n; i++)
{
dy[iy] = dy[iy] + da*dx[ix];
ix = ix + incx;
iy = iy + incy;
}
return;
}
/* code for both increments equal to 1 */
for (i = 0;i < n; i++)
dy[i] = dy[i] + da*dx[i];
}
/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)
{
REAL dtemp;
int i,ix,iy;
dtemp = ZERO;
if (n <= 0)
return(ZERO);
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 0;
iy = 0;
if (incx < 0) ix = (-n+1)*incx;
if (incy < 0) iy = (-n+1)*incy;
for (i = 0;i < n; i++)
{
dtemp = dtemp + dx[ix]*dy[iy];
ix = ix + incx;
iy = iy + incy;
}
return(dtemp);
}
/* code for both increments equal to 1 */
for (i=0;i < n; i++)
dtemp = dtemp + dx[i]*dy[i];
return(dtemp);
}
/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static void dscal_r(int n,REAL da,REAL *dx,int incx)
{
int i,nincx;
if (n <= 0)
return;
if (incx != 1)
{
/* code for increment not equal to 1 */
nincx = n*incx;
for (i = 0; i < nincx; i = i + incx)
dx[i] = da*dx[i];
return;
}
/* code for increment equal to 1 */
for (i = 0; i < n; i++)
dx[i] = da*dx[i];
}
/*
** constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
{
int i,ix,iy,m;
if (n <= 0)
return;
if (da == ZERO)
return;
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 1;
iy = 1;
if(incx < 0) ix = (-n+1)*incx + 1;
if(incy < 0)iy = (-n+1)*incy + 1;
for (i = 0;i < n; i++)
{
dy[iy] = dy[iy] + da*dx[ix];
ix = ix + incx;
iy = iy + incy;
}
return;
}
/* code for both increments equal to 1 */
m = n % 4;
if ( m != 0)
{
for (i = 0; i < m; i++)
dy[i] = dy[i] + da*dx[i];
if (n < 4)
return;
}
for (i = m; i < n; i = i + 4)
{
dy[i] = dy[i] + da*dx[i];
dy[i+1] = dy[i+1] + da*dx[i+1];
dy[i+2] = dy[i+2] + da*dx[i+2];
dy[i+3] = dy[i+3] + da*dx[i+3];
}
}
/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)
{
REAL dtemp;
int i,ix,iy,m;
dtemp = ZERO;
if (n <= 0)
return(ZERO);
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 0;
iy = 0;
if (incx < 0) ix = (-n+1)*incx;
if (incy < 0) iy = (-n+1)*incy;
for (i = 0;i < n; i++)
{
dtemp = dtemp + dx[ix]*dy[iy];
ix = ix + incx;
iy = iy + incy;
}
return(dtemp);
}
/* code for both increments equal to 1 */
m = n % 5;
if (m != 0)
{
for (i = 0; i < m; i++)
dtemp = dtemp + dx[i]*dy[i];
if (n < 5)
return(dtemp);
}
for (i = m; i < n; i = i + 5)
{
dtemp = dtemp + dx[i]*dy[i] +
dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
}
return(dtemp);
}
/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static void dscal_ur(int n,REAL da,REAL *dx,int incx)
{
int i,m,nincx;
if (n <= 0)
return;
if (incx != 1)
{
/* code for increment not equal to 1 */
nincx = n*incx;
for (i = 0; i < nincx; i = i + incx)
dx[i] = da*dx[i];
return;
}
/* code for increment equal to 1 */
m = n % 5;
if (m != 0)
{
for (i = 0; i < m; i++)
dx[i] = da*dx[i];
if (n < 5)
return;
}
for (i = m; i < n; i = i + 5)
{
dx[i] = da*dx[i];
dx[i+1] = da*dx[i+1];
dx[i+2] = da*dx[i+2];
dx[i+3] = da*dx[i+3];
dx[i+4] = da*dx[i+4];
}
}
/*
** Finds the index of element having max. absolute value.
** Jack Dongarra, linpack, 3/11/78.
*/
static int idamax(int n,REAL *dx,int incx)
{
REAL dmax;
int i, ix, itemp;
if (n < 1)
return(-1);
if (n ==1 )
return(0);
if(incx != 1)
{
/* code for increment not equal to 1 */
ix = 1;
dmax = fabs((double)dx[0]);
ix = ix + incx;
for (i = 1; i < n; i++)
{
if(fabs((double)dx[ix]) > dmax)
{
itemp = i;
dmax = fabs((double)dx[ix]);
}
ix = ix + incx;
}
}
else
{
/* code for increment equal to 1 */
itemp = 0;
dmax = fabs((double)dx[0]);
for (i = 1; i < n; i++)
if(fabs((double)dx[i]) > dmax)
{
itemp = i;
dmax = fabs((double)dx[i]);
}
}
return (itemp);
}
static REAL second(void)
{
return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
}

BIN
tests/extensions/mmx Executable file

Binary file not shown.

674
tests/extensions/mmx.c Normal file
View File

@ -0,0 +1,674 @@
#include<stdint.h>
#include<stdio.h>
#include<stdbool.h>
#include<limits.h>
#include<immintrin.h>
#include<cpuid.h>
typedef uint8_t u8;
typedef int8_t i8;
typedef uint16_t u16;
typedef int16_t i16;
typedef uint32_t u32;
typedef int32_t i32;
typedef uint64_t u64;
typedef int64_t i64;
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define I8_MAX 0x7F
#define I8_MIN -0x80
#define U8_MAX 0xFF
#define U8_MIN 0
#define I16_MAX 0x7FFF
#define I16_MIN -0x8000
#define U16_MAX 0xFFFF
#define U16_MIN 0
#define I32_MAX 0x7FFFFFFF
#define I32_MIN -0x80000000
#define U32_MAX 0xFFFFFFFF
#define U32_MIN 0
#define I64_MAX 0x7FFFFFFFFFFFFFFF
#define I64_MIN -0x8000000000000000
#define U64_MAX 0xFFFFFFFFFFFFFFFF
#define U64_MIN 0
#define MMX_TEST_STRUCT(sz) \
typedef struct mmx_##sz##_test { \
sz a; \
sz b; \
sz result; \
} mmx_##sz##_test_t
MMX_TEST_STRUCT(u8);
MMX_TEST_STRUCT(i8);
MMX_TEST_STRUCT(u16);
MMX_TEST_STRUCT(i16);
MMX_TEST_STRUCT(u32);
MMX_TEST_STRUCT(i32);
MMX_TEST_STRUCT(u64);
MMX_TEST_STRUCT(i64);
// Binary compare two mm registers
bool mm_raw_compare(__m64 a, __m64 b) {
__m64 a_upper_reg = _mm_srli_si64(a, 32);
__m64 b_upper_reg = _mm_srli_si64(b, 32);
int a_lower = _m_to_int(a);
int a_upper = _m_to_int(a_upper_reg);
int b_lower = _m_to_int(b);
int b_upper = _m_to_int(b_upper_reg);
return (a_lower == b_lower) && (a_upper == b_upper);
}
// Load a 64 bit value into a mm register
__m64 mm_load64(u64 val) {
__m64 lower = _m_from_int(val & 0xFFFFFFFF);
__m64 upper = _m_from_int((val >> 32) & 0xFFFFFFFF);
__m64 shifted = _mm_slli_si64(upper, 32);
__m64 final = _m_por(shifted, lower);
return final;
}
#define MMX_ARITH_TEST(name, testcases, testcase_type, type, size, testfunc) \
bool name() { \
printf("TEST: " #name "\n"); \
int errors = 0; \
\
for (size_t i = 0; i < ARRAY_SIZE(testcases); i++ ) { \
testcase_type test_data = testcases[i]; \
\
__m64 a = _mm_set1_pi##size(test_data.a); \
__m64 b = _mm_set1_pi##size(test_data.b); \
__m64 expected = _mm_set1_pi##size(test_data.result); \
__m64 result = testfunc(a, b); \
\
bool success = mm_raw_compare(expected, result); \
errors += (int) (!success); \
} \
\
_m_empty(); \
printf("TEST: finished with: %d errors\n", errors); \
return errors; \
}
#define MMX_SHIFT_TEST(name, testcases, testfunc) \
bool name() { \
printf("TEST: " #name "\n"); \
int errors = 0; \
\
for (size_t i = 0; i < ARRAY_SIZE(testcases); i++ ) { \
mmx_u64_test_t test_data = testcases[i]; \
\
__m64 a = mm_load64(test_data.a); \
__m64 expected = mm_load64(test_data.result); \
__m64 result = testfunc(a, test_data.b); \
\
bool success = mm_raw_compare(expected, result); \
if (!success) { \
printf( \
"Failed; Expected: 0x%08x_%08x\tGot: 0x%08x_%08x\n", \
_m_to_int(_mm_srli_si64(expected, 32)), \
_m_to_int(expected), \
_m_to_int(_mm_srli_si64(result, 32)), \
_m_to_int(result) \
); \
} \
errors += (int) (!success); \
} \
\
_m_empty(); \
printf("TEST: finished with: %d errors\n", errors); \
return errors; \
}
// Loads 2 64 bit immediates and compares with the third
// Test data must be of type mmx_u64_test_t
#define MMX_64_TEST(name, testcases, testfunc) \
bool name() { \
printf("TEST: " #name "\n"); \
int errors = 0; \
\
for (size_t i = 0; i < ARRAY_SIZE(testcases); i++ ) { \
mmx_u64_test_t test_data = testcases[i]; \
\
__m64 a = mm_load64(test_data.a); \
__m64 b = mm_load64(test_data.b); \
__m64 expected = mm_load64(test_data.result); \
__m64 result = testfunc(a, b); \
\
bool success = mm_raw_compare(expected, result); \
if (!success) { \
printf( \
"Failed; Expected: 0x%08x_%08x\tGot: 0x%08x_%08x\n", \
_m_to_int(_mm_srli_si64(expected, 32)), \
_m_to_int(expected), \
_m_to_int(_mm_srli_si64(result, 32)), \
_m_to_int(result) \
); \
} \
errors += (int) (!success); \
} \
\
_m_empty(); \
printf("TEST: finished with: %d errors\n", errors); \
return errors; \
}
mmx_i8_test_t mmx_i8_add_test_data[] = {
{ .a = 1, .b = 2, .result = 3 },
{ .a = 0, .b = 1, .result = 1 },
{ .a = I8_MAX, .b = 1, .result = I8_MIN },
{ .a = I8_MIN, .b = -1, .result = I8_MAX },
{ .a = 0, .b = U8_MAX, .result = U8_MAX },
};
mmx_i8_test_t mmx_i8_add_sat_test_data[] = {
{ .a = 1, .b = 2, .result = 3 },
{ .a = 0, .b = 1, .result = 1 },
{ .a = I8_MAX, .b = 1, .result = I8_MAX },
{ .a = I8_MIN, .b = -1, .result = I8_MIN },
};
mmx_u8_test_t mmx_u8_add_sat_test_data[] = {
{ .a = 1, .b = 2, .result = 3 },
{ .a = 0, .b = 1, .result = 1 },
{ .a = U8_MAX, .b = 1, .result = U8_MAX },
{ .a = 0, .b = U8_MAX, .result = U8_MAX },
};
mmx_i16_test_t mmx_i16_add_test_data[] = {
{ .a = 1, .b = 2, .result = 3 },
{ .a = 0, .b = 1, .result = 1 },
{ .a = I16_MAX, .b = 1, .result = I16_MIN },
{ .a = I16_MIN, .b = -1, .result = I16_MAX },
};
mmx_i16_test_t mmx_i16_add_sat_test_data[] = {
{ .a = 1, .b = 2, .result = 3 },
{ .a = 0, .b = 1, .result = 1 },
{ .a = I16_MAX, .b = 1, .result = I16_MAX },
{ .a = I16_MIN, .b = -1, .result = I16_MIN },
};
mmx_u16_test_t mmx_u16_add_sat_test_data[] = {
{ .a = 1, .b = 2, .result = 3 },
{ .a = 0, .b = 1, .result = 1 },
{ .a = U16_MAX, .b = 1, .result = U16_MAX },
{ .a = 0, .b = U16_MAX, .result = U16_MAX },
};
mmx_i32_test_t mmx_i32_add_test_data[] = {
{ .a = 1, .b = 2, .result = 3 },
{ .a = 0, .b = 1, .result = 1 },
{ .a = I32_MAX, .b = 1, .result = I32_MIN },
{ .a = I32_MIN, .b = -1, .result = I32_MAX },
};
MMX_ARITH_TEST(test_mmx_paddb, mmx_i8_add_test_data, mmx_i8_test_t, i8, 8, _m_paddb);
MMX_ARITH_TEST(test_mmx_paddsb, mmx_i8_add_sat_test_data, mmx_i8_test_t, i8, 8, _m_paddsb);
MMX_ARITH_TEST(test_mmx_paddusb, mmx_u8_add_sat_test_data, mmx_u8_test_t, u8, 8, _m_paddusb);
MMX_ARITH_TEST(test_mmx_paddw, mmx_i16_add_test_data, mmx_i16_test_t, i16, 16, _m_paddw);
MMX_ARITH_TEST(test_mmx_paddsw, mmx_i16_add_sat_test_data, mmx_i16_test_t, i16, 16, _m_paddsw);
MMX_ARITH_TEST(test_mmx_paddusw, mmx_u16_add_sat_test_data, mmx_u16_test_t, u16, 16, _m_paddusw);
MMX_ARITH_TEST(test_mmx_paddd, mmx_i32_add_test_data, mmx_i32_test_t, i32, 32, _m_paddd);
mmx_i8_test_t mmx_i8_sub_test_data[] = {
{ .a = 3, .b = 2, .result = 1 },
{ .a = 1, .b = 1, .result = 0 },
{ .a = I8_MIN, .b = 1, .result = I8_MAX },
{ .a = I8_MAX, .b = -1, .result = I8_MIN },
{ .a = U8_MAX, .b = U8_MAX, .result = 0 },
};
mmx_i8_test_t mmx_i8_sub_sat_test_data[] = {
{ .a = 3, .b = 2, .result = 1 },
{ .a = 1, .b = 1, .result = 0 },
{ .a = I8_MIN, .b = 1, .result = I8_MIN },
{ .a = I8_MAX, .b = -1, .result = I8_MAX },
};
mmx_u8_test_t mmx_u8_sub_sat_test_data[] = {
{ .a = 3, .b = 2, .result = 1 },
{ .a = 1, .b = 1, .result = 0 },
{ .a = U8_MIN, .b = 1, .result = U8_MIN },
{ .a = U8_MAX, .b = U8_MAX, .result = 0 },
};
mmx_i16_test_t mmx_i16_sub_test_data[] = {
{ .a = 3, .b = 2, .result = 1 },
{ .a = 1, .b = 1, .result = 0 },
{ .a = I16_MIN, .b = 1, .result = I16_MAX },
{ .a = I16_MAX, .b = -1, .result = I16_MIN },
};
mmx_i16_test_t mmx_i16_sub_sat_test_data[] = {
{ .a = 3, .b = 2, .result = 1 },
{ .a = 1, .b = 1, .result = 0 },
{ .a = I16_MIN, .b = 1, .result = I16_MIN },
{ .a = I16_MAX, .b = -1, .result = I16_MAX },
};
mmx_u16_test_t mmx_u16_sub_sat_test_data[] = {
{ .a = 3, .b = 2, .result = 1 },
{ .a = 1, .b = 1, .result = 0 },
{ .a = U16_MIN, .b = 1, .result = U16_MIN },
{ .a = U16_MIN, .b = U16_MIN, .result = 0 },
};
mmx_i32_test_t mmx_i32_sub_test_data[] = {
{ .a = 3, .b = 2, .result = 1 },
{ .a = 1, .b = 1, .result = 0 },
{ .a = I32_MIN, .b = 1, .result = I32_MAX },
{ .a = I32_MAX, .b = -1, .result = I32_MIN },
};
MMX_ARITH_TEST(test_mmx_psubb, mmx_i8_sub_test_data, mmx_i8_test_t, i8, 8, _m_psubb);
MMX_ARITH_TEST(test_mmx_psubsb, mmx_i8_sub_sat_test_data, mmx_i8_test_t, i8, 8, _m_psubsb);
MMX_ARITH_TEST(test_mmx_psubusb, mmx_u8_sub_sat_test_data, mmx_u8_test_t, u8, 8, _m_psubusb);
MMX_ARITH_TEST(test_mmx_psubw, mmx_i16_sub_test_data, mmx_i16_test_t, i16, 16, _m_psubw);
MMX_ARITH_TEST(test_mmx_psubuw, mmx_i16_sub_sat_test_data, mmx_i16_test_t, i16, 16, _m_psubsw);
MMX_ARITH_TEST(test_mmx_psubusw, mmx_u16_sub_sat_test_data, mmx_u16_test_t, u16, 16, _m_psubusw);
MMX_ARITH_TEST(test_mmx_psubd, mmx_i32_sub_test_data, mmx_i32_test_t, i32, 32, _m_psubd);
mmx_u64_test_t mmx_por_test_data[] = {
{ .a = 0xAAAAAAAAAAAAAAAA,
.b = 0x5555555555555555,
.result = 0xFFFFFFFFFFFFFFFF },
{ .a = 0x0000000000000000,
.b = 0x1111111111111111,
.result = 0x1111111111111111 },
};
mmx_u64_test_t mmx_pand_test_data[] = {
{ .a = 0xAAAAAAAAAAAAAAAA,
.b = 0x5555555555555555,
.result = 0x0000000000000000 },
{ .a = 0xFFFFFFFFFFFFFFFF,
.b = 0xFFFFFFFFFFFFFFFF,
.result = 0xFFFFFFFFFFFFFFFF },
};
mmx_u64_test_t mmx_pandn_test_data[] = {
{ .a = 0x0000000000000000,
.b = 0xFFFFFFFFFFFFFFFF,
.result = 0xFFFFFFFFFFFFFFFF },
{ .a = 0xFFFFFFFFFFFFFFFF,
.b = 0x0000000000000000,
.result = 0x0000000000000000 },
};
mmx_u64_test_t mmx_pxor_test_data[] = {
{ .a = 0xAAAAAAAAAAAAAAAA,
.b = 0x5555555555555555,
.result = 0xFFFFFFFFFFFFFFFF },
{ .a = 0xFFFFFFFFFFFFFFFF,
.b = 0xFFFFFFFFFFFFFFFF,
.result = 0x0000000000000000 },
};
MMX_64_TEST(test_mmx_por, mmx_por_test_data, _m_por);
MMX_64_TEST(test_mmx_pand, mmx_pand_test_data, _m_pand);
MMX_64_TEST(test_mmx_pandn, mmx_pandn_test_data, _m_pandn);
MMX_64_TEST(test_mmx_pxor, mmx_pxor_test_data, _m_pxor);
mmx_i16_test_t mmx_pmullw_test_data[] = {
{ .a = 10, .b = 10, .result = 100 },
{ .a = 32000, .b = 10, .result = 0xE200 },
{ .a = 20000, .b = 20000, .result = 0x8400 },
};
mmx_i16_test_t mmx_pmulhw_test_data[] = {
{ .a = 10, .b = 10, .result = 0 },
{ .a = 32000, .b = 10, .result = 4 },
{ .a = 20000, .b = 20000, .result = 0x17D7 },
};
mmx_u64_test_t mmx_pmaddwd_test_data[] = {
{ .a = 0x0000000100000001,
.b = 0x0000000100000001,
.result = 0x0000000100000001 },
{ .a = 0x0000000200000004,
.b = 0x0000000200000004,
.result = 0x0000000400000010 },
{ .a = 0x000000007FFFFFFF,
.b = 0x000000007FFFFFFF,
.result = 0x000000003FFF0002 },
// -1 * -1 = 2
{ .a = 0x00000000FFFFFFFF,
.b = 0x00000000FFFFFFFF,
.result = 0x0000000000000002 },
};
MMX_ARITH_TEST(test_mmx_pmullw, mmx_pmullw_test_data, mmx_i16_test_t, i16, 16, _m_pmullw);
MMX_ARITH_TEST(test_mmx_pmulhw, mmx_pmulhw_test_data, mmx_i16_test_t, i16, 16, _m_pmulhw);
MMX_64_TEST(test_mmx_pmaddwd, mmx_pmaddwd_test_data, _m_pmaddwd);
mmx_u64_test_t mmx_packssdw_test_data[] = {
{ .a = 0x0000000200000001,
.b = 0x0000000400000003,
.result = 0x0004000300020001 },
{ .a = 0x7FFFFFFF7FFFFFFF,
.b = 0x7FFFFFFF7FFFFFFF,
.result = 0x7FFF7FFF7FFF7FFF },
{ .a = 0x8000000080000000,
.b = 0x8000000080000000,
.result = 0x8000800080008000 },
};
mmx_u64_test_t mmx_packsswb_test_data[] = {
{ .a = 0x0004000300020001,
.b = 0x0008000700060005,
.result = 0x0807060504030201 },
{ .a = 0x7FFF7FFF7FFF7FFF,
.b = 0x7FFF7FFF7FFF7FFF,
.result = 0x7F7F7F7F7F7F7F7F },
{ .a = 0x8000800080008000,
.b = 0x8000800080008000,
.result = 0x8080808080808080 },
};
mmx_u64_test_t mmx_packuswb_test_data[] = {
{ .a = 0x0004000300020001,
.b = 0x0008000700060005,
.result = 0x0807060504030201 },
{ .a = 0x7FFF7FFF7FFF7FFF,
.b = 0x7FFF7FFF7FFF7FFF,
.result = 0xFFFFFFFFFFFFFFFF },
{ .a = 0x8000800080008000,
.b = 0x8000800080008000,
.result = 0x0000000000000000 },
};
MMX_64_TEST(test_mmx_packssdw, mmx_packssdw_test_data, _m_packssdw);
MMX_64_TEST(test_mmx_packsswb, mmx_packsswb_test_data, _m_packsswb);
MMX_64_TEST(test_mmx_packuswb, mmx_packuswb_test_data, _m_packuswb);
mmx_u64_test_t mmx_punpckhbw_test_data[] = {
{ .a = 0x4433221100000000,
.b = 0x8877665500000000,
.result = 0x8844773366225511 },
};
mmx_u64_test_t mmx_punpckhdq_test_data[] = {
{ .a = 0xAAAAAAAA00000000,
.b = 0xBBBBBBBB00000000,
.result = 0xBBBBBBBBAAAAAAAA },
};
mmx_u64_test_t mmx_punpckhwd_test_data[] = {
{ .a = 0xBBBBAAAA00000000,
.b = 0xDDDDCCCC00000000,
.result = 0xDDDDBBBBCCCCAAAA },
};
mmx_u64_test_t mmx_punpcklbw_test_data[] = {
{ .a = 0x0000000044332211,
.b = 0x0000000088776655,
.result = 0x8844773366225511 },
};
mmx_u64_test_t mmx_punpckldq_test_data[] = {
{ .a = 0x00000000AAAAAAAA,
.b = 0x00000000BBBBBBBB,
.result = 0xBBBBBBBBAAAAAAAA },
};
mmx_u64_test_t mmx_punpcklwd_test_data[] = {
{ .a = 0x00000000BBBBAAAA,
.b = 0x00000000DDDDCCCC,
.result = 0xDDDDBBBBCCCCAAAA },
};
MMX_64_TEST(test_mmx_punpckhbw, mmx_punpckhbw_test_data, _m_punpckhbw);
MMX_64_TEST(test_mmx_punpckhdq, mmx_punpckhdq_test_data, _m_punpckhdq);
MMX_64_TEST(test_mmx_punpckhwd, mmx_punpckhwd_test_data, _m_punpckhwd);
MMX_64_TEST(test_mmx_punpcklbw, mmx_punpcklbw_test_data, _m_punpcklbw);
MMX_64_TEST(test_mmx_punpckldq, mmx_punpckldq_test_data, _m_punpckldq);
MMX_64_TEST(test_mmx_punpcklwd, mmx_punpcklwd_test_data, _m_punpcklwd);
mmx_u64_test_t mmx_pcmpeqb_test_data[] = {
{ .a = 0x8877665544332211,
.b = 0x0077005500330011,
.result = 0x00FF00FF00FF00FF },
};
mmx_u64_test_t mmx_pcmpeqw_test_data[] = {
{ .a = 0x4444333322221111,
.b = 0x0000333300001111,
.result = 0x0000FFFF0000FFFF },
};
mmx_u64_test_t mmx_pcmpeqd_test_data[] = {
{ .a = 0x2222222211111111,
.b = 0x2222222200000000,
.result = 0xFFFFFFFF00000000 },
};
mmx_u64_test_t mmx_pcmpgtb_test_data[] = {
{ .a = 0x0000000000002201,
.b = 0x0000000000002300,
.result = 0x00000000000000FF },
};
mmx_u64_test_t mmx_pcmpgtw_test_data[] = {
{ .a = 0x4444333322221111,
.b = 0x0000333300001112,
.result = 0xFFFF0000FFFF0000 },
};
mmx_u64_test_t mmx_pcmpgtd_test_data[] = {
{ .a = 0x2222222111111111,
.b = 0x2222222200000000,
.result = 0x00000000FFFFFFFF },
};
MMX_64_TEST(test_mmx_pcmpeqb, mmx_pcmpeqb_test_data, _m_pcmpeqb);
MMX_64_TEST(test_mmx_pcmpeqw, mmx_pcmpeqw_test_data, _m_pcmpeqw);
MMX_64_TEST(test_mmx_pcmpeqd, mmx_pcmpeqd_test_data, _m_pcmpeqd);
MMX_64_TEST(test_mmx_pcmpgtb, mmx_pcmpgtb_test_data, _m_pcmpgtb);
MMX_64_TEST(test_mmx_pcmpgtw, mmx_pcmpgtw_test_data, _m_pcmpgtw);
MMX_64_TEST(test_mmx_pcmpgtd, mmx_pcmpgtd_test_data, _m_pcmpgtd);
mmx_u64_test_t mmx_pslld_test_data[] = {
{ .a = 1, .b = 1, .result = 2 },
{ .a = 16, .b = 1, .result = 32 },
{ .a = 16, .b = 32, .result = 0 },
{ .a = 16, .b = 0, .result = 16 },
};
mmx_u64_test_t mmx_psllq_test_data[] = {
{ .a = 1, .b = 1, .result = 2 },
{ .a = 16, .b = 1, .result = 32 },
{ .a = 16, .b = 64, .result = 0 },
{ .a = 16, .b = 0, .result = 16 },
};
mmx_u64_test_t mmx_psllw_test_data[] = {
{ .a = 1, .b = 1, .result = 2 },
{ .a = 16, .b = 1, .result = 32 },
{ .a = 16, .b = 16, .result = 0 },
{ .a = 16, .b = 0, .result = 16 },
};
mmx_u64_test_t mmx_psrad_test_data[] = {
{ .a = 1, .b = 1, .result = 0 },
{ .a = 16, .b = 1, .result = 8 },
{ .a = 16, .b = 0, .result = 16 },
{ .a = 0x7FFFFFFF, .b = 1, .result = 0x3FFFFFFF },
{ .a = I32_MAX, .b = 32, .result = 0 },
{ .a = I32_MIN, .b = 32, .result = U32_MAX },
};
mmx_u64_test_t mmx_psraw_test_data[] = {
{ .a = 1, .b = 1, .result = 0 },
{ .a = 16, .b = 1, .result = 8 },
{ .a = 16, .b = 0, .result = 16 },
{ .a = 0x7FFF, .b = 1, .result = 0x3FFF },
{ .a = I16_MAX, .b = 16, .result = 0 },
{ .a = U16_MAX, .b = 16, .result = U16_MAX },
};
mmx_u64_test_t mmx_psrld_test_data[] = {
{ .a = 1, .b = 1, .result = 0 },
{ .a = 16, .b = 1, .result = 8 },
{ .a = 16, .b = 0, .result = 16 },
{ .a = 0x7FFFFFFF, .b = 1, .result = 0x3FFFFFFF },
{ .a = I32_MAX, .b = 32, .result = 0 },
{ .a = I32_MIN, .b = 32, .result = 0 },
};
mmx_u64_test_t mmx_psrlq_test_data[] = {
{ .a = 1, .b = 1, .result = 0 },
{ .a = 16, .b = 1, .result = 8 },
{ .a = 16, .b = 0, .result = 16 },
{ .a = I64_MAX, .b = 64, .result = 0 },
{ .a = I64_MIN, .b = 64, .result = 0 },
};
mmx_u64_test_t mmx_psrlw_test_data[] = {
{ .a = 1, .b = 1, .result = 0 },
{ .a = 16, .b = 1, .result = 8 },
{ .a = 16, .b = 0, .result = 16 },
{ .a = I16_MAX, .b = 16, .result = 0 },
// TODO: Works on my machine
// { .a = I16_MIN, .b = 16, .result = 0 },
};
MMX_64_TEST(test_mmx_pslld, mmx_pslld_test_data, _m_pslld);
MMX_64_TEST(test_mmx_psllq, mmx_psllq_test_data, _m_psllq);
MMX_64_TEST(test_mmx_psllw, mmx_psllw_test_data, _m_psllw);
MMX_64_TEST(test_mmx_psrad, mmx_psrad_test_data, _m_psrad);
MMX_64_TEST(test_mmx_psraw, mmx_psraw_test_data, _m_psraw);
MMX_64_TEST(test_mmx_psrld, mmx_psrld_test_data, _m_psrld);
MMX_64_TEST(test_mmx_psrlq, mmx_psrlq_test_data, _m_psrlq);
MMX_64_TEST(test_mmx_psrlw, mmx_psrlw_test_data, _m_psrlw);
MMX_SHIFT_TEST(test_mmx_pslldi, mmx_pslld_test_data, _m_pslldi);
MMX_SHIFT_TEST(test_mmx_psllqi, mmx_psllq_test_data, _m_psllqi);
MMX_SHIFT_TEST(test_mmx_psllwi, mmx_psllw_test_data, _m_psllwi);
MMX_SHIFT_TEST(test_mmx_psradi, mmx_psrad_test_data, _m_psradi);
MMX_SHIFT_TEST(test_mmx_psrawi, mmx_psraw_test_data, _m_psrawi);
MMX_SHIFT_TEST(test_mmx_psrldi, mmx_psrld_test_data, _m_psrldi);
MMX_SHIFT_TEST(test_mmx_psrlqi, mmx_psrlq_test_data, _m_psrlqi);
MMX_SHIFT_TEST(test_mmx_psrlwi, mmx_psrlw_test_data, _m_psrlwi);
bool test_mmx_cpuid() {
printf("TEST: test_mmx_cpuid\n");
unsigned int eax, ebx, ecx, edx;
asm volatile(
"cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "a" (1), "c" (0)
);
int has_mmx = !!(edx & (1 << 23));
if (has_mmx) {
return 0;
}
return 1;
}
int main() {
int errors = 0;
errors += (int) test_mmx_cpuid();
errors += (int) test_mmx_paddb();
errors += (int) test_mmx_paddsb();
errors += (int) test_mmx_paddusb();
errors += (int) test_mmx_paddw();
errors += (int) test_mmx_paddsw();
errors += (int) test_mmx_paddusw();
errors += (int) test_mmx_paddd();
errors += (int) test_mmx_psubb();
errors += (int) test_mmx_psubsb();
errors += (int) test_mmx_psubusb();
errors += (int) test_mmx_psubw();
errors += (int) test_mmx_psubuw();
errors += (int) test_mmx_psubusw();
errors += (int) test_mmx_psubd();
errors += (int) test_mmx_por();
errors += (int) test_mmx_pand();
errors += (int) test_mmx_pandn();
errors += (int) test_mmx_pxor();
errors += (int) test_mmx_pmullw();
errors += (int) test_mmx_pmulhw();
errors += (int) test_mmx_pmaddwd();
errors += (int) test_mmx_packssdw();
errors += (int) test_mmx_packsswb();
errors += (int) test_mmx_packuswb();
errors += (int) test_mmx_punpckhbw();
errors += (int) test_mmx_punpckhdq();
errors += (int) test_mmx_punpckhwd();
errors += (int) test_mmx_punpcklbw();
errors += (int) test_mmx_punpckldq();
errors += (int) test_mmx_punpcklwd();
errors += (int) test_mmx_pcmpeqb();
errors += (int) test_mmx_pcmpeqw();
errors += (int) test_mmx_pcmpeqd();
errors += (int) test_mmx_pcmpgtb();
errors += (int) test_mmx_pcmpgtw();
errors += (int) test_mmx_pcmpgtd();
errors += (int) test_mmx_psllw();
errors += (int) test_mmx_psllwi();
errors += (int) test_mmx_pslld();
errors += (int) test_mmx_pslldi();
errors += (int) test_mmx_psllq();
errors += (int) test_mmx_psllqi();
errors += (int) test_mmx_psraw();
errors += (int) test_mmx_psrawi();
errors += (int) test_mmx_psrad();
errors += (int) test_mmx_psradi();
errors += (int) test_mmx_psrld();
errors += (int) test_mmx_psrldi();
errors += (int) test_mmx_psrlq();
errors += (int) test_mmx_psrlqi();
errors += (int) test_mmx_psrlw();
errors += (int) test_mmx_psrlwi();
printf("Errors: %d\n", errors);
return errors;
}

106
tests/extensions/mmx.txt Normal file
View File

@ -0,0 +1,106 @@
TEST: test_mmx_cpuid
TEST: test_mmx_paddb
TEST: finished with: 0 errors
TEST: test_mmx_paddsb
TEST: finished with: 0 errors
TEST: test_mmx_paddusb
TEST: finished with: 0 errors
TEST: test_mmx_paddw
TEST: finished with: 0 errors
TEST: test_mmx_paddsw
TEST: finished with: 0 errors
TEST: test_mmx_paddusw
TEST: finished with: 0 errors
TEST: test_mmx_paddd
TEST: finished with: 0 errors
TEST: test_mmx_psubb
TEST: finished with: 0 errors
TEST: test_mmx_psubsb
TEST: finished with: 0 errors
TEST: test_mmx_psubusb
TEST: finished with: 0 errors
TEST: test_mmx_psubw
TEST: finished with: 0 errors
TEST: test_mmx_psubuw
TEST: finished with: 0 errors
TEST: test_mmx_psubusw
TEST: finished with: 0 errors
TEST: test_mmx_psubd
TEST: finished with: 0 errors
TEST: test_mmx_por
TEST: finished with: 0 errors
TEST: test_mmx_pand
TEST: finished with: 0 errors
TEST: test_mmx_pandn
TEST: finished with: 0 errors
TEST: test_mmx_pxor
TEST: finished with: 0 errors
TEST: test_mmx_pmullw
TEST: finished with: 0 errors
TEST: test_mmx_pmulhw
TEST: finished with: 0 errors
TEST: test_mmx_pmaddwd
TEST: finished with: 0 errors
TEST: test_mmx_packssdw
TEST: finished with: 0 errors
TEST: test_mmx_packsswb
TEST: finished with: 0 errors
TEST: test_mmx_packuswb
TEST: finished with: 0 errors
TEST: test_mmx_punpckhbw
TEST: finished with: 0 errors
TEST: test_mmx_punpckhdq
TEST: finished with: 0 errors
TEST: test_mmx_punpckhwd
TEST: finished with: 0 errors
TEST: test_mmx_punpcklbw
TEST: finished with: 0 errors
TEST: test_mmx_punpckldq
TEST: finished with: 0 errors
TEST: test_mmx_punpcklwd
TEST: finished with: 0 errors
TEST: test_mmx_pcmpeqb
TEST: finished with: 0 errors
TEST: test_mmx_pcmpeqw
TEST: finished with: 0 errors
TEST: test_mmx_pcmpeqd
TEST: finished with: 0 errors
TEST: test_mmx_pcmpgtb
TEST: finished with: 0 errors
TEST: test_mmx_pcmpgtw
TEST: finished with: 0 errors
TEST: test_mmx_pcmpgtd
TEST: finished with: 0 errors
TEST: test_mmx_psllw
TEST: finished with: 0 errors
TEST: test_mmx_psllwi
TEST: finished with: 0 errors
TEST: test_mmx_pslld
TEST: finished with: 0 errors
TEST: test_mmx_pslldi
TEST: finished with: 0 errors
TEST: test_mmx_psllq
TEST: finished with: 0 errors
TEST: test_mmx_psllqi
TEST: finished with: 0 errors
TEST: test_mmx_psraw
TEST: finished with: 0 errors
TEST: test_mmx_psrawi
TEST: finished with: 0 errors
TEST: test_mmx_psrad
TEST: finished with: 0 errors
TEST: test_mmx_psradi
TEST: finished with: 0 errors
TEST: test_mmx_psrld
TEST: finished with: 0 errors
TEST: test_mmx_psrldi
TEST: finished with: 0 errors
TEST: test_mmx_psrlq
TEST: finished with: 0 errors
TEST: test_mmx_psrlqi
TEST: finished with: 0 errors
TEST: test_mmx_psrlw
TEST: finished with: 0 errors
TEST: test_mmx_psrlwi
TEST: finished with: 0 errors
Errors: 0

1
tests/ref01.txt Normal file
View File

@ -0,0 +1 @@
Hello x86_64 World!

1
tests/ref02.txt Normal file
View File

@ -0,0 +1 @@
Hello x86_64 World!

1
tests/ref03.txt Normal file
View File

@ -0,0 +1 @@
Hello x86_64 World!

1
tests/ref04.txt Normal file
View File

@ -0,0 +1 @@
Hello, argc=2 argv[1]=yeah

6
tests/ref05.txt Normal file
View File

@ -0,0 +1,6 @@
fact(7)=5040
Prime list 0..5040: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 101 103 107 109 113 127 131 137 139 149 151 157 163 167 173 179 181 191 193 197 199 211 223 227 229 233 239 241 251 257 263 269 271 277 281 283 293 307 311 313 317 331 337 347 349 353 359 367 373 379 383 389 397 401 409 419 421 431 433 439 443 449 457 461 463 467 479 487 491 499 503 509 521 523 541 547 557 563 569 571 577 587 593 599 601 607 613 617 619 631 641 643 647 653 659 661 673 677 683 691 701 709 719 727 733 739 743 751 757 761 769 773 787 797 809 811 821 823 827 829 839 853 857 859 863 877 881 883 887 907 911 919 929 937 941 947 953 967 971 977 983 991 997 1009 1013 1019 1021 1031 1033 1039 1049 1051 1061 1063 1069 1087 1091 1093 1097 1103 1109 1117 1123 1129 1151 1153 1163 1171 1181 1187 1193 1201 1213 1217 1223 1229 1231 1237 1249 1259 1277 1279 1283 1289 1291 1297 1301 1303 1307 1319 1321 1327 1361 1367 1373 1381 1399 1409 1423 1427 1429 1433 1439 1447 1451 1453 1459 1471 1481 1483 1487 1489 1493 1499 1511 1523 1531 1543 1549 1553 1559 1567 1571 1579 1583 1597 1601 1607 1609 1613 1619 1621 1627 1637 1657 1663 1667 1669 1693 1697 1699 1709 1721 1723 1733 1741 1747 1753 1759 1777 1783 1787 1789 1801 1811 1823 1831 1847 1861 1867 1871 1873 1877 1879 1889 1901 1907 1913 1931 1933 1949 1951 1973 1979 1987 1993 1997 1999 2003 2011 2017 2027 2029 2039 2053 2063 2069 2081 2083 2087 2089 2099 2111 2113 2129 2131 2137 2141 2143 2153 2161 2179 2203 2207 2213 2221 2237 2239 2243 2251 2267 2269 2273 2281 2287 2293 2297 2309 2311 2333 2339 2341 2347 2351 2357 2371 2377 2381 2383 2389 2393 2399 2411 2417 2423 2437 2441 2447 2459 2467 2473 2477 2503 2521 2531 2539 2543 2549 2551 2557 2579 2591 2593 2609 2617 2621 2633 2647 2657 2659 2663 2671 2677 2683 2687 2689 2693 2699 2707 2711 2713 2719 2729 2731 2741 2749 2753 2767 2777 2789 2791 2797 2801 2803 2819 2833 2837 2843 2851 2857 2861 2879 2887 2897 2903 2909 2917 2927 2939 2953 2957 2963 2969 2971 2999 3001 3011 3019 3023 3037 3041 3049 3061 3067 3079 3083 3089 3109 3119 3121 3137 3163 3167 3169 3181 3187 3191 3203 3209 3217 3221 3229 3251 3253 3257 3259 3271 3299 3301 3307 3313 3319 3323 3329 3331 3343 3347 3359 3361 3371 3373 3389 3391 3407 3413 3433 3449 3457 3461 3463 3467 3469 3491 3499 3511 3517 3527 3529 3533 3539 3541 3547 3557 3559 3571 3581 3583 3593 3607 3613 3617 3623 3631 3637 3643 3659 3671 3673 3677 3691 3697 3701 3709 3719 3727 3733 3739 3761 3767 3769 3779 3793 3797 3803 3821 3823 3833 3847 3851 3853 3863 3877 3881 3889 3907 3911 3917 3919 3923 3929 3931 3943 3947 3967 3989 4001 4003 4007 4013 4019 4021 4027 4049 4051 4057 4073 4079 4091 4093 4099 4111 4127 4129 4133 4139 4153 4157 4159 4177 4201 4211 4217 4219 4229 4231 4241 4243 4253 4259 4261 4271 4273 4283 4289 4297 4327 4337 4339 4349 4357 4363 4373 4391 4397 4409 4421 4423 4441 4447 4451 4457 4463 4481 4483 4493 4507 4513 4517 4519 4523 4547 4549 4561 4567 4583 4591 4597 4603 4621 4637 4639 4643 4649 4651 4657 4663 4673 4679 4691 4703 4721 4723 4729 4733 4751 4759 4783 4787 4789 4793 4799 4801 4813 4817 4831 4861 4871 4877 4889 4903 4909 4919 4931 4933 4937 4943 4951 4957 4967 4969 4973 4987 4993 4999 5003 5009 5011 5021 5023 5039
(un)signed char = -5/83 (un)signed int = -53/65500 total=65525
65500/5=13100, 65500%5=0
65525/5=13105, 65525%5=0
65525/-53=-1236 + 17

4
tests/ref06.txt Normal file
View File

@ -0,0 +1,4 @@
[02] Second thread executing
[02] Thread done.
[00] Done.

1
tests/ref07.txt Normal file
View File

@ -0,0 +1 @@
0 is 0.000000, sin(pi/2) is 1.000000 and 3*1.5 is 4.500000.

1
tests/ref08.txt Normal file
View File

@ -0,0 +1 @@
31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185

2
tests/ref09.txt Normal file
View File

@ -0,0 +1,2 @@
Child has x = 2
Parent has x = 0

10
tests/ref10.txt Normal file
View File

@ -0,0 +1,10 @@
thread #0
thread #1
thread #2
thread #3
thread #4
thread #5
thread #6
thread #7
thread #8
thread #9

8
tests/ref11.txt Normal file
View File

@ -0,0 +1,8 @@
Create/start 2 threads
Thread 1: Entered (10/20)
Thread 1: foo(), TLS data=0 2 "-1-"
Thread 2: Entered (10/20)
Thread 2: foo(), TLS data=1 4 "-2-"
Thread 2: bar(), TLS data=1 4 "-2-"
Thread 1: bar(), TLS data=0 2 "-1-"
Main completed

4
tests/ref12.txt Normal file
View File

@ -0,0 +1,4 @@
1000000000000 => 1000000000000.000000
-1000000000000 => -1000000000000.000000
(angle_t)268435456.000000 = 268435456 == 0x10000000
go PI trucated=3, -PI rounded=-3

8
tests/ref13.txt Normal file
View File

@ -0,0 +1,8 @@
main: swapcontext(&uctx_main, &uctx_func2)
func2: started (1, 2)
func2: swapcontext(&uctx_func2, &uctx_func1)
func1: started
func1: swapcontext(&uctx_func1, &uctx_func2)
func2: returning
func1: returning
main: exiting

5
tests/ref14.txt Normal file
View File

@ -0,0 +1,5 @@
Thread: thread_state = 1.
Main thread: thread_state == 1.
Thread: thread_state = 2.
Main thread: thread_state == 2.
Finished with no errors.

BIN
tests/test01 Executable file

Binary file not shown.

19
tests/test01.c Normal file
View File

@ -0,0 +1,19 @@
#include <sys/syscall.h>
#include <unistd.h>
int main(int argc, char **argv)
{
const char msg[] = "Hello x86_64 World!\n";
//syscall(1, STDOUT_FILENO, msg, sizeof(msg)-1);
asm (
"mov $1, %%rax \n"
"mov $1, %%rdi \n"
"mov %0, %%rsi \n"
"mov $20, %%rdx \n"
"syscall \n"
:
:"r" (msg)
:"%rax","%rdi","%rsi","%rdx"
);
return 0;
}

BIN
tests/test02 Executable file

Binary file not shown.

9
tests/test02.c Normal file
View File

@ -0,0 +1,9 @@
#include <sys/syscall.h>
#include <unistd.h>
int main(int argc, char **argv)
{
const char msg[] = "Hello x86_64 World!\n";
syscall(1, STDOUT_FILENO, msg, sizeof(msg)-1);
return 0;
}

BIN
tests/test03 Executable file

Binary file not shown.

7
tests/test03.c Normal file
View File

@ -0,0 +1,7 @@
#include <stdio.h>
int main(int argc, char **argv)
{
printf("Hello x86_64 World!\n");
return 0;
}

BIN
tests/test04 Executable file

Binary file not shown.

7
tests/test04.c Normal file
View File

@ -0,0 +1,7 @@
#include <stdio.h>
int main(int argc, char **argv)
{
printf("Hello, argc=%d argv[%d]=%s\n", argc, argc-1, argv[argc-1]);
return 0;
}

BIN
tests/test05 Executable file

Binary file not shown.

62
tests/test05.c Executable file
View File

@ -0,0 +1,62 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
int fact(int i) {
if (i<2) return i;
return i*fact(i-1);
}
#define SET(M) dels[M/8] |= (1<<(M%8))
#define GET(M) ((dels[M/8]>>(M%8))&1)
int main(int argc, const char** argv)
{
int j = 5;
if(argc>1)
j = atoi(argv[1]);
if(j==0)
j=5;
if(j>15) j=15;
int k = fact(j);
printf("fact(%d)=%d\n", j, k);
uint8_t* dels = (char*)calloc((k+7)/8, 1);
SET(0);
SET(1);
for (int i=2; i<k; i++)
if (!GET(i)) {
int m = 2 * i;
while (m < k) {
SET(m);
m += i;
}
}
printf("Prime list 0..%d: ", k);
for (int i=0; i<k; i++)
if (!GET(i))
printf("%d ", i);
printf("\n");
free(dels);
signed char sc = -5;
unsigned char uc = 83;
signed short int ss = -53;
unsigned short int us = 65500;
int it = sc+uc+ss+us;
printf("(un)signed char = %hhd/%hhu (un)signed int = %hd/%hu total=%d\n", sc, uc, ss, us, it);
printf("%hu/5=%hu, %hu%%5=%hu\n", us, us/5, us, us%5);
printf("%d/5=%d, %d%%5=%d\n", it, it/5, it, it%5);
printf("%d/%hd=%d + %d", it, ss, it/ss, it%ss);
return 0;
}

BIN
tests/test06 Executable file

Binary file not shown.

55
tests/test06.c Normal file
View File

@ -0,0 +1,55 @@
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
const int thread_count = 2;
pthread_t tid[2];
const char *thread_messages[2] = {
"First thread executing",
"Second thread executing"
};
void *doSomething(void *arg)
{
pthread_t id = pthread_self();
int num = -1;
for (int i = 0 ; i < thread_count ; ++i)
{
if (pthread_equal(id, tid[i]))
{
num = i + 1;
if (num == 2) printf("[%02d] %s\n", num, thread_messages[i]);
break;
}
}
for (unsigned int i = 0 ; i < 0x10000 ; ++i);
if (num == 2) printf("[%02d] Thread done.\n", num);
return NULL;
}
int main(int argc, char const *argv[])
{
int err;
for (int i = 0 ; i < thread_count ; ++i)
{
//printf("[00] Thread %d starting\n", i + 1);
err = pthread_create(&tid[i], NULL, doSomething, NULL);
if (err)
{
printf("[00] Couldn't create thread %d: %s\n", i + 1, strerror(err));
}
for (unsigned int i = 0 ; i < 0x1000 ; ++i);
}
//printf("[00] Waiting for all threads to end...\n");
for (int i = 0 ; i < thread_count ; ++i)
pthread_join(tid[i], NULL);
printf("\n[00] Done.\n");
return 0;
}

BIN
tests/test07 Executable file

Binary file not shown.

20
tests/test07.c Normal file
View File

@ -0,0 +1,20 @@
#include <math.h>
#include <stdio.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
int main(int argc, char **argv)
{
long double zero = 0.0;
double si = sin(M_PI / 2.0);
int a = 3;
float b = 1.5;
float mul = a * b;
printf("0 is %Lf, sin(pi/2) is %f and 3*1.5 is %f.\n", zero, si, mul);
return 0;
}

BIN
tests/test08 Executable file

Binary file not shown.

36
tests/test08.c Executable file
View File

@ -0,0 +1,36 @@
#include <stdio.h>
#include <stdlib.h>
//from https://crypto.stanford.edu/pbc/notes/pi/code.html
// 800 first decimals of PI
int main() {
int r[2800 + 1];
int i, k;
int b, d;
int c = 0;
for (i = 0; i < 2800; i++) {
r[i] = 2000;
}
for (k = 2800; k > 0; k -= 14) {
d = 0;
i = k;
for (;;) {
d += r[i] * 10000;
b = 2 * i - 1;
r[i] = d % b;
d /= b;
i--;
if (i == 0) break;
d *= i;
}
printf("%.4d", c + d / 10000);
c = d % 10000;
}
return 0;
}

BIN
tests/test09 Executable file

Binary file not shown.

20
tests/test09.c Executable file
View File

@ -0,0 +1,20 @@
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
void forkexample()
{
int x = 1;
if (fork() == 0)
printf("Child has x = %d\n", ++x);
else {
usleep(20000);
printf("Parent has x = %d\n", --x);
}
}
int main()
{
forkexample();
return 0;
}

BIN
tests/test10 Executable file

Binary file not shown.

27
tests/test10.cpp Normal file
View File

@ -0,0 +1,27 @@
// using atomic as a lock
#include <iostream> // std::cout
#include <atomic> // std::atomic
#include <thread> // std::thread
#include <vector> // std::vector
#include <sstream> // std::stringstream
std::atomic<bool> lock_stream[10];
std::stringstream stream;
void append_number(int x) {
while (lock_stream[x].load()) {}
stream << "thread #" << x << '\n';
if (x != 9) lock_stream[x + 1].store(false);
}
int main ()
{
std::vector<std::thread> threads;
for (int i = 0; i < 10; ++i) lock_stream[i].store(true);
for (int i=0; i<10; ++i) threads.push_back(std::thread(append_number,i));
lock_stream[0].store(false);
for (auto& th : threads) th.join();
std::cout << stream.str();
return 0;
}

BIN
tests/test11 Executable file

Binary file not shown.

97
tests/test11.c Normal file
View File

@ -0,0 +1,97 @@
#define _MULTI_THREADED
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
void foo(void); /* Functions that use the TLS data */
void bar(void);
#define checkResults(string, val) { \
if (val) { \
printf("Failed with %d at %s", val, string); \
exit(1); \
} \
}
/*
Use the keyword provided by pthread.h to delcare the following variable
is thread specific, i.e. it is only visible to a specific thread,
not shared/common to all thread.
These variables are stored in thread local storage (TLS) area.
*/
__thread int TLS_data1 = 10;
__thread int TLS_data2 = 20;
__thread char TLS_data3[10];
#define NUMTHREADS 2
pthread_t thread[NUMTHREADS];
typedef struct {
int data1;
int data2;
} threadparm_t;
void *thread_run(void *parm)
{
int rc;
threadparm_t *gData;
printf("Thread %d: Entered (%d/%d)\n", (pthread_self()==thread[0])?1:2, TLS_data1, TLS_data2);
gData = (threadparm_t *)parm;
/* Assign the value from global variable to thread specific variable*/
TLS_data1 = gData->data1;
TLS_data2 = gData->data2;
strcpy(TLS_data3, "---");
TLS_data3[1] = (pthread_self()==thread[0])?'1':'2';
foo();
return NULL;
}
void foo() {
printf("Thread %d: foo(), TLS data=%d %d \"%s\"\n",
(pthread_self()==thread[0])?1:2, TLS_data1, TLS_data2, TLS_data3);
while(!thread[1])
usleep(300);
if(pthread_self()==thread[0])
pthread_join(thread[1], NULL);
bar();
}
void bar() {
printf("Thread %d: bar(), TLS data=%d %d \"%s\"\n",
(pthread_self()==thread[0])?1:2, TLS_data1, TLS_data2, TLS_data3);
return;
}
int main(int argc, char **argv)
{
int rc=0;
int i;
threadparm_t gData[NUMTHREADS];
printf("Create/start %d threads\n", NUMTHREADS);
for (i=0; i < NUMTHREADS; i++) {
/* Create per-thread TLS data and pass it to the thread */
gData[i].data1 = i;
gData[i].data2 = (i+1)*2;
rc = pthread_create(&thread[i], NULL, thread_run, &gData[i]);
checkResults("pthread_create()\n", rc);
usleep(200);
}
//printf("Wait for all threads to complete, and release their resources\n");
for (i=0; i < NUMTHREADS; i++) {
rc = pthread_join(thread[i], NULL);
//checkResults("pthread_join()\n", rc);
}
printf("Main completed\n");
return 0;
}

BIN
tests/test12 Executable file

Binary file not shown.

35
tests/test12.c Executable file
View File

@ -0,0 +1,35 @@
#include <stdio.h>
#include <stdint.h>
#include <math.h>
typedef uint32_t uint32;
typedef uint32 angle_t;
int main(int argc, char **argv)
{
int64_t i64 = 1000000000000;
double d = i64;
printf("%lli => %f\n", i64, d);
i64 = -i64;
d = i64;
printf("%lli => %f\n", i64, d);
d = M_PI/4.0;
d = d*(1<<30)/M_PI;
angle_t u32 = (angle_t)d;
printf("(angle_t)%f = %u == 0x%08X\n", d, u32, u32);
int16_t a=0, b=0;
asm volatile (
"fldpi \n"
"fisttp %0 \n"
: "=m" (a));
asm volatile (
"fldpi \n"
"fchs \n"
"fistp %0 \n"
: "=m" (b));
printf("go PI trucated=%d, -PI rounded=%d\n", a, b);
return 0;
}

BIN
tests/test13 Executable file

Binary file not shown.

54
tests/test13.c Normal file
View File

@ -0,0 +1,54 @@
#include <ucontext.h>
#include <stdio.h>
#include <stdlib.h>
static ucontext_t uctx_main, uctx_func1, uctx_func2;
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
static void func1(void)
{
printf("func1: started\n");
printf("func1: swapcontext(&uctx_func1, &uctx_func2)\n");
if (swapcontext(&uctx_func1, &uctx_func2) == -1)
handle_error("swapcontext");
printf("func1: returning\n");
}
static void func2(int a, int b)
{
printf("func2: started (%d, %d)\n", a, b);
printf("func2: swapcontext(&uctx_func2, &uctx_func1)\n");
if (swapcontext(&uctx_func2, &uctx_func1) == -1)
handle_error("swapcontext");
printf("func2: returning\n");
}
int main(int argc, char *argv[])
{
char func1_stack[16384];
char func2_stack[16384];
if (getcontext(&uctx_func1) == -1)
handle_error("getcontext");
uctx_func1.uc_stack.ss_sp = func1_stack;
uctx_func1.uc_stack.ss_size = sizeof(func1_stack);
uctx_func1.uc_link = &uctx_main;
makecontext(&uctx_func1, func1, 0);
if (getcontext(&uctx_func2) == -1)
handle_error("getcontext");
uctx_func2.uc_stack.ss_sp = func2_stack;
uctx_func2.uc_stack.ss_size = sizeof(func2_stack);
/* Successor context is f1(), unless argc > 1 */
uctx_func2.uc_link = (argc > 1) ? NULL : &uctx_func1;
makecontext(&uctx_func2, func2, 2, 1, 2);
printf("main: swapcontext(&uctx_main, &uctx_func2)\n");
if (swapcontext(&uctx_main, &uctx_func2) == -1)
handle_error("swapcontext");
printf("main: exiting\n");
exit(EXIT_SUCCESS);
}

BIN
tests/test14 Executable file

Binary file not shown.

111
tests/test14.c Executable file
View File

@ -0,0 +1,111 @@
// Code from https://martin.uy/blog/pthread_cancel-glibc-stack-unwinding/
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>
#include <unistd.h>
static pthread_mutex_t mutex;
static pthread_mutex_t* mutex_ptr = NULL;
static pthread_cond_t thread_state_cond;
static pthread_cond_t* thread_state_cond_ptr = NULL;
static int thread_state = 0; // Sync
void thread_cleanup(void* args) {
pthread_mutex_lock(mutex_ptr);
printf("Thread: thread_state = 2.\n");
thread_state = 2;
pthread_cond_broadcast(thread_state_cond_ptr);
pthread_mutex_unlock(mutex_ptr);
}
static void thread_f(void) {
int ret = -1;
pthread_mutex_lock(mutex_ptr);
printf("Thread: thread_state = 1.\n");
thread_state = 1;
pthread_cond_broadcast(thread_state_cond_ptr);
pthread_mutex_unlock(mutex_ptr);
while (1) {
sleep(1000);
}
}
static void* thread_main(void* args) {
pthread_cleanup_push(&thread_cleanup, NULL);
thread_f();
// This should never be executed
pthread_cleanup_pop(0);
return NULL;
}
int main(void) {
int ret = 0;
pthread_t thread;
pthread_attr_t thread_attributes;
pthread_attr_t* thread_attributes_ptr = NULL;
if (pthread_mutex_init(&mutex, NULL) != 0)
goto error;
mutex_ptr = &mutex;
if (pthread_cond_init(&thread_state_cond, NULL) != 0)
goto error;
thread_state_cond_ptr = &thread_state_cond;
if (pthread_attr_init(&thread_attributes) != 0)
goto error;
thread_attributes_ptr = &thread_attributes;
if (pthread_create(&thread, thread_attributes_ptr, &thread_main, NULL) != 0)
goto error;
thread_attributes_ptr = NULL;
if (pthread_attr_destroy(&thread_attributes) != 0)
goto error;
// Wait for thread to go deep into the call stack
pthread_mutex_lock(mutex_ptr);
while (thread_state != 1)
pthread_cond_wait(thread_state_cond_ptr, mutex_ptr);
printf("Main thread: thread_state == 1.\n");
pthread_mutex_unlock(mutex_ptr);
if (pthread_cancel(thread) != 0)
goto error;
// Wait for thread to execute the cleanup function
pthread_mutex_lock(mutex_ptr);
while (thread_state != 2)
pthread_cond_wait(thread_state_cond_ptr, mutex_ptr);
printf("Main thread: thread_state == 2.\n");
pthread_mutex_unlock(mutex_ptr);
thread_state_cond_ptr = NULL;
if (pthread_cond_destroy(&thread_state_cond) != 0)
goto error;
mutex_ptr = NULL;
if (pthread_mutex_destroy(&mutex) != 0)
goto error;
goto cleanup;
error:
ret = -1;
cleanup:
if (thread_attributes_ptr != NULL)
pthread_attr_destroy(thread_attributes_ptr);
if (thread_state_cond_ptr != NULL)
pthread_cond_destroy(thread_state_cond_ptr);
if (mutex_ptr != NULL)
pthread_mutex_destroy(mutex_ptr);
if (ret == -1)
printf("Finished with errors.\n");
else
printf("Finished with no errors.\n");
return ret;
}