Use the new floating point Montgomery multiply code from Sun on Solaris.

This commit is contained in:
nelsonb%netscape.com 2000-12-02 02:37:22 +00:00
parent 9187c93f00
commit 885d29d0b4
2 changed files with 152 additions and 43 deletions

View File

@ -36,7 +36,7 @@
## GPL.
##
##
## $Id: Makefile,v 1.11 2000/09/30 01:46:30 nelsonb%netscape.com Exp $
## $Id: Makefile,v 1.12 2000/12/02 02:37:22 nelsonb%netscape.com Exp $
##
## Define CC to be the C compiler you wish to use. The GNU cc
@ -65,68 +65,76 @@ CFLAGS= -O $(MPICMN)
#CFLAGS=-ansi -pedantic -Wall -O3 $(MPICMN)
#CFLAGS=-ansi -pedantic -Wall -g -O2 -DMP_DEBUG=1 $(MPICMN)
ifeq ($(TARGET),mipsIRIX)
#IRIX
# Settings applied only when TARGET is mipsIRIX.  The uncommented assignments
# add the MP_MONT_USE_MP_MUL / MP_ASSEMBLY_MULTIPLY / MP_ASSEMBLY_SQUARE
# defines to MPICMN, put mpi_mips.o in AS_OBJS (so it ends up in LIBOBJS),
# and choose the -n32 -O2 variants of ASFLAGS and CFLAGS.
# The remaining '#'-prefixed assignments appear to be alternative settings
# kept for reference; they have no effect.
#MPICMN += -DMP_MONT_USE_MP_MUL
#MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
#AS_OBJS = mpi_mips.o
MPICMN += -DMP_MONT_USE_MP_MUL -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
AS_OBJS = mpi_mips.o
#ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 -exceptions
#ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3
ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3
#CFLAGS=-ansi -n32 -O3 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
#CFLAGS=-ansi -n32 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
CFLAGS=-ansi -n32 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
#CFLAGS=-ansi -n32 -g -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
#CFLAGS=-ansi -n32 -g -fullwarn -woff 1429 -D_SGI_SOURCE -DMP_NO_MP_WORD \
$(MPICMN)
#CFLAGS=-ansi -64 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE -DMP_NO_MP_WORD \
$(MPICMN)
endif
ifeq ($(TARGET),alphaOSF1)
#Alpha/OSF1
# No active settings for this target: both CFLAGS candidates below are
# commented out, so selecting alphaOSF1 changes nothing in this section.
#CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT $(MPICMN)
#CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT \
-DMP_NO_MP_WORD $(MPICMN)
endif
ifeq ($(TARGET),v9SOLARIS)
#Solaris 64
# Settings applied only when TARGET is v9SOLARIS.  The uncommented assignments
# set SOLARIS_ASM_FLAGS (with -xarch=v9a), list montmulfv9.o, mpi_sparc.o and
# mpv_sparc.o in AS_OBJS, and add MP_USE_UINT_DIGIT, MP_NO_MP_WORD,
# MP_ASSEMBLY_MULTIPLY and MP_USING_MONT_MULF to MPICMN before CFLAGS
# (with -xarch=v9 and -DNSS_USE_64) expands it.
# The '#'-prefixed assignments are inactive variants: one set without
# montmulfv9.o / MP_USING_MONT_MULF and a -g form of CFLAGS.
#SOLARIS_ASM_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v9a -KPIC -mt
#AS_OBJS = mpi_sparc.o mpv_sparc.o
#MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD -DMP_ASSEMBLY_MULTIPLY
#CFLAGS= -O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
SOLARIS_ASM_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v9a -KPIC -mt
AS_OBJS = montmulfv9.o mpi_sparc.o mpv_sparc.o
MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD -DMP_ASSEMBLY_MULTIPLY
MPICMN += -DMP_USING_MONT_MULF
CFLAGS= -O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
-DSOLARIS2_8 -D_SVID_GETTOD -xarch=v9 -DXP_UNIX -DNSS_USE_64 $(MPICMN)
#CFLAGS= -g -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
-DSOLARIS2_8 -D_SVID_GETTOD -xarch=v9 -DXP_UNIX -DNSS_USE_64 $(MPICMN)
endif
ifeq ($(TARGET),v8SOLARIS)
#Solaris 32
# Settings applied only when TARGET is v8SOLARIS.  The uncommented assignments
# set SOLARIS_ASM_FLAGS (with -xarch=v8plusa), list montmulfv8.o, mpi_sparc.o
# and mpv_sparc32.o in AS_OBJS, and add MP_ASSEMBLY_MULTIPLY and
# MP_USING_MONT_MULF to MPICMN before CFLAGS (with -xarch=v8 and
# -DMP_NO_MP_WORD) expands it.
# The '#'-prefixed assignments further down are inactive variants that omit
# montmulfv8.o / MP_USING_MONT_MULF.
#CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt
AS_OBJS = montmulfv8.o mpi_sparc.o mpv_sparc32.o
MPICMN += -DMP_ASSEMBLY_MULTIPLY
MPICMN += -DMP_USING_MONT_MULF
CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
-DSOLARIS2_6 -D_SVID_GETTOD -xarch=v8 -DXP_UNIX -DMP_NO_MP_WORD $(MPICMN)
#CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
-DSOLARIS2_6 -D_SVID_GETTOD -xarch=v8 -DXP_UNIX $(MPICMN)
#SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt
#AS_OBJS = mpi_sparc.o mpv_sparc32.o
#MPICMN += -DMP_ASSEMBLY_MULTIPLY
#CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
-DSOLARIS2_6 -D_SVID_GETTOD -xarch=v8 -DXP_UNIX -DMP_NO_MP_WORD \
-DMP_ASSEMBLY_MULTIPLY $(MPICMN)
endif
ifeq ($(TARGET),HPUX)
#HPUX
# No active settings for this target: both CFLAGS candidates below (one with
# +DA2.0W and -DNSS_USE_64, one with +DAportable and -DMP_NO_MP_WORD) are
# commented out.
#CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
-D_HPUX_SOURCE -Aa +e +DA2.0W +DS2.0 +DChpux -DHPUX11 -DXP_UNIX \
-DNSS_USE_64 $(MPICMN)
#CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
-D_HPUX_SOURCE +DAportable +DS1.1 -DHPUX11 -DXP_UNIX -DMP_NO_MP_WORD $(MPICMN)
endif
ifeq ($(TARGET),x86LINUX)
#Linux
# Settings applied only when TARGET is x86LINUX.  The uncommented assignments
# put mpi_x86.o in AS_OBJS, add MP_ASSEMBLY_MULTIPLY, MP_ASSEMBLY_SQUARE,
# MP_ASSEMBLY_DIV_2DX1D and MP_MONT_USE_MP_MUL to MPICMN, and select the
# -O2 / -DNDEBUG form of CFLAGS.
# The '#'-prefixed CFLAGS assignments at the end are inactive -g variants.
#AS_OBJS = mpi_x86.o
#MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
#MPICMN += -DMP_MONT_USE_MP_MUL
#CFLAGS= -O2 -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
AS_OBJS = mpi_x86.o
MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
MPICMN += -DMP_MONT_USE_MP_MUL
CFLAGS= -O2 -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
-pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
-DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN)
#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
-pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
-DXP_UNIX -DDEBUG -UNDEBUG -D_REENTRANT $(MPICMN)
#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
-pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
-DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN)
endif
##
## Define LIBS to include any libraries you need to link against.
@ -176,7 +184,7 @@ DOCS=README doc utils/README utils/PRIMES
## This is the list of tools built by 'make tools'
TOOLS=gcd invmod isprime lap dec2hex hex2dec primegen prng \
basecvt fact exptmod pi makeprime
basecvt fact exptmod pi makeprime identest
# Objects archived into libmpi.a.  $(AS_OBJS) contributes whatever extra
# objects the matching TARGET conditional assigned (empty if none did).
LIBOBJS = mpprime.o mpmontg.o mplogic.o mpi.o $(AS_OBJS)
# Library headers, used as prerequisites by the object rules (e.g. mpprime.o).
LIBHDRS = mpi-config.h mpi-priv.h mpi.h
@ -221,12 +229,22 @@ mpprime.o: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
# Build mpi_mips.o (the AS_OBJS entry for the mipsIRIX target) from
# mpi_mips.s by running $(CC) with $(ASFLAGS) instead of $(CFLAGS).
mpi_mips.o: mpi_mips.s
$(CC) -o $@ $(ASFLAGS) -c mpi_mips.s
# Prerequisite-only line (no recipe here): mpi_sparc.o is rebuilt whenever
# montmulf.h changes.  The recipe itself is not given by this line.
mpi_sparc.o : montmulf.h
# Build mpv_sparc32.o (listed in AS_OBJS for v8SOLARIS) from mpv_sparc32.S
# by running $(CC) with $(SOLARIS_ASM_FLAGS).
mpv_sparc32.o: mpv_sparc32.S
$(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c mpv_sparc32.S
# Build mpv_sparc.o (listed in AS_OBJS for v9SOLARIS) from mpv_sparc.c.
# vis_64.il is handed to $(CC) on the same command line as the C source
# (presumably a compiler inline template -- confirm); both files are
# prerequisites, so a change to either triggers a rebuild.
mpv_sparc.o: vis_64.il mpv_sparc.c
$(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c vis_64.il mpv_sparc.c
# Static pattern rule: each of the two listed objects is built from the .s
# file with the same stem (montmulfv8.s / montmulfv9.s).  $< expands to that
# .s file and $@ to the object being built.
montmulfv8.o montmulfv9.o : %.o : %.s
$(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c $<
# This rule is used to build the .s sources, which are then hand optimized.
#montmulfv8.s montmulfv9.s : montmulf%.s : montmulf%.il montmulf.c montmulf.h
# $(CC) -o $@ $(SOLARIS_ASM_FLAGS) -S montmulf$*.il montmulf.c
# Archive every object in $(LIBOBJS) (which includes the per-target
# $(AS_OBJS)) into libmpi.a, then run $(RANLIB) on the archive.
libmpi.a: $(LIBOBJS)
ar -cvr libmpi.a $(LIBOBJS)
$(RANLIB) libmpi.a
@ -311,6 +329,9 @@ clean:
rm -f utils/core
rm -f utils/*~ utils/.*~
# clobber: run the clean target first, then also remove the files named by
# $(TOOLS) and $(UTILS).
clobber: clean
rm -f $(TOOLS) $(UTILS)
distclean: clean
rm -f mptest? mpi-test metime mulsqr karatsuba
rm -f mptest?a mptest?b

View File

@ -29,7 +29,7 @@
* the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the
* GPL.
* $Id: mpmontg.c,v 1.8 2000/09/14 00:30:51 nelsonb%netscape.com Exp $
* $Id: mpmontg.c,v 1.9 2000/12/02 02:37:22 nelsonb%netscape.com Exp $
*/
/* This file implements modular exponentiation using Montgomery's
@ -41,10 +41,14 @@
* published by Springer Verlag.
*/
/* #define MP_USING_MONT_MULF 1 */
#include <string.h>
#include "mpi-priv.h"
#include "mplogic.h"
#include "mpprime.h"
#ifdef MP_USING_MONT_MULF
#include "montmulf.h"
#endif
#define STATIC
/* #define DEBUG 1 */
@ -192,9 +196,21 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
mp_size bits_in_exponent;
mp_size i;
mp_size window_bits, odd_ints;
mp_err res;
mp_int square, accum1, accum2, goodBase;
mp_err res;
int expOff, nLen;
mp_int square, accum1, accum2, goodBase;
mp_mont_modulus mmm;
#ifdef MP_USING_MONT_MULF
int dSize = 0, oddPowSize, dTmpSize, dSqrSize;
double dn0;
double *dBuf = 0;
double *dm1, *dn, *dSqr, *d16Tmp, *oddPowers[MAX_ODD_INTS], *dTmp;
mp_digit *mResult;
#else
/* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
/* oddPowers[i] = base ** (2*i + 1); */
mp_int power2, oddPowers[MAX_ODD_INTS];
#endif
/* function for computing n0prime only works if n0 is odd */
if (!mp_isodd(modulus))
@ -204,6 +220,10 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
MP_DIGITS(&accum1) = 0;
MP_DIGITS(&accum2) = 0;
MP_DIGITS(&goodBase) = 0;
#ifdef MP_USING_MONT_MULF
for (i = 0; i < MAX_ODD_INTS; ++i)
oddPowers[i] = 0;
#endif
if (mp_cmp(inBase, modulus) < 0) {
base = inBase;
@ -213,10 +233,12 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
}
MP_CHECKOK( mp_init_size(&square, 2 * MP_USED(modulus) + 2) );
MP_CHECKOK( mp_init_size(&accum1, 3 * MP_USED(modulus) + 2) );
MP_CHECKOK( mp_init_size(&accum2, 3 * MP_USED(modulus) + 2) );
nLen = MP_USED(modulus);
MP_CHECKOK( mp_init_size(&square, 2 * nLen + 2) );
MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
#ifndef MP_USING_MONT_MULF
MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
#endif
mmm.N = *modulus; /* a copy of the mp_int struct */
i = mpl_significant_bits(modulus);
i += MP_DIGIT_BIT - 1;
@ -228,7 +250,12 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
mmm.n0prime = 0 - s_mp_invmod_radix( MP_DIGIT(modulus, 0) );
MP_CHECKOK( s_mp_to_mont(base, &mmm, &square) );
#ifdef MP_USING_MONT_MULF
MP_CHECKOK( s_mp_pad(&square, nLen) );
mp_set(&accum1, 1);
MP_CHECKOK( s_mp_to_mont(&accum1, &mmm, &accum1) );
MP_CHECKOK( s_mp_pad(&accum1, nLen) );
#endif
bits_in_exponent = mpl_significant_bits(exponent);
if (bits_in_exponent > 480)
window_bits = 6;
@ -242,19 +269,66 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
bits_in_exponent += window_bits - i;
}
{
/* oddPowers[i] = base ** (2*i + 1); */
int expOff;
/* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
mp_int power2, oddPowers[MAX_ODD_INTS];
#ifdef MP_USING_MONT_MULF
oddPowSize = 2 * nLen + 1;
dTmpSize = 2 * oddPowSize;
dSize = sizeof(double) * (nLen * 4 + 1 +
((odd_ints + 1) * oddPowSize) + dTmpSize);
dBuf = (double *)malloc(dSize);
dm1 = dBuf; /* array of d32 */
dn = dBuf + nLen; /* array of d32 */
dSqr = dn + nLen; /* array of d32 */
d16Tmp = dSqr + nLen; /* array of d16 */
dTmp = d16Tmp + oddPowSize;
for (i = 0; i < odd_ints; ++i) {
oddPowers[i] = dTmp;
dTmp += oddPowSize;
}
mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */
/* Make dn and dn0 */
conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
dn0 = (double)(mmm.n0prime & 0xffff);
/* Make dSqr */
conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(&square), nLen);
mont_mulf_noconv(mResult, dm1, oddPowers[0],
dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
conv_i32_to_d32(dSqr, mResult, nLen);
for (i = 1; i < odd_ints; ++i) {
mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1],
dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
conv_i32_to_d16(oddPowers[i], mResult, nLen);
}
s_mp_copy(MP_DIGITS(&accum1), mResult, nLen);
#define SWAPPA
/* computes montgomery square of the integer in mResult */
#define SQR(a,b) \
conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
mont_mulf_noconv(mResult, dm1, d16Tmp, \
dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
/* computes montgomery product of x and the integer in mResult */
#define MUL(x,a,b) \
conv_i32_to_d32(dm1, mResult, nLen); \
mont_mulf_noconv(mResult, dm1, oddPowers[x], \
dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
#else
MP_CHECKOK( mp_init_copy(oddPowers, &square) );
mp_init_size(&power2, MP_USED(modulus) + 2 * MP_USED(&square) + 2);
mp_init_size(&power2, nLen + 2 * MP_USED(&square) + 2);
MP_CHECKOK( mp_sqr(&square, &power2) ); /* square = square ** 2 */
MP_CHECKOK( s_mp_redc(&power2, &mmm) );
for (i = 1; i < odd_ints; ++i) {
mp_init_size(oddPowers + i, MP_USED(modulus) + 2 * MP_USED(&power2) + 2);
mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2);
MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );
MP_CHECKOK( s_mp_redc(oddPowers + i, &mmm) );
}
@ -277,6 +351,7 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
#endif
#define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
#endif
for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
mp_size smallExp;
@ -354,18 +429,31 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
}
}
mp_clear(&power2);
for (i = 0; i < odd_ints; ++i) {
mp_clear(oddPowers + i);
}
#ifdef MP_USING_MONT_MULF
s_mp_copy(mResult, MP_DIGITS(&square), nLen);
pa1 = &square;
#endif
}
res = s_mp_redc(pa1, &mmm);
mp_exch(pa1, result);
CLEANUP:
mp_clear(&square);
mp_clear(&accum1);
mp_clear(&accum2);
mp_clear(&goodBase);
#ifdef MP_USING_MONT_MULF
if (dBuf) {
if (dSize)
memset(dBuf, 0, dSize);
free(dBuf);
}
#else
mp_clear(&power2);
for (i = 0; i < odd_ints; ++i) {
mp_clear(oddPowers + i);
}
mp_clear(&accum2);
#endif
/* Don't mp_clear mmm.N because it is merely a copy of modulus.
** Just zap it.
*/