use tomsfastmath instead of tommath hack from wpa_supplicant

using latest git version from https://github.com/libtom/tomsfastmath
This commit is contained in:
rofl0r 2017-12-31 20:27:39 +00:00 committed by wiire-a
parent 7576a68717
commit d8e7bba357
51 changed files with 9143 additions and 3646 deletions

View File

@ -18,6 +18,9 @@ INTFLAGS += -DUSE_OPENSSL
endif
TARGET = pixiewps
TFMSRC = $(sort $(wildcard $(SRCDIR)/crypto/tfm/*.c))
TFMOBJS = $(TFMSRC:.c=.o)
SOURCE = $(SRCDIR)/pixiewps.c
-include config.mak
@ -26,8 +29,11 @@ SOURCE = $(SRCDIR)/pixiewps.c
all: $(TARGET)
$(TARGET): $(SOURCE) $(HDRS)
$(CC) $(INTFLAGS) $(CFLAGS) $(CPPFLAGS) -o $(TARGET) $(SOURCE) $(LIBS) $(LDFLAGS)
$(TARGET): $(SOURCE) $(HDRS) $(TFMOBJS)
$(CC) $(INTFLAGS) $(CFLAGS) $(CPPFLAGS) -o $(TARGET) $(SOURCE) $(LIBS) $(LDFLAGS) $(TFMOBJS)
$(SRCDIR)/crypto/tfm/%.o: $(SRCDIR)/crypto/tfm/%.c
$(CC) $(CFLAGS) $(CPPFLAGS) -I$(SRCDIR)/crypto/tfm -c -o $@ $<
install: install-bin install-man
@ -43,4 +49,4 @@ strip: $(TARGET)
strip $(TARGET)
clean:
rm -f $(TARGET)
rm -f $(TARGET) $(TFMOBJS)

View File

@ -1,207 +0,0 @@
/*
* Big number math
* Copyright (c) 2006, Jouni Malinen <j@w1.fi>
*
* This software may be distributed under the terms of the BSD license.
* See README for more details.
*/
#include "libtommath.c"
/*
* The current version is just a wrapper for LibTomMath library, so
* struct bignum is just typecast to mp_int.
*/
/**
* bignum_init - Allocate memory for bignum
* Returns: Pointer to allocated bignum or %NULL on failure
*/
struct bignum *bignum_init(void)
{
struct bignum *n = calloc(sizeof(mp_int), 1);
if (n == NULL)
return NULL;
if (mp_init((mp_int *) n) != MP_OKAY) {
free(n);
n = NULL;
}
return n;
}
/**
 * bignum_deinit - Release a bignum
 * @n: Bignum from bignum_init(); %NULL is accepted and ignored
 */
void bignum_deinit(struct bignum *n)
{
	if (n == NULL)
		return;

	mp_clear((mp_int *) n);
	free(n);
}
/**
* bignum_get_unsigned_bin - Get length of bignum as an unsigned binary buffer
* @n: Bignum from bignum_init()
* Returns: Length of n if written to a binary buffer
*/
size_t bignum_get_unsigned_bin_len(struct bignum *n)
{
return mp_unsigned_bin_size((mp_int *) n);
}
/**
 * bignum_get_unsigned_bin - Write bignum into an unsigned binary buffer
 * @n: Bignum from bignum_init()
 * @buf: Buffer receiving the binary number
 * @len: In: capacity of buf; out: number of octets required/used. May be
 * %NULL when the caller knows buf is large enough.
 * Returns: 0 on success, -1 on failure (buffer too small or conversion
 * error). When the buffer is too small, *len is set to the required size.
 */
int bignum_get_unsigned_bin(const struct bignum *n, u8 *buf, size_t *len)
{
	mp_int *num = (mp_int *) n;
	size_t required = mp_unsigned_bin_size(num);

	/* tell the caller how much room is needed before giving up */
	if (len != NULL && *len < required) {
		*len = required;
		return -1;
	}

	if (mp_to_unsigned_bin(num, buf) != MP_OKAY)
		return -1;

	if (len != NULL)
		*len = required;
	return 0;
}
/**
 * bignum_set_unsigned_bin - Load a bignum from an unsigned binary buffer
 * @n: Bignum from bignum_init(); receives the value
 * @buf: Buffer holding the unsigned binary value
 * @len: Length of buf in octets
 * Returns: 0 on success, -1 on failure
 */
int bignum_set_unsigned_bin(struct bignum *n, const u8 *buf, size_t len)
{
	int rc = mp_read_unsigned_bin((mp_int *) n, (u8 *) buf, len);

	return (rc == MP_OKAY) ? 0 : -1;
}
/**
* bignum_cmp - Signed comparison
* @a: Bignum from bignum_init()
* @b: Bignum from bignum_init()
* Returns: 0 on success, -1 on failure
*/
int bignum_cmp(const struct bignum *a, const struct bignum *b)
{
return mp_cmp((mp_int *) a, (mp_int *) b);
}
/**
* bignum_cmd_d - Compare bignum to standard integer
* @a: Bignum from bignum_init()
* @b: Small integer
* Returns: 0 on success, -1 on failure
*/
int bignum_cmp_d(const struct bignum *a, unsigned long b)
{
return mp_cmp_d((mp_int *) a, b);
}
/**
 * bignum_add - c = a + b
 * @a: Bignum from bignum_init(); first addend
 * @b: Bignum from bignum_init(); second addend
 * @c: Bignum from bignum_init(); receives a + b
 * Returns: 0 on success, -1 on failure
 */
int bignum_add(const struct bignum *a, const struct bignum *b,
	       struct bignum *c)
{
	int rc = mp_add((mp_int *) a, (mp_int *) b, (mp_int *) c);

	return (rc == MP_OKAY) ? 0 : -1;
}
/**
 * bignum_sub - c = a - b
 * @a: Bignum from bignum_init(); minuend
 * @b: Bignum from bignum_init(); subtrahend
 * @c: Bignum from bignum_init(); receives a - b
 * Returns: 0 on success, -1 on failure
 */
int bignum_sub(const struct bignum *a, const struct bignum *b,
	       struct bignum *c)
{
	int rc = mp_sub((mp_int *) a, (mp_int *) b, (mp_int *) c);

	return (rc == MP_OKAY) ? 0 : -1;
}
/**
 * bignum_mul - c = a * b
 * @a: Bignum from bignum_init(); first factor
 * @b: Bignum from bignum_init(); second factor
 * @c: Bignum from bignum_init(); receives a * b
 * Returns: 0 on success, -1 on failure
 */
int bignum_mul(const struct bignum *a, const struct bignum *b,
	       struct bignum *c)
{
	int rc = mp_mul((mp_int *) a, (mp_int *) b, (mp_int *) c);

	return (rc == MP_OKAY) ? 0 : -1;
}
/**
 * bignum_mulmod - d = a * b (mod c)
 * @a: Bignum from bignum_init(); first factor
 * @b: Bignum from bignum_init(); second factor
 * @c: Bignum from bignum_init(); modulus
 * @d: Bignum from bignum_init(); receives a * b (mod c)
 * Returns: 0 on success, -1 on failure
 */
int bignum_mulmod(const struct bignum *a, const struct bignum *b,
		  const struct bignum *c, struct bignum *d)
{
	int rc = mp_mulmod((mp_int *) a, (mp_int *) b, (mp_int *) c,
			   (mp_int *) d);

	return (rc == MP_OKAY) ? 0 : -1;
}
/**
 * bignum_exptmod - Modular exponentiation: d = a^b (mod c)
 * @a: Bignum from bignum_init(); base
 * @b: Bignum from bignum_init(); exponent
 * @c: Bignum from bignum_init(); modulus
 * @d: Bignum from bignum_init(); receives a^b (mod c)
 * Returns: 0 on success, -1 on failure
 */
int bignum_exptmod(const struct bignum *a, const struct bignum *b,
		   const struct bignum *c, struct bignum *d)
{
	int rc = mp_exptmod((mp_int *) a, (mp_int *) b, (mp_int *) c,
			    (mp_int *) d);

	return (rc == MP_OKAY) ? 0 : -1;
}

View File

@ -1,21 +1,7 @@
/*
* Crypto wrapper for internal crypto implementation - modexp
* Copyright (c) 2006-2009, Jouni Malinen <j@w1.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Alternatively, this software may be distributed under the terms of BSD
* license.
*
* See README and COPYING for more details.
*/
#include <stdlib.h>
#include <stdint.h>
#include "bignum.c"
#include "tfm/tfm.h"
#define u8 uint8_t
@ -24,32 +10,17 @@ int crypto_mod_exp(const u8 *base, size_t base_len,
const u8 *modulus, size_t modulus_len,
u8 *result, size_t *result_len)
{
struct bignum *bn_base, *bn_exp, *bn_modulus, *bn_result;
int ret = -1;
fp_int bn_base, bn_exp, bn_modulus, bn_result;
bn_base = bignum_init();
bn_exp = bignum_init();
bn_modulus = bignum_init();
bn_result = bignum_init();
fp_read_unsigned_bin(&bn_base, base, base_len);
fp_read_unsigned_bin(&bn_exp, power, power_len);
fp_read_unsigned_bin(&bn_modulus, modulus, modulus_len);
fp_init(&bn_result);
if (bn_base == NULL || bn_exp == NULL || bn_modulus == NULL ||
bn_result == NULL)
goto error;
fp_exptmod(&bn_base, &bn_exp, &bn_modulus, &bn_result);
if (bignum_set_unsigned_bin(bn_base, base, base_len) < 0 ||
bignum_set_unsigned_bin(bn_exp, power, power_len) < 0 ||
bignum_set_unsigned_bin(bn_modulus, modulus, modulus_len) < 0)
goto error;
fp_to_unsigned_bin(&bn_result, result);
if (bignum_exptmod(bn_base, bn_exp, bn_modulus, bn_result) < 0)
goto error;
ret = bignum_get_unsigned_bin(bn_result, result, result_len);
error:
bignum_deinit(bn_base);
bignum_deinit(bn_exp);
bignum_deinit(bn_modulus);
bignum_deinit(bn_result);
return ret;
*result_len = fp_unsigned_bin_size(&bn_result);
return 0;
}

File diff suppressed because it is too large Load Diff

39
src/crypto/tfm/fp_2expt.c Normal file
View File

@ -0,0 +1,39 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Sets a = 2**b.
 *
 * a is left at zero when b is negative or when the requested bit would
 * fall at or beyond digit index FP_SIZE.
 */
void fp_2expt(fp_int *a, int b)
{
   int digit_idx;

   /* clear first so that every early exit leaves a == 0 */
   fp_zero (a);

   if (b < 0) {
      return;
   }

   /* index of the digit that holds bit b */
   digit_idx = b / DIGIT_BIT;
   if (digit_idx >= FP_SIZE) {
      return;
   }

   /* exactly one digit above the lower zero digits is in use */
   a->used          = digit_idx + 1;
   a->dp[digit_idx] = ((fp_digit)1) << (b % DIGIT_BIT);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

43
src/crypto/tfm/fp_add.c Normal file
View File

@ -0,0 +1,43 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Signed addition: c = a + b.
 *
 * Equal signs add the magnitudes; differing signs subtract the smaller
 * magnitude from the larger one and keep the sign of the larger operand.
 */
void fp_add(fp_int *a, fp_int *b, fp_int *c)
{
   const int sign_a = a->sign;
   const int sign_b = b->sign;

   if (sign_a == sign_b) {
      /* same sign: magnitude sum carries the common sign */
      c->sign = sign_a;
      s_fp_add (a, b, c);
      return;
   }

   /* opposite signs: always subtract smaller magnitude from larger */
   if (fp_cmp_mag (a, b) == FP_LT) {
      c->sign = sign_b;
      s_fp_sub (b, a, c);
   } else {
      c->sign = sign_a;
      s_fp_sub (a, b, c);
   }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

31
src/crypto/tfm/fp_cmp.c Normal file
View File

@ -0,0 +1,31 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Signed comparison of a and b; returns FP_LT, FP_EQ or FP_GT. */
int fp_cmp(fp_int *a, fp_int *b)
{
   /* differing signs decide the result immediately */
   if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
      return FP_LT;
   }
   if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
      return FP_GT;
   }

   /* same sign: compare magnitudes, reversed when a is negative */
   return (a->sign == FP_NEG) ? fp_cmp_mag(b, a) : fp_cmp_mag(a, b);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

38
src/crypto/tfm/fp_cmp_d.c Normal file
View File

@ -0,0 +1,38 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Compare a against the single digit b; returns FP_LT, FP_EQ or FP_GT. */
int fp_cmp_d(fp_int *a, fp_digit b)
{
   fp_digit low;

   /* a negative a, or a zero a against a non-zero b, is smaller */
   if (a->sign == FP_NEG || (a->used == 0 && b != 0)) {
      return FP_LT;
   }

   /* more than one digit in use cannot fit into a single digit */
   if (a->used > 1) {
      return FP_GT;
   }

   /* at most one digit left: compare it directly */
   low = a->dp[0];
   if (low > b) {
      return FP_GT;
   }
   if (low < b) {
      return FP_LT;
   }
   return FP_EQ;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,35 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Compare |a| with |b| (signs ignored); returns FP_LT, FP_EQ or FP_GT. */
int fp_cmp_mag(fp_int *a, fp_int *b)
{
   int idx;

   /* the operand using more digits has the larger magnitude */
   if (a->used != b->used) {
      return (a->used > b->used) ? FP_GT : FP_LT;
   }

   /* equal length: first differing digit from the top decides */
   idx = a->used;
   while (idx-- > 0) {
      if (a->dp[idx] != b->dp[idx]) {
         return (a->dp[idx] > b->dp[idx]) ? FP_GT : FP_LT;
      }
   }
   return FP_EQ;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,36 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Returns the number of bits needed to represent |a| (0 when a == 0). */
int fp_count_bits (fp_int * a)
{
   int      bits;
   fp_digit top;

   if (a->used == 0) {
      return 0;
   }

   /* every digit below the top one contributes DIGIT_BIT bits */
   bits = (a->used - 1) * DIGIT_BIT;

   /* add the significant bits of the top digit */
   for (top = a->dp[a->used - 1]; top != ((fp_digit) 0); top >>= ((fp_digit) 1)) {
      ++bits;
   }
   return bits;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

157
src/crypto/tfm/fp_div.c Normal file
View File

@ -0,0 +1,157 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Integer division with remainder: a/b => cb + d == a
 *
 * a: dividend
 * b: divisor
 * c: receives the quotient, may be NULL if not wanted
 * d: receives the remainder, may be NULL if not wanted
 *
 * Returns FP_VAL when b is zero, FP_OKAY otherwise.
 * The quotient's sign is FP_ZPOS when a and b share a sign and FP_NEG
 * otherwise; a non-zero remainder takes the sign of a.
 */
int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
{
fp_int q, x, y, t1, t2;
int n, t, i, norm, neg;
/* is divisor zero ? */
if (fp_iszero (b) == 1) {
return FP_VAL;
}
/* if a < b then q=0, r = a */
if (fp_cmp_mag (a, b) == FP_LT) {
if (d != NULL) {
fp_copy (a, d);
}
if (c != NULL) {
fp_zero (c);
}
return FP_OKAY;
}
/* q starts at zero with room for every quotient digit; it is clamped
 * before being handed back to the caller */
fp_init(&q);
q.used = a->used + 2;
fp_init(&t1);
fp_init(&t2);
/* x and y are working copies, so a and b themselves are never modified */
fp_init_copy(&x, a);
fp_init_copy(&y, b);
/* fix the sign */
neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
x.sign = y.sign = FP_ZPOS;
/* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
norm = fp_count_bits(&y) % DIGIT_BIT;
if (norm < (int)(DIGIT_BIT-1)) {
norm = (DIGIT_BIT-1) - norm;
fp_mul_2d (&x, norm, &x);
fp_mul_2d (&y, norm, &y);
} else {
norm = 0;
}
/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
n = x.used - 1;
t = y.used - 1;
/* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
fp_lshd (&y, n - t); /* y = y*b**{n-t} */
while (fp_cmp (&x, &y) != FP_LT) {
++(q.dp[n - t]);
fp_sub (&x, &y, &x);
}
/* reset y by shifting it back down */
fp_rshd (&y, n - t);
/* step 3. for i from n down to (t + 1) */
for (i = n; i >= (t + 1); i--) {
if (i > x.used) {
continue;
}
/* step 3.1 if xi == yt then set q{i-t-1} to b-1,
* otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
if (x.dp[i] == y.dp[t]) {
q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1);
} else {
fp_word tmp;
tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
tmp |= ((fp_word) x.dp[i - 1]);
tmp /= ((fp_word) y.dp[t]);
q.dp[i - t - 1] = (fp_digit) (tmp);
}
/* while (q{i-t-1} * (yt * b + y{t-1})) >
xi * b**2 + xi-1 * b + xi-2
do q{i-t-1} -= 1;
*/
/* pre-increment so that the first pass of the do/while below tests the
 * estimate computed in step 3.1 */
q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
do {
q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
/* find left hand */
fp_zero (&t1);
t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
t1.dp[1] = y.dp[t];
t1.used = 2;
fp_mul_d (&t1, q.dp[i - t - 1], &t1);
/* find right hand */
t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
t2.dp[2] = x.dp[i];
t2.used = 3;
} while (fp_cmp_mag(&t1, &t2) == FP_GT);
/* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
fp_mul_d (&y, q.dp[i - t - 1], &t1);
fp_lshd (&t1, i - t - 1);
fp_sub (&x, &t1, &x);
/* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
if (x.sign == FP_NEG) {
fp_copy (&y, &t1);
fp_lshd (&t1, i - t - 1);
fp_add (&x, &t1, &x);
q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
}
}
/* now q is the quotient and x is the remainder
* [which we have to normalize]
*/
/* get sign before writing to c */
x.sign = x.used == 0 ? FP_ZPOS : a->sign;
if (c != NULL) {
fp_clamp (&q);
fp_copy (&q, c);
c->sign = neg;
}
if (d != NULL) {
/* undo the normalization shift applied to x above */
fp_div_2d (&x, norm, &x, NULL);
/* the following is a kludge, essentially we were seeing the right remainder but
with excess digits that should have been zero
*/
for (i = b->used; i < x.used; i++) {
x.dp[i] = 0;
}
fp_clamp(&x);
fp_copy (&x, d);
}
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

53
src/crypto/tfm/fp_div_2.c Normal file
View File

@ -0,0 +1,53 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* b = a/2
 *
 * Shifts the magnitude of a right by one bit into b (a and b may be the
 * same object), copies the sign and clamps the result.
 */
void fp_div_2(fp_int * a, fp_int * b)
{
   int      idx, prev_used;
   fp_digit carry, low_bit;

   /* remember how many digits b held so stale ones can be cleared */
   prev_used = b->used;
   b->used   = a->used;

   /* walk from the most significant digit down, passing each digit's
    * lowest bit into the top bit of the digit below it */
   carry = 0;
   for (idx = b->used - 1; idx >= 0; idx--) {
      low_bit    = a->dp[idx] & 1;
      b->dp[idx] = (a->dp[idx] >> 1) | (carry << (DIGIT_BIT - 1));
      carry      = low_bit;
   }

   /* zero digits b used before that are above the new length */
   for (idx = b->used; idx < prev_used; idx++) {
      b->dp[idx] = 0;
   }

   b->sign = a->sign;
   fp_clamp (b);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,79 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = a / 2**b, with the remainder a mod 2**b stored in d when d != NULL.
 *
 * For b <= 0 no shift is performed: c becomes a copy of a and d (if given)
 * is set to zero.
 */
void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
{
   fp_int   rem;
   fp_digit bit_shift;
   int      idx;

   /* nothing to shift */
   if (b <= 0) {
      fp_copy (a, c);
      if (d != NULL) {
         fp_zero (d);
      }
      return;
   }

   fp_init(&rem);

   /* take the remainder from a before c (which may alias a) is written */
   if (d != NULL) {
      fp_mod_2d (a, b, &rem);
   }

   fp_copy(a, c);

   /* whole-digit part of the shift */
   if (b >= (int)DIGIT_BIT) {
      fp_rshd (c, b / DIGIT_BIT);
   }

   /* remaining shift of fewer than DIGIT_BIT bits */
   bit_shift = (fp_digit) (b % DIGIT_BIT);
   if (bit_shift != 0) {
      const fp_digit low_mask = (((fp_digit)1) << bit_shift) - 1;
      const fp_digit up_shift = DIGIT_BIT - bit_shift;
      fp_digit       carry    = 0;
      fp_digit       saved;

      /* top digit first; bits shifted out of a digit enter the one below */
      for (idx = c->used - 1; idx >= 0; idx--) {
         saved      = c->dp[idx] & low_mask;
         c->dp[idx] = (c->dp[idx] >> bit_shift) | (carry << up_shift);
         carry      = saved;
      }
   }
   fp_clamp (c);

   if (d != NULL) {
      fp_copy (&rem, d);
   }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

276
src/crypto/tfm/fp_exptmod.c Normal file
View File

@ -0,0 +1,276 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
#ifdef TFM_TIMING_RESISTANT
/* timing resistant montgomery ladder based exptmod
Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002

Computes Y = G**X (mod P). Every bit of every digit of X that is in use
(including leading zero bits of the top digit) costs exactly one multiply
and one square, each followed by a Montgomery reduction.

Returns the error from fp_montgomery_setup() if that fails, FP_OKAY otherwise.
*/
static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
fp_int R[2];
fp_digit buf, mp;
int err, bitcnt, digidx, y;
/* now setup montgomery */
if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
return err;
}
fp_init(&R[0]);
fp_init(&R[1]);
/* now we need R mod m */
fp_montgomery_calc_normalization (&R[0], P);
/* now set R[1] to G * R mod m */
if (fp_cmp_mag(P, G) != FP_GT) {
/* |G| >= |P| so we reduce it first */
fp_mod(G, P, &R[1]);
} else {
fp_copy(G, &R[1]);
}
fp_mulmod (&R[1], &R[0], P, &R[1]);
/* for j = t-1 downto 0 do
r_!k = R0*R1; r_k = r_k^2
*/
/* set initial mode and bit cnt */
/* bitcnt starts at 1 so the first --bitcnt below loads the top digit */
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
/* if digidx == -1 we are out of digits so break */
if (digidx == -1) {
break;
}
/* read next digit and reset bitcnt */
buf = X->dp[digidx--];
bitcnt = (int)DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
buf <<= (fp_digit)1;
/* do ops */
/* the product goes to the register NOT selected by the bit, the selected
 * register is squared */
fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
fp_sqr(&R[y], &R[y]); fp_montgomery_reduce(&R[y], P, mp);
}
/* one more reduction to take the result out of Montgomery form */
fp_montgomery_reduce(&R[0], P, mp);
fp_copy(&R[0], Y);
return FP_OKAY;
}
#else
/* y = g**x (mod b)
* Some restrictions... x must be positive and < b
*
* Windowed exponentiation with Montgomery reduction. The window size
* (1, 3, 4, 5 or 6 bits) is chosen from the bit length of X; the table M
* has 64 entries, enough for the largest window (1 << 6).
*
* Returns the error from fp_montgomery_setup() if that fails, FP_OKAY
* otherwise.
*/
static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
fp_int M[64], res;
fp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
/* find window size */
x = fp_count_bits (X);
if (x <= 21) {
winsize = 1;
} else if (x <= 36) {
winsize = 3;
} else if (x <= 140) {
winsize = 4;
} else if (x <= 450) {
winsize = 5;
} else {
winsize = 6;
}
/* init M array */
memset(M, 0, sizeof(M));
/* now setup montgomery */
if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
return err;
}
/* setup result */
fp_init(&res);
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
*
* The first half of the table is not computed though except for M[0] and M[1]
*/
/* now we need R mod m */
fp_montgomery_calc_normalization (&res, P);
/* now set M[1] to G * R mod m */
if (fp_cmp_mag(P, G) != FP_GT) {
/* |G| >= |P| so we reduce it first */
fp_mod(G, P, &M[1]);
} else {
fp_copy(G, &M[1]);
}
fp_mulmod (&M[1], &res, P, &M[1]);
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
fp_copy (&M[1], &M[1 << (winsize - 1)]);
for (x = 0; x < (winsize - 1); x++) {
fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
}
/* create upper table */
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
fp_mul(&M[x - 1], &M[1], &M[x]);
fp_montgomery_reduce(&M[x], P, mp);
}
/* set initial mode and bit cnt */
/* mode 0: still skipping leading zero bits
 * mode 1: between windows (zero bits are handled by squaring)
 * mode 2: collecting bits into the current window
 */
mode = 0;
bitcnt = 1;
buf = 0;
digidx = X->used - 1;
bitcpy = 0;
bitbuf = 0;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
/* if digidx == -1 we are out of digits so break */
if (digidx == -1) {
break;
}
/* read next digit and reset bitcnt */
buf = X->dp[digidx--];
bitcnt = (int)DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
buf <<= (fp_digit)1;
/* if the bit is zero and mode == 0 then we ignore it
* These represent the leading zero bits before the first 1 bit
* in the exponent. Technically this opt is not required but it
* does lower the # of trivial squaring/reductions used
*/
if (mode == 0 && y == 0) {
continue;
}
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
fp_sqr(&res, &res);
fp_montgomery_reduce(&res, P, mp);
continue;
}
/* else we add it to the window */
bitbuf |= (y << (winsize - ++bitcpy));
mode = 2;
if (bitcpy == winsize) {
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
fp_sqr(&res, &res);
fp_montgomery_reduce(&res, P, mp);
}
/* then multiply */
fp_mul(&res, &M[bitbuf], &res);
fp_montgomery_reduce(&res, P, mp);
/* empty window and reset */
bitcpy = 0;
bitbuf = 0;
mode = 1;
}
}
/* if bits remain then square/multiply */
if (mode == 2 && bitcpy > 0) {
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
fp_sqr(&res, &res);
fp_montgomery_reduce(&res, P, mp);
/* get next bit of the window */
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
fp_mul(&res, &M[1], &res);
fp_montgomery_reduce(&res, P, mp);
}
}
}
/* fixup result if Montgomery reduction is used
* recall that any value in a Montgomery system is
* actually multiplied by R mod n. So we have
* to reduce one more time to cancel out the factor
* of R.
*/
fp_montgomery_reduce(&res, P, mp);
/* copy res into Y */
fp_copy (&res, Y);
return FP_OKAY;
}
#endif
/* Y = G**X (mod P)
 *
 * A negative exponent is handled by inverting G modulo P and raising the
 * inverse to |X|. While that happens the sign of X is flipped in place; it
 * is restored afterwards unless X and Y are the same object.
 *
 * Returns FP_OKAY on success, the error from fp_invmod() if the inverse
 * does not exist, or (with TFM_CHECK) FP_VAL when P uses more than
 * FP_SIZE/2 digits.
 */
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
   fp_int inverse;
   int    status;

#ifdef TFM_CHECK
   /* prevent overflows */
   if (P->used > (FP_SIZE/2)) {
      return FP_VAL;
   }
#endif

   /* non-negative exponent: exponentiate directly */
   if (X->sign != FP_NEG) {
      return _fp_exptmod(G, X, P, Y);
   }

   /* negative exponent: work with 1/G and |X| */
   fp_copy(G, &inverse);
   status = fp_invmod(&inverse, P, &inverse);
   if (status != FP_OKAY) {
      return status;
   }

   X->sign = FP_ZPOS;
   status  = _fp_exptmod(&inverse, X, P, Y);
   if (X != Y) {
      X->sign = FP_NEG;
   }
   return status;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

207
src/crypto/tfm/fp_invmod.c Normal file
View File

@ -0,0 +1,207 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = 1/a (mod b), general version used by fp_invmod() when b is even.
 *
 * Returns FP_VAL when b is negative or zero, when a mod b and b are both
 * even, or when no inverse exists (final v != 1); the error of fp_mod()
 * if that fails; FP_OKAY otherwise.
 */
static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
{
fp_int x, y, u, v, A, B, C, D;
int res;
/* b must be positive (neither negative nor zero) */
if (b->sign == FP_NEG || fp_iszero(b) == 1) {
return FP_VAL;
}
/* init temps */
fp_init(&x); fp_init(&y);
fp_init(&u); fp_init(&v);
fp_init(&A); fp_init(&B);
fp_init(&C); fp_init(&D);
/* x = a mod b, y = b */
if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
return res;
}
fp_copy(b, &y);
/* 2. [modified] if x,y are both even then return an error! */
if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
return FP_VAL;
}
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
fp_copy (&x, &u);
fp_copy (&y, &v);
fp_set (&A, 1);
fp_set (&D, 1);
top:
/* 4. while u is even do */
while (fp_iseven (&u) == 1) {
/* 4.1 u = u/2 */
fp_div_2 (&u, &u);
/* 4.2 if A or B is odd then */
if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
/* A = (A+y)/2, B = (B-x)/2 */
fp_add (&A, &y, &A);
fp_sub (&B, &x, &B);
}
/* A = A/2, B = B/2 */
fp_div_2 (&A, &A);
fp_div_2 (&B, &B);
}
/* 5. while v is even do */
while (fp_iseven (&v) == 1) {
/* 5.1 v = v/2 */
fp_div_2 (&v, &v);
/* 5.2 if C or D is odd then */
if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
/* C = (C+y)/2, D = (D-x)/2 */
fp_add (&C, &y, &C);
fp_sub (&D, &x, &D);
}
/* C = C/2, D = D/2 */
fp_div_2 (&C, &C);
fp_div_2 (&D, &D);
}
/* 6. if u >= v then */
if (fp_cmp (&u, &v) != FP_LT) {
/* u = u - v, A = A - C, B = B - D */
fp_sub (&u, &v, &u);
fp_sub (&A, &C, &A);
fp_sub (&B, &D, &B);
} else {
/* v = v - u, C = C - A, D = D - B */
fp_sub (&v, &u, &v);
fp_sub (&C, &A, &C);
fp_sub (&D, &B, &D);
}
/* if not zero goto step 4 */
if (fp_iszero (&u) == 0)
goto top;
/* now a = C, b = D, gcd == g*v */
/* if v != 1 then there is no inverse */
if (fp_cmp_d (&v, 1) != FP_EQ) {
return FP_VAL;
}
/* if its too low */
while (fp_cmp_d(&C, 0) == FP_LT) {
fp_add(&C, b, &C);
}
/* too big */
while (fp_cmp_mag(&C, b) != FP_LT) {
fp_sub(&C, b, &C);
}
/* C is now the inverse */
fp_copy(&C, c);
return FP_OKAY;
}
/* c = 1/a (mod b)
 *
 * The loop in this function handles odd b only; an even b is delegated to
 * fp_invmod_slow(). Returns FP_VAL when no inverse exists (final v != 1),
 * FP_OKAY otherwise. The inverse is computed for |a| and the sign of a is
 * then copied onto c.
 */
int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
{
fp_int x, y, u, v, B, D;
int neg;
/* 2. [modified] the fast path needs an odd b; even b goes to the slow version */
if (fp_iseven (b) == FP_YES) {
return fp_invmod_slow(a,b,c);
}
/* init all our temps */
fp_init(&x); fp_init(&y);
fp_init(&u); fp_init(&v);
fp_init(&B); fp_init(&D);
/* x == modulus, y == value to invert */
fp_copy(b, &x);
/* we need y = |a| */
fp_abs(a, &y);
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 (A and C are not tracked in this version) */
fp_copy(&x, &u);
fp_copy(&y, &v);
fp_set (&D, 1);
top:
/* 4. while u is even do */
while (fp_iseven (&u) == FP_YES) {
/* 4.1 u = u/2 */
fp_div_2 (&u, &u);
/* 4.2 if B is odd then */
if (fp_isodd (&B) == FP_YES) {
fp_sub (&B, &x, &B);
}
/* B = B/2 */
fp_div_2 (&B, &B);
}
/* 5. while v is even do */
while (fp_iseven (&v) == FP_YES) {
/* 5.1 v = v/2 */
fp_div_2 (&v, &v);
/* 5.2 if D is odd then */
if (fp_isodd (&D) == FP_YES) {
/* D = (D-x)/2 */
fp_sub (&D, &x, &D);
}
/* D = D/2 */
fp_div_2 (&D, &D);
}
/* 6. if u >= v then */
if (fp_cmp (&u, &v) != FP_LT) {
/* u = u - v, B = B - D */
fp_sub (&u, &v, &u);
fp_sub (&B, &D, &B);
} else {
/* v = v - u, D = D - B */
fp_sub (&v, &u, &v);
fp_sub (&D, &B, &D);
}
/* if not zero goto step 4 */
if (fp_iszero (&u) == FP_NO) {
goto top;
}
/* now a = C, b = D, gcd == g*v */
/* if v != 1 then there is no inverse */
if (fp_cmp_d (&v, 1) != FP_EQ) {
return FP_VAL;
}
/* D is now the inverse; add b until it is no longer negative */
neg = a->sign;
while (D.sign == FP_NEG) {
fp_add (&D, b, &D);
}
fp_copy (&D, c);
c->sign = neg;
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

38
src/crypto/tfm/fp_lshd.c Normal file
View File

@ -0,0 +1,38 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Shift a left by x whole digits (a = a * 2**(x*DIGIT_BIT)).
 *
 * Digits that would move past index FP_SIZE-1 are dropped.
 */
void fp_lshd(fp_int *a, int x)
{
   int top, idx;

   /* index of the new top digit, capped at the last available slot */
   top     = MIN(a->used + x - 1, (int)(FP_SIZE-1));
   a->used = top + 1;

   /* move existing digits up, highest first so nothing is overwritten early */
   for (idx = top; idx >= x; idx--) {
      a->dp[idx] = a->dp[idx - x];
   }

   /* the vacated low digits become zero */
   while (idx >= 0) {
      a->dp[idx] = 0;
      idx--;
   }

   fp_clamp(a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

34
src/crypto/tfm/fp_mod.c Normal file
View File

@ -0,0 +1,34 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = a mod b, 0 <= c < b (for positive b)
 *
 * The remainder produced by fp_div() carries the sign of a. When it is
 * non-zero and its sign differs from that of b, b is added so that the
 * result takes the sign of the modulus. A zero remainder is returned
 * unchanged: without that check a zero remainder (always FP_ZPOS) combined
 * with a negative b would wrongly yield c = b instead of c = 0.
 *
 * Returns the error from fp_div() (FP_VAL when b is zero) or FP_OKAY.
 */
int fp_mod(fp_int *a, fp_int *b, fp_int *c)
{
   fp_int t;
   int    err;

   fp_zero(&t);
   if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
      return err;
   }

   /* only a non-zero remainder of the opposite sign needs adjusting */
   if (fp_iszero(&t) == FP_NO && t.sign != b->sign) {
      fp_add(&t, b, c);
   } else {
      fp_copy(&t, c);
   }
   return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,42 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = a mod 2**b
 *
 * For b <= 0 the result is zero. When 2**b is at least as wide as the
 * digits a uses, c is simply a copy of a. Otherwise all digits at or above
 * bit b are cleared and the digit that straddles bit b is masked.
 */
void fp_mod_2d(fp_int *a, int b, fp_int *c)
{
   int x;
   int top;   /* index of the digit containing bit b            */
   int rem;   /* number of bits of that digit that must be kept */

   /* zero if count less than or equal to zero */
   if (b <= 0) {
      fp_zero(c);
      return;
   }

   /* get copy of input */
   fp_copy(a, c);

   /* if 2**b is larger than a then we just return */
   if (b >= (DIGIT_BIT * a->used)) {
      return;
   }

   top = b / DIGIT_BIT;
   rem = b % DIGIT_BIT;

   /* zero digits above the last digit of the modulus */
   for (x = top + (rem == 0 ? 0 : 1); x < c->used; x++) {
      c->dp[x] = 0;
   }

   /* clear the upper bits of the digit that is only partly inside the
    * modulus. The shift count is derived from b % DIGIT_BIT: using
    * DIGIT_BIT - b directly would be a negative (undefined) shift as soon
    * as b exceeds DIGIT_BIT. When b is digit-aligned there is no partial
    * digit and the loop above has already cleared dp[top]. */
   if (rem != 0) {
      c->dp[top] &= ~((fp_digit)0) >> (DIGIT_BIT - rem);
   }
   fp_clamp (c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,43 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Computes a = B**n mod b using only doublings and subtractions (no
 * division or multiplication); used to normalize numbers in a Montgomery
 * system.
 */
void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
{
   int step, top_bits;

   /* number of bits b occupies in its top digit (a full digit if aligned) */
   top_bits = fp_count_bits (b) % DIGIT_BIT;
   if (top_bits == 0) {
      top_bits = DIGIT_BIT;
   }

   /* start from A = B^(n-1) * 2^(bits-1), or from 1 for a one-digit b */
   if (b->used > 1) {
      fp_2expt (a, (b->used - 1) * DIGIT_BIT + top_bits - 1);
   } else {
      fp_set(a, 1);
      top_bits = 1;
   }

   /* double up to B**n, subtracting b whenever a reaches or exceeds it */
   step = top_bits - 1;
   while (step < (int)DIGIT_BIT) {
      fp_mul_2 (a, a);
      if (fp_cmp_mag (a, b) != FP_LT) {
         s_fp_sub (a, b, a);
      }
      step++;
   }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,552 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/******************************************************************/
#if defined(TFM_X86) && !defined(TFM_SSE2)
/* x86-32 code */
/* MONT_START, MONT_FINI and LOOP_END expand to nothing on x86-32. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* LOOP_START: compute the per-iteration multiplier mu from the current
 * digit c[x] and the Montgomery constant mp. */
#define LOOP_START \
mu = c[x] * mp
/* INNERMUL: _c[LO] += mu * (*tmpm++) + cy, with the high word of the sum
 * (including both carries) left in cy. Uses eax/edx for the 32x32 multiply. */
#define INNERMUL \
asm( \
"movl %5,%%eax \n\t" \
"mull %4 \n\t" \
"addl %1,%%eax \n\t" \
"adcl $0,%%edx \n\t" \
"addl %%eax,%0 \n\t" \
"adcl $0,%%edx \n\t" \
"movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%eax", "%edx", "cc")
/* PROPCARRY: _c[LO] += cy, then cy becomes the carry-out (0 or 1) of that
 * addition. */
#define PROPCARRY \
asm( \
"addl %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "cc")
/******************************************************************/
#elif defined(TFM_X86_64)
/* x86-64 code */
/* MONT_START, MONT_FINI and LOOP_END expand to nothing on x86-64. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* LOOP_START: compute the per-iteration multiplier mu from the current
 * digit c[x] and the Montgomery constant mp. */
#define LOOP_START \
mu = c[x] * mp
/* INNERMUL: _c[LO] += mu * (*tmpm++) + cy, with the high word of the sum
 * (including both carries) left in cy. Uses rax/rdx for the 64x64 multiply. */
#define INNERMUL \
asm( \
"movq %5,%%rax \n\t" \
"mulq %4 \n\t" \
"addq %1,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"addq %%rax,%0 \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rdx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "cc")
#define INNERMUL8 \
asm( \
"movq 0(%5),%%rax \n\t" \
"movq 0(%2),%%r10 \n\t" \
"movq 0x8(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x8(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x10(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x10(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x8(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x18(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x18(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x10(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x20(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x20(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x18(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x28(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x28(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x20(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x30(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x30(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x28(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x38(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x38(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x30(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x38(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
:"=r"(_c), "=r"(cy) \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "cc")
#define PROPCARRY \
asm( \
"addq %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbq %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%rax", "cc")
/******************************************************************/
#elif defined(TFM_SSE2)
/* SSE2 code (assumes 32-bit fp_digits) */
/* MMX register assignments (the code below uses %mm0-%mm7, not XMM):
* mm0 *tmpm++, then Mu * (*tmpm++)
* mm1 c[x], then Mu
* mm2 mp
* mm3 cy
* mm4 _c[LO]
* mm5-mm7 additional scratch registers in INNERMUL8
*/
#define MONT_START \
asm("movd %0,%%mm2"::"g"(mp))
#define MONT_FINI \
asm("emms")
#define LOOP_START \
asm( \
"movd %0,%%mm1 \n\t" \
"pxor %%mm3,%%mm3 \n\t" \
"pmuludq %%mm2,%%mm1 \n\t" \
:: "g"(c[x]))
/* pmuludq on mmx registers does a 32x32->64 multiply. */
#define INNERMUL \
asm( \
"movd %1,%%mm4 \n\t" \
"movd %2,%%mm0 \n\t" \
"paddq %%mm4,%%mm3 \n\t" \
"pmuludq %%mm1,%%mm0 \n\t" \
"paddq %%mm0,%%mm3 \n\t" \
"movd %%mm3,%0 \n\t" \
"psrlq $32, %%mm3 \n\t" \
:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
/* Eight unrolled INNERMUL steps.  For k = 0..7 this computes
 * _c[k] + carry + Mu * tmpm[k], stores the low 32 bits back to _c[k] and
 * keeps the high 32 bits in mm3 as the running carry.  mm5-mm7 are used
 * as extra scratch registers.  The asm does not advance _c or tmpm; the
 * caller adds 8 to both afterwards.
 *
 * tmpm is passed with the "r" constraint because the template uses it as
 * a base register (N(%2)); "g" would also permit a memory operand, which
 * cannot be dereferenced that way.  The "memory" clobber tells the
 * compiler that digits are read and written through the pointers rather
 * than through listed operands.
 */
#define INNERMUL8 \
asm(                                  \
"movd 0(%1),%%mm4             \n\t"   \
"movd 0(%2),%%mm0             \n\t"   \
"paddq %%mm4,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm0          \n\t"   \
"movd 4(%2),%%mm5             \n\t"   \
"paddq %%mm0,%%mm3            \n\t"   \
"movd 4(%1),%%mm6             \n\t"   \
"movd %%mm3,0(%0)             \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
\
"paddq %%mm6,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm5          \n\t"   \
"movd 8(%2),%%mm6             \n\t"   \
"paddq %%mm5,%%mm3            \n\t"   \
"movd 8(%1),%%mm7             \n\t"   \
"movd %%mm3,4(%0)             \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
\
"paddq %%mm7,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm6          \n\t"   \
"movd 12(%2),%%mm7            \n\t"   \
"paddq %%mm6,%%mm3            \n\t"   \
"movd 12(%1),%%mm5            \n\t"   \
"movd %%mm3,8(%0)             \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
\
"paddq %%mm5,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm7          \n\t"   \
"movd 16(%2),%%mm5            \n\t"   \
"paddq %%mm7,%%mm3            \n\t"   \
"movd 16(%1),%%mm6            \n\t"   \
"movd %%mm3,12(%0)            \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
\
"paddq %%mm6,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm5          \n\t"   \
"movd 20(%2),%%mm6            \n\t"   \
"paddq %%mm5,%%mm3            \n\t"   \
"movd 20(%1),%%mm7            \n\t"   \
"movd %%mm3,16(%0)            \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
\
"paddq %%mm7,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm6          \n\t"   \
"movd 24(%2),%%mm7            \n\t"   \
"paddq %%mm6,%%mm3            \n\t"   \
"movd 24(%1),%%mm5            \n\t"   \
"movd %%mm3,20(%0)            \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
\
"paddq %%mm5,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm7          \n\t"   \
"movd 28(%2),%%mm5            \n\t"   \
"paddq %%mm7,%%mm3            \n\t"   \
"movd 28(%1),%%mm6            \n\t"   \
"movd %%mm3,24(%0)            \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
\
"paddq %%mm6,%%mm3            \n\t"   \
"pmuludq %%mm1,%%mm5          \n\t"   \
"paddq %%mm5,%%mm3            \n\t"   \
"movd %%mm3,28(%0)            \n\t"   \
"psrlq $32, %%mm3             \n\t"   \
:"=r"(_c) : "0"(_c), "r"(tmpm) : "memory" );
#define LOOP_END \
asm( "movd %%mm3,%0 \n" :"=r"(cy))
#define PROPCARRY \
asm( \
"addl %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "cc")
/******************************************************************/
#elif defined(TFM_ARM)
/* ARMv4 code */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
" UMLAL r0,%0,%3,%4 \n\t" \
" STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
#define PROPCARRY \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" STR r0,%1 \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");
/******************************************************************/
#elif defined(TFM_PPC32)
/* PPC32 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
" mullw 16,%3,%4 \n\t" \
" mulhwu 17,%3,%4 \n\t" \
" addc 16,16,%2 \n\t" \
" addze 17,17 \n\t" \
" addc %1,16,%5 \n\t" \
" addze %0,17 \n\t" \
:"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "cc"); ++tmpm;
#define PROPCARRY \
asm( \
" addc %1,%3,%2 \n\t" \
" xor %0,%2,%2 \n\t" \
" addze %0,%2 \n\t" \
:"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc");
/******************************************************************/
#elif defined(TFM_PPC64)
/* PPC64 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
" mulld r16,%3,%4 \n\t" \
" mulhdu r17,%3,%4 \n\t" \
" addc r16,16,%0 \n\t" \
" addze r17,r17 \n\t" \
" ldx r18,0,%1 \n\t" \
" addc r16,r16,r18 \n\t" \
" addze %0,r17 \n\t" \
" sdx r16,0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","cc"); ++tmpm;
#define PROPCARRY \
asm( \
" ldx r16,0,%1 \n\t" \
" addc r16,r16,%0 \n\t" \
" sdx r16,0,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","cc");
/******************************************************************/
#elif defined(TFM_AVR32)
/* AVR32 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
" ld.w r2,%1 \n\t" \
" add r2,%0 \n\t" \
" eor r3,r3 \n\t" \
" acr r3 \n\t" \
" macu.d r2,%3,%4 \n\t" \
" st.w %1,r2 \n\t" \
" mov %0,r3 \n\t" \
:"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
#define PROPCARRY \
asm( \
" ld.w r2,%1 \n\t" \
" add r2,%0 \n\t" \
" st.w %1,r2 \n\t" \
" eor %0,%0 \n\t" \
" acr %0 \n\t" \
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");
/******************************************************************/
#elif defined(TFM_MIPS)
/* MIPS */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
" multu %3,%4 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu $12,$12,%0 \n\t" \
" sltu $10,$12,%0 \n\t" \
" addu $13,$13,$10 \n\t" \
" lw $10,%1 \n\t" \
" addu $12,$12,$10 \n\t" \
" sltu $10,$12,$10 \n\t" \
" addu %0,$13,$10 \n\t" \
" sw $12,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"$10","$12","$13"); ++tmpm;
#define PROPCARRY \
asm( \
" lw $10,%1 \n\t" \
" addu $10,$10,%0 \n\t" \
" sw $10,%1 \n\t" \
" sltu %0,$10,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"$10");
/******************************************************************/
#else
/* ISO C code */
/* Portable fallback used when none of the TFM_* architecture macros above
 * is defined.  No per-call setup or teardown is needed, so MONT_START,
 * MONT_FINI and LOOP_END expand to nothing.
 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* per-round multiplier: mu = c[x] * mp */
#define LOOP_START \
mu = c[x] * mp
/* one multiply-accumulate step: forms _c[0] + cy + mu * (*tmpm) in a
 * double-width fp_word, writes the result back to _c[0], advances tmpm,
 * and leaves the part above DIGIT_BIT bits in cy
 */
#define INNERMUL \
do { fp_word t; \
_c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \
(((fp_word)mu) * ((fp_word)*tmpm++)); \
cy = (t >> DIGIT_BIT); \
} while (0)
/* adds the carry into _c[0]; cy becomes 1 if that addition wrapped around
 * (the sum is smaller than the carry that was added), otherwise 0
 */
#define PROPCARRY \
do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
#endif
/******************************************************************/
#define LO 0
#ifdef TFM_SMALL_MONT_SET
#include "fp_mont_small.i"
#endif
/* computes x/R == x (mod N) via Montgomery Reduction
 *
 * a  - value to reduce; overwritten in place with the reduced result
 * m  - modulus
 * mp - per-modulus digit used to derive the multiplier mu of each round
 *      (presumably the rho produced by fp_montgomery_setup -- confirm
 *      against callers)
 *
 * If m uses more than FP_SIZE/2 digits the function returns without
 * modifying a, because the double-width intermediate would not fit in
 * the working buffer.
 */
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
{
   /* FP_SIZE+1 digits: the zero-fill and the copy-out below both touch
    * index 2*pa, which equals FP_SIZE when m->used == FP_SIZE/2 and
    * FP_SIZE is even.  A buffer of only FP_SIZE digits would be overrun
    * by one element in that case.
    */
   fp_digit c[FP_SIZE+1], *_c, *tmpm, mu;
   int      oldused, x, y, pa;

   /* bail if too large */
   if (m->used > (FP_SIZE/2)) {
      return;
   }

#ifdef TFM_SMALL_MONT_SET
   /* small moduli are handled by the dedicated routine */
   if (m->used <= 16) {
      fp_montgomery_reduce_small(a, m, mp);
      return;
   }
#endif

#if defined(USE_MEMSET)
   /* now zero the buff */
   memset(c, 0, sizeof c);
#endif
   pa = m->used;

   /* copy the input */
   oldused = a->used;
   for (x = 0; x < oldused; x++) {
      c[x] = a->dp[x];
   }
#if !defined(USE_MEMSET)
   /* clear the rest of the working area, up to and including c[2*pa] */
   for (; x < 2*pa+1; x++) {
      c[x] = 0;
   }
#endif
   MONT_START;

   /* one round per digit of the modulus */
   for (x = 0; x < pa; x++) {
      fp_digit cy = 0;
      /* get Mu for this round */
      LOOP_START;
      _c   = c + x;
      tmpm = m->dp;
      y = 0;
#if defined(INNERMUL8)
      /* eight digits at a time where the architecture provides it */
      for (; y < (pa & ~7); y += 8) {
         INNERMUL8;
         _c   += 8;
         tmpm += 8;
      }
#endif
      /* remaining digits one at a time: c[x+y] += mu * m[y] + carry */
      for (; y < pa; y++) {
         INNERMUL;
         ++_c;
      }
      LOOP_END;
      /* ripple the leftover carry into the higher digits */
      while (cy) {
         PROPCARRY;
         ++_c;
      }
   }

   /* now copy out: the result is digits pa .. 2*pa of the buffer */
   _c   = c + pa;
   tmpm = a->dp;
   for (x = 0; x < pa+1; x++) {
      *tmpm++ = *_c++;
   }

   /* clear digits of a that the shorter result no longer covers */
   for (; x < oldused; x++) {
      *tmpm++ = 0;
   }

   MONT_FINI;

   a->used = pa+1;
   fp_clamp(a);

   /* if A >= m then A = A - m */
   if (fp_cmp_mag (a, m) != FP_LT) {
      s_fp_sub (a, m, a);
   }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,48 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Sets up Montgomery reduction for the modulus a.
 *
 * Stores in *rho the negated inverse of the least significant digit of a
 * modulo 2**DIGIT_BIT.  Returns FP_VAL when a is even (no inverse exists)
 * and FP_OKAY otherwise.
 */
int fp_montgomery_setup(fp_int *a, fp_digit *rho)
{
   fp_digit inv, n0;
   int      rounds;

   /* fast inversion mod 2**k
    *
    * Based on the fact that
    *
    * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
    *                    =>  2*X*A - X*X*A*A = 1
    *                    =>  2*(1) - (1)     = 1
    */
   n0 = a->dp[0];

   /* only odd moduli are invertible modulo a power of two */
   if ((n0 & 1) == 0) {
      return FP_VAL;
   }

   /* starting point: inv*n0 == 1 mod 2**4 */
   inv = (((n0 + 2) & 4) << 1) + n0;

   /* each round doubles the number of correct low bits:
    * 4 -> 8 -> 16 -> 32 (-> 64 when FP_64BIT is defined)
    */
#ifdef FP_64BIT
   rounds = 4;
#else
   rounds = 3;
#endif
   while (rounds-- > 0) {
      inv *= 2 - n0 * inv;
   }

   /* rho = -1/m mod b */
   *rho = (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)inv));

   return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

139
src/crypto/tfm/fp_mul.c Normal file
View File

@ -0,0 +1,139 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = a * b
 *
 * Dispatches to one of the fixed-size unrolled comba multipliers that were
 * compiled in (TFM_MULn / TFM_SMALL_SET), chosen by operand digit counts,
 * and falls back to the generic fp_mul_comba otherwise.  Afterwards any
 * digits of C left over from its previous, longer value are zeroed.
 */
void fp_mul(fp_int *A, fp_int *B, fp_int *C)
{
   int prev_used, hi_used, idx;
#if FP_SIZE >= 48
   int lo_used;
#endif

   /* digit count of C on entry, needed for the final wipe */
   prev_used = C->used;

   /* call generic if we're out of range */
   if (A->used + B->used > FP_SIZE) {
      fp_mul_comba(A, B, C);
      goto wipe;
   }

   hi_used = MAX(A->used, B->used);
#if FP_SIZE >= 48
   lo_used = MIN(A->used, B->used);
#endif

   /* Pick a comba (unrolled or rolled) based on the size of the largest
    * input.  Excess multiplications are avoided when the inputs are not
    * close to the next unrolled size: with hi_used = 17, a 32-digit
    * routine would do (32-17)^2 = 225 unneeded multiplications.  The
    * larger routines therefore also require a minimum size for the
    * smaller operand.
    */
#if defined(TFM_MUL3) && FP_SIZE >= 6
   if (hi_used <= 3) {
      fp_mul_comba3(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL4) && FP_SIZE >= 8
   if (hi_used == 4) {
      fp_mul_comba4(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL6) && FP_SIZE >= 12
   if (hi_used <= 6) {
      fp_mul_comba6(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL7) && FP_SIZE >= 14
   if (hi_used == 7) {
      fp_mul_comba7(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL8) && FP_SIZE >= 16
   if (hi_used == 8) {
      fp_mul_comba8(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL9) && FP_SIZE >= 18
   if (hi_used == 9) {
      fp_mul_comba9(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL12) && FP_SIZE >= 24
   if (hi_used <= 12) {
      fp_mul_comba12(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL17) && FP_SIZE >= 34
   if (hi_used <= 17) {
      fp_mul_comba17(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_SMALL_SET) && FP_SIZE >= 32
   if (hi_used <= 16) {
      fp_mul_comba_small(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL20) && FP_SIZE >= 40
   if (hi_used <= 20) {
      fp_mul_comba20(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL24) && FP_SIZE >= 48
   if (lo_used >= 16 && hi_used <= 24) {
      fp_mul_comba24(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL28) && FP_SIZE >= 56
   if (lo_used >= 20 && hi_used <= 28) {
      fp_mul_comba28(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL32) && FP_SIZE >= 64
   if (lo_used >= 24 && hi_used <= 32) {
      fp_mul_comba32(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL48) && FP_SIZE >= 96
   if (lo_used >= 40 && hi_used <= 48) {
      fp_mul_comba48(A,B,C);
      goto wipe;
   }
#endif
#if defined(TFM_MUL64) && FP_SIZE >= 128
   if (lo_used >= 56 && hi_used <= 64) {
      fp_mul_comba64(A,B,C);
      goto wipe;
   }
#endif

   /* no specialised routine matched */
   fp_mul_comba(A,B,C);

wipe:
   /* zero the digits between the new and the previous length of C */
   idx = C->used;
   while (idx < prev_used) {
      C->dp[idx] = 0;
      ++idx;
   }
}
/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul.c,v $ */
/* $Revision: 1.1 $ */
/* $Date: 2006/12/31 21:25:53 $ */

67
src/crypto/tfm/fp_mul_2.c Normal file
View File

@ -0,0 +1,67 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* b = 2 * a
 *
 * Shifts every digit of a left by one bit, carrying the top bit of each
 * digit into the next one, and stores the result in b (a and b may be the
 * same object).  The sign of a is copied to b.  If a carry comes out of
 * the most significant digit, a new leading digit of 1 is appended only
 * while b->used is still below FP_SIZE-1; otherwise that carry is
 * dropped.
 */
void fp_mul_2(fp_int * a, fp_int * b)
{
   int x, oldused;

   oldused = b->used;
   b->used = a->used;

   {
      fp_digit r, rr, *tmpa, *tmpb;

      /* alias for source */
      tmpa = a->dp;

      /* alias for dest */
      tmpb = b->dp;

      /* carry */
      r = 0;
      for (x = 0; x < a->used; x++) {
         /* get what will be the *next* carry bit from the
          * MSB of the current digit
          */
         rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));

         /* now shift up this digit, add in the carry [from the previous] */
         *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);

         /* copy the carry that would be from the source
          * digit into the next iteration
          */
         r = rr;
      }

      /* new leading digit?
       *
       * The bound is tested with '<' rather than '!=': with '!=' an
       * operand that already occupies all FP_SIZE digits would pass the
       * test, and the store below would then land one element past the
       * end of dp and push used beyond FP_SIZE.
       */
      if (r != 0 && b->used < (FP_SIZE-1)) {
         /* add a MSB which is always 1 at this point */
         *tmpb = 1;
         ++(b->used);
      }

      /* now zero any excess digits on the destination
       * that we didn't write to
       */
      tmpb = b->dp + b->used;
      for (x = b->used; x < oldused; x++) {
         *tmpb++ = 0;
      }
   }
   b->sign = a->sign;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,47 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = a * 2**b
 *
 * Copies a into c, shifts it left by whole digits first (fp_lshd) and then
 * by the remaining b % DIGIT_BIT bits.  A carry out of the top digit is
 * appended only if there is still room in the digit array.
 */
void fp_mul_2d(fp_int *a, int b, fp_int *c)
{
   fp_digit spill, next_spill, rshift;
   int      i;

   /* start from a copy of the input */
   fp_copy(a, c);

   /* whole-digit part of the shift */
   if (b >= DIGIT_BIT) {
      fp_lshd(c, b/DIGIT_BIT);
   }
   b %= DIGIT_BIT;

   /* sub-digit part of the shift */
   if (b != 0) {
      spill  = 0;
      rshift = DIGIT_BIT - b;

      i = 0;
      while (i < c->used) {
         /* bits that leave this digit at the top ... */
         next_spill = c->dp[i] >> rshift;
         /* ... while the previous digit's bits enter at the bottom */
         c->dp[i]   = (c->dp[i] << b) + spill;
         spill      = next_spill;
         ++i;
      }

      /* store last carry if room */
      if (spill != 0 && i < FP_SIZE) {
         c->dp[c->used++] = spill;
      }
   }
   fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,373 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
/* About this file...
*/
#include <tfm_private.h>
#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
#undef TFM_SSE2
#define TFM_X86
#endif
/* these are the combas. Worship them. */
#if defined(TFM_X86)
/* Generic x86 optimized code */
/* anything you need at the start */
#define COMBA_START
/* clear the chaining variables */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* forward the carry to the next digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the first sum */
#define COMBA_STORE(x) \
x = c0;
/* store the second sum [carry] */
#define COMBA_STORE2(x) \
x = c1;
/* anything you need at the end */
#define COMBA_FINI
/* this should multiply i and j */
#define MULADD(i, j) \
asm( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
#elif defined(TFM_X86_64)
/* x86-64 optimized */
/* anything you need at the start */
#define COMBA_START
/* clear the chaining variables */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* forward the carry to the next digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the first sum */
#define COMBA_STORE(x) \
x = c0;
/* store the second sum [carry] */
#define COMBA_STORE2(x) \
x = c1;
/* anything you need at the end */
#define COMBA_FINI
/* this should multiply i and j */
#define MULADD(i, j) \
asm ( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
#elif defined(TFM_SSE2)
/* use SSE2 optimizations */
/* anything you need at the start */
#define COMBA_START
/* clear the chaining variables */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* forward the carry to the next digit */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the first sum */
#define COMBA_STORE(x) \
x = c0;
/* store the second sum [carry] */
#define COMBA_STORE2(x) \
x = c1;
/* anything you need at the end */
#define COMBA_FINI \
asm("emms");
/* this should multiply i and j */
#define MULADD(i, j) \
asm( \
"movd %6,%%mm0 \n\t" \
"movd %7,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
"movd %%mm0,%%eax \n\t" \
"psrlq $32,%%mm0 \n\t" \
"addl %%eax,%0 \n\t" \
"movd %%mm0,%%eax \n\t" \
"adcl %%eax,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
#elif defined(TFM_ARM)
/* ARM code */
#define COMBA_START
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define COMBA_FINI
#define MULADD(i, j) \
asm( \
" UMULL r0,r1,%6,%7 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
#elif defined(TFM_PPC32)
/* For 32-bit PPC */
#define COMBA_START
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define COMBA_FINI
/* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j) \
asm( \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
#elif defined(TFM_PPC64)
/* For 64-bit PPC */
#define COMBA_START
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define COMBA_FINI
/* untested: will mulhdu change the flags? Docs say no */
#define MULADD(i, j) \
asm( \
" mulld r16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhdu r16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
#elif defined(TFM_AVR32)
/* AVR32 code */
#define COMBA_START
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define COMBA_FINI
#define MULADD(i, j) \
asm( \
" mulu.d r2,%6,%7 \n\t"\
" add %0,r2 \n\t"\
" adc %1,%1,r3 \n\t"\
" acr %2 \n\t"\
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
#elif defined(TFM_MIPS)
#define COMBA_START
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define COMBA_FINI
#define MULADD(i, j) \
asm( \
" multu %6,%7 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu %0,%0,$12 \n\t" \
" sltu $12,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $13,%1,$13 \n\t" \
" addu %1,%1,$12 \n\t" \
" sltu $12,%1,$12 \n\t" \
" addu %2,%2,$13 \n\t" \
" addu %2,%2,$12 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
#else
/* ISO C code */
/* Portable fallback used when no TFM_* architecture macro above matched.
 * The column sum is kept in three digits c2:c1:c0.
 */
/* anything you need at the start (nothing in portable C) */
#define COMBA_START
/* clear the chaining variables */
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
/* forward the carry to the next digit: shift the accumulator down by one */
#define COMBA_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* store the first sum (lowest accumulator digit) */
#define COMBA_STORE(x) \
x = c0;
/* store the second sum [carry] */
#define COMBA_STORE2(x) \
x = c1;
/* anything you need at the end (nothing in portable C) */
#define COMBA_FINI
/* adds the double-width product i*j into c2:c1:c0: the product plus c0 is
 * formed in an fp_word, its low digit becomes c0, the part above DIGIT_BIT
 * bits is added to c1, and any overflow of that addition goes into c2
 */
#define MULADD(i, j) \
do { fp_word t; \
t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); \
c0 = t; \
t = (fp_word)c1 + (t >> DIGIT_BIT); \
c1 = t; \
c2 += t >> DIGIT_BIT; \
} while (0);
#endif
#ifndef TFM_DEFINES
/* generic PxQ multiplier
 *
 * Computes C = A * B one output column at a time (comba method).  The
 * result is cut off at FP_SIZE-1 digits.  When C is the same object as A
 * or B, the product is built in a local temporary and copied to C at the
 * end.
 */
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
{
   int       col, ndigits, a_idx, b_idx, nterms, k;
   fp_digit  c0, c1, c2, *a_dig, *b_dig;
   fp_int    scratch, *out;

   COMBA_START;
   COMBA_CLEAR;

   /* get size of output and trim */
   ndigits = A->used + B->used;
   if (ndigits >= FP_SIZE) {
      ndigits = FP_SIZE-1;
   }

   /* write into a temporary when the destination aliases an operand */
   out = (A == C || B == C) ? &scratch : C;
   fp_zero(out);

   for (col = 0; col < ndigits; ++col) {
      /* starting offsets into the two bignums for this column */
      b_idx = MIN(col, B->used-1);
      a_idx = col - b_idx;

      /* walking pointers: up through A, down through B */
      a_dig = A->dp + a_idx;
      b_dig = B->dp + b_idx;

      /* number of digit pairs in this column; essentially the loop is
         while (a_idx++ < A->used && b_idx-- >= 0) { ... }
       */
      nterms = MIN(A->used-a_idx, b_idx+1);

      /* carry the previous column over, then accumulate the products */
      COMBA_FORWARD;
      for (k = nterms; k > 0; --k) {
         fp_digit da = *a_dig++;
         fp_digit db = *b_dig--;
         MULADD(da, db);
      }

      /* store term */
      COMBA_STORE(out->dp[col]);
   }
   COMBA_FINI;

   out->used = ndigits;
   out->sign = A->sign ^ B->sign;
   fp_clamp(out);
   fp_copy(out, C);
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,180 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#if defined(TFM_MUL20) && FP_SIZE >= 40
/* C = A * B for 20-digit operands, as a fully unrolled comba multiply.
 *
 * Exactly 20 digits are read from each of A->dp and B->dp; both are copied
 * into the local array at[] (A in at[0..19], B in at[20..39]) before any
 * digit of C is written.  Output column k accumulates every product
 * at[i] * at[20 + k - i] that falls inside the operands.  Columns 0..38
 * are stored from the low accumulator digit and digit 39 from the carry;
 * C->used is then set to 40 and clamped.
 *
 * NOTE(review): presumably relies on the digits above A->used / B->used
 * being zero, since 20 digits are always read -- confirm against callers.
 */
void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[40];
/* snapshot both operands: A in the lower half of at[], B in the upper half */
memcpy(at, A->dp, 20 * sizeof(fp_digit));
memcpy(at+20, B->dp, 20 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
/* 0 */
MULADD(at[0], at[20]);
COMBA_STORE(C->dp[0]);
/* 1 */
COMBA_FORWARD;
MULADD(at[0], at[21]); MULADD(at[1], at[20]);
COMBA_STORE(C->dp[1]);
/* 2 */
COMBA_FORWARD;
MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]);
COMBA_STORE(C->dp[2]);
/* 3 */
COMBA_FORWARD;
MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]);
COMBA_STORE(C->dp[3]);
/* 4 */
COMBA_FORWARD;
MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]);
COMBA_STORE(C->dp[4]);
/* 5 */
COMBA_FORWARD;
MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]);
COMBA_STORE(C->dp[5]);
/* 6 */
COMBA_FORWARD;
MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]);
COMBA_STORE(C->dp[6]);
/* 7 */
COMBA_FORWARD;
MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]);
COMBA_STORE(C->dp[7]);
/* 8 */
COMBA_FORWARD;
MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]);
COMBA_STORE(C->dp[8]);
/* 9 */
COMBA_FORWARD;
MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]);
COMBA_STORE(C->dp[9]);
/* 10 */
COMBA_FORWARD;
MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]);
COMBA_STORE(C->dp[10]);
/* 11 */
COMBA_FORWARD;
MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]);
COMBA_STORE(C->dp[11]);
/* 12 */
COMBA_FORWARD;
MULADD(at[0], at[32]); MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]);
COMBA_STORE(C->dp[12]);
/* 13 */
COMBA_FORWARD;
MULADD(at[0], at[33]); MULADD(at[1], at[32]); MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]);
COMBA_STORE(C->dp[13]);
/* 14 */
COMBA_FORWARD;
MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]);
COMBA_STORE(C->dp[14]);
/* 15 */
COMBA_FORWARD;
MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]);
COMBA_STORE(C->dp[15]);
/* 16 */
COMBA_FORWARD;
MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]); MULADD(at[16], at[20]);
COMBA_STORE(C->dp[16]);
/* 17 */
COMBA_FORWARD;
MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]); MULADD(at[16], at[21]); MULADD(at[17], at[20]);
COMBA_STORE(C->dp[17]);
/* 18 */
COMBA_FORWARD;
MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]); MULADD(at[16], at[22]); MULADD(at[17], at[21]); MULADD(at[18], at[20]);
COMBA_STORE(C->dp[18]);
/* 19 */
COMBA_FORWARD;
MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); MULADD(at[16], at[23]); MULADD(at[17], at[22]); MULADD(at[18], at[21]); MULADD(at[19], at[20]);
COMBA_STORE(C->dp[19]);
/* 20 */
COMBA_FORWARD;
MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); MULADD(at[16], at[24]); MULADD(at[17], at[23]); MULADD(at[18], at[22]); MULADD(at[19], at[21]);
COMBA_STORE(C->dp[20]);
/* 21 */
COMBA_FORWARD;
MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); MULADD(at[16], at[25]); MULADD(at[17], at[24]); MULADD(at[18], at[23]); MULADD(at[19], at[22]);
COMBA_STORE(C->dp[21]);
/* 22 */
COMBA_FORWARD;
MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); MULADD(at[16], at[26]); MULADD(at[17], at[25]); MULADD(at[18], at[24]); MULADD(at[19], at[23]);
COMBA_STORE(C->dp[22]);
/* 23 */
COMBA_FORWARD;
MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); MULADD(at[16], at[27]); MULADD(at[17], at[26]); MULADD(at[18], at[25]); MULADD(at[19], at[24]);
COMBA_STORE(C->dp[23]);
/* 24 */
COMBA_FORWARD;
MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); MULADD(at[16], at[28]); MULADD(at[17], at[27]); MULADD(at[18], at[26]); MULADD(at[19], at[25]);
COMBA_STORE(C->dp[24]);
/* 25 */
COMBA_FORWARD;
MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); MULADD(at[14], at[31]); MULADD(at[15], at[30]); MULADD(at[16], at[29]); MULADD(at[17], at[28]); MULADD(at[18], at[27]); MULADD(at[19], at[26]);
COMBA_STORE(C->dp[25]);
/* 26 */
COMBA_FORWARD;
MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); MULADD(at[15], at[31]); MULADD(at[16], at[30]); MULADD(at[17], at[29]); MULADD(at[18], at[28]); MULADD(at[19], at[27]);
COMBA_STORE(C->dp[26]);
/* 27 */
COMBA_FORWARD;
MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); MULADD(at[16], at[31]); MULADD(at[17], at[30]); MULADD(at[18], at[29]); MULADD(at[19], at[28]);
COMBA_STORE(C->dp[27]);
/* 28 */
COMBA_FORWARD;
MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); MULADD(at[17], at[31]); MULADD(at[18], at[30]); MULADD(at[19], at[29]);
COMBA_STORE(C->dp[28]);
/* 29 */
COMBA_FORWARD;
MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); MULADD(at[18], at[31]); MULADD(at[19], at[30]);
COMBA_STORE(C->dp[29]);
/* 30 */
COMBA_FORWARD;
MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); MULADD(at[19], at[31]);
COMBA_STORE(C->dp[30]);
/* 31 */
COMBA_FORWARD;
MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]);
COMBA_STORE(C->dp[31]);
/* 32 */
COMBA_FORWARD;
MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]);
COMBA_STORE(C->dp[32]);
/* 33 */
COMBA_FORWARD;
MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]);
COMBA_STORE(C->dp[33]);
/* 34 */
COMBA_FORWARD;
MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]);
COMBA_STORE(C->dp[34]);
/* 35 */
COMBA_FORWARD;
MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]);
COMBA_STORE(C->dp[35]);
/* 36 */
COMBA_FORWARD;
MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]);
COMBA_STORE(C->dp[36]);
/* 37 */
COMBA_FORWARD;
MULADD(at[18], at[39]); MULADD(at[19], at[38]);
COMBA_STORE(C->dp[37]);
/* 38 */
COMBA_FORWARD;
MULADD(at[19], at[39]);
COMBA_STORE(C->dp[38]);
/* the carry left after the last column becomes the top digit */
COMBA_STORE2(C->dp[39]);
/* assume the full 40 digits, then let fp_clamp trim leading zeros */
C->used = 40;
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,212 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#if defined(TFM_MUL24) && FP_SIZE >= 48
/*
 * fp_mul_comba24 - fully unrolled 24 x 24 digit multiplication, C = A * B.
 *
 * A, B: operands; exactly the first 24 entries of each ->dp array are read,
 *       regardless of ->used (assumes the caller only dispatches here for
 *       operands that fit in 24 digits -- TODO confirm against the caller).
 * C:    result; dp[0..47], used and sign are overwritten.
 *
 * The product is built one output column at a time: column k sums every
 * partial product at[i] * at[24 + j] with i + j == k and is then stored to
 * C->dp[k].  COMBA_START, COMBA_CLEAR, COMBA_FORWARD, MULADD, COMBA_STORE,
 * COMBA_STORE2 and COMBA_FINI are not defined in this file; they are expected
 * to come from the fp_mul_comba.c include.  c0, c1 and c2 are only ever
 * touched through those macros (presumably they form the column accumulator
 * and its carries -- verify against the macro definitions).
 */
void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[48];
/* Snapshot both operands into one local array: A's digits land in at[0..23],
 * B's digits in at[24..47].  Every MULADD below reads from `at` only, so the
 * stores into C->dp cannot disturb the operands (this appears intended to let
 * C alias A or B). */
memcpy(at, A->dp, 24 * sizeof(fp_digit));
memcpy(at+24, B->dp, 24 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
/* Columns 0..23: column k has k + 1 products, at[0..k] paired with
 * at[24 + k] down to at[24]. */
/* 0 */
MULADD(at[0], at[24]);
COMBA_STORE(C->dp[0]);
/* 1 */
COMBA_FORWARD;
MULADD(at[0], at[25]); MULADD(at[1], at[24]);
COMBA_STORE(C->dp[1]);
/* 2 */
COMBA_FORWARD;
MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]);
COMBA_STORE(C->dp[2]);
/* 3 */
COMBA_FORWARD;
MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]);
COMBA_STORE(C->dp[3]);
/* 4 */
COMBA_FORWARD;
MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]);
COMBA_STORE(C->dp[4]);
/* 5 */
COMBA_FORWARD;
MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]);
COMBA_STORE(C->dp[5]);
/* 6 */
COMBA_FORWARD;
MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]);
COMBA_STORE(C->dp[6]);
/* 7 */
COMBA_FORWARD;
MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]);
COMBA_STORE(C->dp[7]);
/* 8 */
COMBA_FORWARD;
MULADD(at[0], at[32]); MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]);
COMBA_STORE(C->dp[8]);
/* 9 */
COMBA_FORWARD;
MULADD(at[0], at[33]); MULADD(at[1], at[32]); MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]);
COMBA_STORE(C->dp[9]);
/* 10 */
COMBA_FORWARD;
MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]);
COMBA_STORE(C->dp[10]);
/* 11 */
COMBA_FORWARD;
MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]);
COMBA_STORE(C->dp[11]);
/* 12 */
COMBA_FORWARD;
MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]);
COMBA_STORE(C->dp[12]);
/* 13 */
COMBA_FORWARD;
MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]);
COMBA_STORE(C->dp[13]);
/* 14 */
COMBA_FORWARD;
MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]);
COMBA_STORE(C->dp[14]);
/* 15 */
COMBA_FORWARD;
MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]);
COMBA_STORE(C->dp[15]);
/* 16 */
COMBA_FORWARD;
MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); MULADD(at[16], at[24]);
COMBA_STORE(C->dp[16]);
/* 17 */
COMBA_FORWARD;
MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); MULADD(at[16], at[25]); MULADD(at[17], at[24]);
COMBA_STORE(C->dp[17]);
/* 18 */
COMBA_FORWARD;
MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); MULADD(at[16], at[26]); MULADD(at[17], at[25]); MULADD(at[18], at[24]);
COMBA_STORE(C->dp[18]);
/* 19 */
COMBA_FORWARD;
MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); MULADD(at[16], at[27]); MULADD(at[17], at[26]); MULADD(at[18], at[25]); MULADD(at[19], at[24]);
COMBA_STORE(C->dp[19]);
/* 20 */
COMBA_FORWARD;
MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); MULADD(at[16], at[28]); MULADD(at[17], at[27]); MULADD(at[18], at[26]); MULADD(at[19], at[25]); MULADD(at[20], at[24]);
COMBA_STORE(C->dp[20]);
/* 21 */
COMBA_FORWARD;
MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); MULADD(at[14], at[31]); MULADD(at[15], at[30]); MULADD(at[16], at[29]); MULADD(at[17], at[28]); MULADD(at[18], at[27]); MULADD(at[19], at[26]); MULADD(at[20], at[25]); MULADD(at[21], at[24]);
COMBA_STORE(C->dp[21]);
/* 22 */
COMBA_FORWARD;
MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); MULADD(at[15], at[31]); MULADD(at[16], at[30]); MULADD(at[17], at[29]); MULADD(at[18], at[28]); MULADD(at[19], at[27]); MULADD(at[20], at[26]); MULADD(at[21], at[25]); MULADD(at[22], at[24]);
COMBA_STORE(C->dp[22]);
/* 23 */
COMBA_FORWARD;
MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); MULADD(at[16], at[31]); MULADD(at[17], at[30]); MULADD(at[18], at[29]); MULADD(at[19], at[28]); MULADD(at[20], at[27]); MULADD(at[21], at[26]); MULADD(at[22], at[25]); MULADD(at[23], at[24]);
COMBA_STORE(C->dp[23]);
/* Columns 24..46: the B index is capped at at[47], so column k now starts at
 * at[k - 23] and holds 47 - k products; the rows shrink by one each step. */
/* 24 */
COMBA_FORWARD;
MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); MULADD(at[17], at[31]); MULADD(at[18], at[30]); MULADD(at[19], at[29]); MULADD(at[20], at[28]); MULADD(at[21], at[27]); MULADD(at[22], at[26]); MULADD(at[23], at[25]);
COMBA_STORE(C->dp[24]);
/* 25 */
COMBA_FORWARD;
MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); MULADD(at[18], at[31]); MULADD(at[19], at[30]); MULADD(at[20], at[29]); MULADD(at[21], at[28]); MULADD(at[22], at[27]); MULADD(at[23], at[26]);
COMBA_STORE(C->dp[25]);
/* 26 */
COMBA_FORWARD;
MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); MULADD(at[19], at[31]); MULADD(at[20], at[30]); MULADD(at[21], at[29]); MULADD(at[22], at[28]); MULADD(at[23], at[27]);
COMBA_STORE(C->dp[26]);
/* 27 */
COMBA_FORWARD;
MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); MULADD(at[20], at[31]); MULADD(at[21], at[30]); MULADD(at[22], at[29]); MULADD(at[23], at[28]);
COMBA_STORE(C->dp[27]);
/* 28 */
COMBA_FORWARD;
MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); MULADD(at[21], at[31]); MULADD(at[22], at[30]); MULADD(at[23], at[29]);
COMBA_STORE(C->dp[28]);
/* 29 */
COMBA_FORWARD;
MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); MULADD(at[22], at[31]); MULADD(at[23], at[30]);
COMBA_STORE(C->dp[29]);
/* 30 */
COMBA_FORWARD;
MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); MULADD(at[23], at[31]);
COMBA_STORE(C->dp[30]);
/* 31 */
COMBA_FORWARD;
MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]);
COMBA_STORE(C->dp[31]);
/* 32 */
COMBA_FORWARD;
MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]);
COMBA_STORE(C->dp[32]);
/* 33 */
COMBA_FORWARD;
MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]);
COMBA_STORE(C->dp[33]);
/* 34 */
COMBA_FORWARD;
MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]);
COMBA_STORE(C->dp[34]);
/* 35 */
COMBA_FORWARD;
MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]);
COMBA_STORE(C->dp[35]);
/* 36 */
COMBA_FORWARD;
MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]);
COMBA_STORE(C->dp[36]);
/* 37 */
COMBA_FORWARD;
MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]);
COMBA_STORE(C->dp[37]);
/* 38 */
COMBA_FORWARD;
MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]);
COMBA_STORE(C->dp[38]);
/* 39 */
COMBA_FORWARD;
MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]);
COMBA_STORE(C->dp[39]);
/* 40 */
COMBA_FORWARD;
MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]);
COMBA_STORE(C->dp[40]);
/* 41 */
COMBA_FORWARD;
MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]);
COMBA_STORE(C->dp[41]);
/* 42 */
COMBA_FORWARD;
MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]);
COMBA_STORE(C->dp[42]);
/* 43 */
COMBA_FORWARD;
MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]);
COMBA_STORE(C->dp[43]);
/* 44 */
COMBA_FORWARD;
MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]);
COMBA_STORE(C->dp[44]);
/* 45 */
COMBA_FORWARD;
MULADD(at[22], at[47]); MULADD(at[23], at[46]);
COMBA_STORE(C->dp[45]);
/* 46 */
COMBA_FORWARD;
MULADD(at[23], at[47]);
COMBA_STORE(C->dp[46]);
/* Digit 47 has no products of its own; it is written by COMBA_STORE2 straight
 * after column 46 (presumably the carry left over from that column --
 * confirm against the macro definition). */
COMBA_STORE2(C->dp[47]);
/* Claim the full 48 digits, then let fp_clamp() adjust `used`. */
C->used = 48;
/* Result sign is the XOR of the operand signs. */
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,244 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#if defined(TFM_MUL28) && FP_SIZE >= 56
void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[56];
memcpy(at, A->dp, 28 * sizeof(fp_digit));
memcpy(at+28, B->dp, 28 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
/* 0 */
MULADD(at[0], at[28]);
COMBA_STORE(C->dp[0]);
/* 1 */
COMBA_FORWARD;
MULADD(at[0], at[29]); MULADD(at[1], at[28]);
COMBA_STORE(C->dp[1]);
/* 2 */
COMBA_FORWARD;
MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]);
COMBA_STORE(C->dp[2]);
/* 3 */
COMBA_FORWARD;
MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]);
COMBA_STORE(C->dp[3]);
/* 4 */
COMBA_FORWARD;
MULADD(at[0], at[32]); MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]);
COMBA_STORE(C->dp[4]);
/* 5 */
COMBA_FORWARD;
MULADD(at[0], at[33]); MULADD(at[1], at[32]); MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]);
COMBA_STORE(C->dp[5]);
/* 6 */
COMBA_FORWARD;
MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]);
COMBA_STORE(C->dp[6]);
/* 7 */
COMBA_FORWARD;
MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]);
COMBA_STORE(C->dp[7]);
/* 8 */
COMBA_FORWARD;
MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]);
COMBA_STORE(C->dp[8]);
/* 9 */
COMBA_FORWARD;
MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]);
COMBA_STORE(C->dp[9]);
/* 10 */
COMBA_FORWARD;
MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]);
COMBA_STORE(C->dp[10]);
/* 11 */
COMBA_FORWARD;
MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]);
COMBA_STORE(C->dp[11]);
/* 12 */
COMBA_FORWARD;
MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]);
COMBA_STORE(C->dp[12]);
/* 13 */
COMBA_FORWARD;
MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]);
COMBA_STORE(C->dp[13]);
/* 14 */
COMBA_FORWARD;
MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]);
COMBA_STORE(C->dp[14]);
/* 15 */
COMBA_FORWARD;
MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]);
COMBA_STORE(C->dp[15]);
/* 16 */
COMBA_FORWARD;
MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); MULADD(at[16], at[28]);
COMBA_STORE(C->dp[16]);
/* 17 */
COMBA_FORWARD;
MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); MULADD(at[14], at[31]); MULADD(at[15], at[30]); MULADD(at[16], at[29]); MULADD(at[17], at[28]);
COMBA_STORE(C->dp[17]);
/* 18 */
COMBA_FORWARD;
MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]); MULADD(at[15], at[31]); MULADD(at[16], at[30]); MULADD(at[17], at[29]); MULADD(at[18], at[28]);
COMBA_STORE(C->dp[18]);
/* 19 */
COMBA_FORWARD;
MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); MULADD(at[16], at[31]); MULADD(at[17], at[30]); MULADD(at[18], at[29]); MULADD(at[19], at[28]);
COMBA_STORE(C->dp[19]);
/* 20 */
COMBA_FORWARD;
MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); MULADD(at[17], at[31]); MULADD(at[18], at[30]); MULADD(at[19], at[29]); MULADD(at[20], at[28]);
COMBA_STORE(C->dp[20]);
/* 21 */
COMBA_FORWARD;
MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); MULADD(at[18], at[31]); MULADD(at[19], at[30]); MULADD(at[20], at[29]); MULADD(at[21], at[28]);
COMBA_STORE(C->dp[21]);
/* 22 */
COMBA_FORWARD;
MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); MULADD(at[19], at[31]); MULADD(at[20], at[30]); MULADD(at[21], at[29]); MULADD(at[22], at[28]);
COMBA_STORE(C->dp[22]);
/* 23 */
COMBA_FORWARD;
MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); MULADD(at[20], at[31]); MULADD(at[21], at[30]); MULADD(at[22], at[29]); MULADD(at[23], at[28]);
COMBA_STORE(C->dp[23]);
/* 24 */
COMBA_FORWARD;
MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); MULADD(at[21], at[31]); MULADD(at[22], at[30]); MULADD(at[23], at[29]); MULADD(at[24], at[28]);
COMBA_STORE(C->dp[24]);
/* 25 */
COMBA_FORWARD;
MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); MULADD(at[22], at[31]); MULADD(at[23], at[30]); MULADD(at[24], at[29]); MULADD(at[25], at[28]);
COMBA_STORE(C->dp[25]);
/* 26 */
COMBA_FORWARD;
MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); MULADD(at[23], at[31]); MULADD(at[24], at[30]); MULADD(at[25], at[29]); MULADD(at[26], at[28]);
COMBA_STORE(C->dp[26]);
/* 27 */
COMBA_FORWARD;
MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); MULADD(at[24], at[31]); MULADD(at[25], at[30]); MULADD(at[26], at[29]); MULADD(at[27], at[28]);
COMBA_STORE(C->dp[27]);
/* 28 */
COMBA_FORWARD;
MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]); MULADD(at[25], at[31]); MULADD(at[26], at[30]); MULADD(at[27], at[29]);
COMBA_STORE(C->dp[28]);
/* 29 */
COMBA_FORWARD;
MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]); MULADD(at[26], at[31]); MULADD(at[27], at[30]);
COMBA_STORE(C->dp[29]);
/* 30 */
COMBA_FORWARD;
MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]); MULADD(at[27], at[31]);
COMBA_STORE(C->dp[30]);
/* 31 */
COMBA_FORWARD;
MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]);
COMBA_STORE(C->dp[31]);
/* 32 */
COMBA_FORWARD;
MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]);
COMBA_STORE(C->dp[32]);
/* 33 */
COMBA_FORWARD;
MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]);
COMBA_STORE(C->dp[33]);
/* 34 */
COMBA_FORWARD;
MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]);
COMBA_STORE(C->dp[34]);
/* 35 */
COMBA_FORWARD;
MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]);
COMBA_STORE(C->dp[35]);
/* 36 */
COMBA_FORWARD;
MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]);
COMBA_STORE(C->dp[36]);
/* 37 */
COMBA_FORWARD;
MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]);
COMBA_STORE(C->dp[37]);
/* 38 */
COMBA_FORWARD;
MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]);
COMBA_STORE(C->dp[38]);
/* 39 */
COMBA_FORWARD;
MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]);
COMBA_STORE(C->dp[39]);
/* 40 */
COMBA_FORWARD;
MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]);
COMBA_STORE(C->dp[40]);
/* 41 */
COMBA_FORWARD;
MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]);
COMBA_STORE(C->dp[41]);
/* 42 */
COMBA_FORWARD;
MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]);
COMBA_STORE(C->dp[42]);
/* 43 */
COMBA_FORWARD;
MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]);
COMBA_STORE(C->dp[43]);
/* 44 */
COMBA_FORWARD;
MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]);
COMBA_STORE(C->dp[44]);
/* 45 */
COMBA_FORWARD;
MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]);
COMBA_STORE(C->dp[45]);
/* 46 */
COMBA_FORWARD;
MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]);
COMBA_STORE(C->dp[46]);
/* 47 */
COMBA_FORWARD;
MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]);
COMBA_STORE(C->dp[47]);
/* 48 */
COMBA_FORWARD;
MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]);
COMBA_STORE(C->dp[48]);
/* 49 */
COMBA_FORWARD;
MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]);
COMBA_STORE(C->dp[49]);
/* 50 */
COMBA_FORWARD;
MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]);
COMBA_STORE(C->dp[50]);
/* 51 */
COMBA_FORWARD;
MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]);
COMBA_STORE(C->dp[51]);
/* 52 */
COMBA_FORWARD;
MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]);
COMBA_STORE(C->dp[52]);
/* 53 */
COMBA_FORWARD;
MULADD(at[26], at[55]); MULADD(at[27], at[54]);
COMBA_STORE(C->dp[53]);
/* 54 */
COMBA_FORWARD;
MULADD(at[27], at[55]);
COMBA_STORE(C->dp[54]);
COMBA_STORE2(C->dp[55]);
C->used = 56;
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,290 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#if defined(TFM_MUL32) && FP_SIZE >= 64
/* Fully unrolled column-wise multiply C = A * B for operands of up to 32 digits.
 *
 * The first 32 digits of A are copied to at[0..31] and the first 32 digits of
 * B to at[32..63], so both inputs are read before any digit of C is written.
 * Each numbered section k below accumulates every product
 * at[i] * at[32 + j] with i + j == k and stores the result digit in C->dp[k].
 * out_size (A->used + B->used) selects one of the early exits at 40, 48 or 56
 * result digits; otherwise all 64 digits are produced.  On every exit path
 * C->sign is the XOR of the operand signs and C is clamped.
 *
 * NOTE(review): COMBA_START/CLEAR/FORWARD/STORE/STORE2/FINI and MULADD are not
 * defined in this file; presumably they come from fp_mul_comba.c, which is
 * included above with TFM_DEFINES set -- confirm there.
 */
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[64];
int out_size;
/* number of result digits the operands can actually produce */
out_size = A->used + B->used;
/* at[0..31] = digits of A, at[32..63] = digits of B */
memcpy(at, A->dp, 32 * sizeof(fp_digit));
memcpy(at+32, B->dp, 32 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
/* 0 */
MULADD(at[0], at[32]);
COMBA_STORE(C->dp[0]);
/* 1 */
COMBA_FORWARD;
MULADD(at[0], at[33]); MULADD(at[1], at[32]);
COMBA_STORE(C->dp[1]);
/* 2 */
COMBA_FORWARD;
MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]);
COMBA_STORE(C->dp[2]);
/* 3 */
COMBA_FORWARD;
MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]);
COMBA_STORE(C->dp[3]);
/* 4 */
COMBA_FORWARD;
MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]);
COMBA_STORE(C->dp[4]);
/* 5 */
COMBA_FORWARD;
MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]);
COMBA_STORE(C->dp[5]);
/* 6 */
COMBA_FORWARD;
MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]);
COMBA_STORE(C->dp[6]);
/* 7 */
COMBA_FORWARD;
MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]);
COMBA_STORE(C->dp[7]);
/* 8 */
COMBA_FORWARD;
MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]);
COMBA_STORE(C->dp[8]);
/* 9 */
COMBA_FORWARD;
MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]);
COMBA_STORE(C->dp[9]);
/* 10 */
COMBA_FORWARD;
MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]);
COMBA_STORE(C->dp[10]);
/* 11 */
COMBA_FORWARD;
MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]);
COMBA_STORE(C->dp[11]);
/* 12 */
COMBA_FORWARD;
MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]);
COMBA_STORE(C->dp[12]);
/* 13 */
COMBA_FORWARD;
MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]);
COMBA_STORE(C->dp[13]);
/* 14 */
COMBA_FORWARD;
MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); MULADD(at[13], at[33]); MULADD(at[14], at[32]);
COMBA_STORE(C->dp[14]);
/* 15 */
COMBA_FORWARD;
MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]);
COMBA_STORE(C->dp[15]);
/* 16 */
COMBA_FORWARD;
MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]);
COMBA_STORE(C->dp[16]);
/* 17 */
COMBA_FORWARD;
MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]);
COMBA_STORE(C->dp[17]);
/* 18 */
COMBA_FORWARD;
MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]);
COMBA_STORE(C->dp[18]);
/* 19 */
COMBA_FORWARD;
MULADD(at[0], at[51]); MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]);
COMBA_STORE(C->dp[19]);
/* 20 */
COMBA_FORWARD;
MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]);
COMBA_STORE(C->dp[20]);
/* 21 */
COMBA_FORWARD;
MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]);
COMBA_STORE(C->dp[21]);
/* 22 */
COMBA_FORWARD;
MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]);
COMBA_STORE(C->dp[22]);
/* 23 */
COMBA_FORWARD;
MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]);
COMBA_STORE(C->dp[23]);
/* 24 */
COMBA_FORWARD;
MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]);
COMBA_STORE(C->dp[24]);
/* 25 */
COMBA_FORWARD;
MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]);
COMBA_STORE(C->dp[25]);
/* 26 */
COMBA_FORWARD;
MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]);
COMBA_STORE(C->dp[26]);
/* 27 */
COMBA_FORWARD;
MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]);
COMBA_STORE(C->dp[27]);
/* 28 */
COMBA_FORWARD;
MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]);
COMBA_STORE(C->dp[28]);
/* 29 */
COMBA_FORWARD;
MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]);
COMBA_STORE(C->dp[29]);
/* 30 */
COMBA_FORWARD;
MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]);
COMBA_STORE(C->dp[30]);
/* 31 */
COMBA_FORWARD;
MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]);
COMBA_STORE(C->dp[31]);
/* 32 */
COMBA_FORWARD;
MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]);
COMBA_STORE(C->dp[32]);
/* 33 */
COMBA_FORWARD;
MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]);
COMBA_STORE(C->dp[33]);
/* 34 */
COMBA_FORWARD;
MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]);
COMBA_STORE(C->dp[34]);
/* 35 */
COMBA_FORWARD;
MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]);
COMBA_STORE(C->dp[35]);
/* 36 */
COMBA_FORWARD;
MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]);
COMBA_STORE(C->dp[36]);
/* 37 */
COMBA_FORWARD;
MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]);
COMBA_STORE(C->dp[37]);
/* 38 */
COMBA_FORWARD;
MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]);
COMBA_STORE(C->dp[38]);
/* early out at 40 digits, 40*32==1280, or two 640 bit operands */
/* NOTE(review): COMBA_STORE2 presumably writes the pending carry as the top digit -- confirm against the macro definition */
if (out_size <= 40) { COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
/* 39 */
COMBA_FORWARD;
MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]);
COMBA_STORE(C->dp[39]);
/* 40 */
COMBA_FORWARD;
MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]);
COMBA_STORE(C->dp[40]);
/* 41 */
COMBA_FORWARD;
MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]);
COMBA_STORE(C->dp[41]);
/* 42 */
COMBA_FORWARD;
MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]);
COMBA_STORE(C->dp[42]);
/* 43 */
COMBA_FORWARD;
MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]);
COMBA_STORE(C->dp[43]);
/* 44 */
COMBA_FORWARD;
MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]);
COMBA_STORE(C->dp[44]);
/* 45 */
COMBA_FORWARD;
MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]);
COMBA_STORE(C->dp[45]);
/* 46 */
COMBA_FORWARD;
MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]);
COMBA_STORE(C->dp[46]);
/* early out at 48 digits, 48*32==1536, or two 768 bit operands */
if (out_size <= 48) { COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
/* 47 */
COMBA_FORWARD;
MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]);
COMBA_STORE(C->dp[47]);
/* 48 */
COMBA_FORWARD;
MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]);
COMBA_STORE(C->dp[48]);
/* 49 */
COMBA_FORWARD;
MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]);
COMBA_STORE(C->dp[49]);
/* 50 */
COMBA_FORWARD;
MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]);
COMBA_STORE(C->dp[50]);
/* 51 */
COMBA_FORWARD;
MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]);
COMBA_STORE(C->dp[51]);
/* 52 */
COMBA_FORWARD;
MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]);
COMBA_STORE(C->dp[52]);
/* 53 */
COMBA_FORWARD;
MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]);
COMBA_STORE(C->dp[53]);
/* 54 */
COMBA_FORWARD;
MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]);
COMBA_STORE(C->dp[54]);
/* early out at 56 digits, 56*32==1792, or two 896 bit operands */
if (out_size <= 56) { COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
/* 55 */
COMBA_FORWARD;
MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]);
COMBA_STORE(C->dp[55]);
/* 56 */
COMBA_FORWARD;
MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]);
COMBA_STORE(C->dp[56]);
/* 57 */
COMBA_FORWARD;
MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]);
COMBA_STORE(C->dp[57]);
/* 58 */
COMBA_FORWARD;
MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]);
COMBA_STORE(C->dp[58]);
/* 59 */
COMBA_FORWARD;
MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]);
COMBA_STORE(C->dp[59]);
/* 60 */
COMBA_FORWARD;
MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]);
COMBA_STORE(C->dp[60]);
/* 61 */
COMBA_FORWARD;
MULADD(at[30], at[63]); MULADD(at[31], at[62]);
COMBA_STORE(C->dp[61]);
/* 62 */
COMBA_FORWARD;
MULADD(at[31], at[63]);
COMBA_STORE(C->dp[62]);
/* full-width result: top digit, then the same finalisation as the early exits */
COMBA_STORE2(C->dp[63]);
C->used = 64;
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

File diff suppressed because it is too large Load Diff

40
src/crypto/tfm/fp_mul_d.c Normal file
View File

@ -0,0 +1,40 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = a * b
 *
 * Multiplies the multi-digit integer a by the single digit b and stores the
 * product in c.  c takes the sign of a.  A carry out of the top digit is
 * appended as an extra digit unless a already uses FP_SIZE digits, in which
 * case that carry is discarded.  Digits that c held above the new length
 * before the call are cleared.
 */
void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
{
   fp_word carry = 0;
   int     i, prev_used;

   /* remember how many digits c held so the stale ones can be wiped below */
   prev_used = c->used;
   c->used   = a->used;
   c->sign   = a->sign;

   /* schoolbook pass: one digit product plus running carry per iteration */
   for (i = 0; i < a->used; i++) {
      carry    += ((fp_word)a->dp[i]) * ((fp_word)b);
      c->dp[i]  = (fp_digit)carry;
      carry   >>= DIGIT_BIT;
   }

   /* keep the final carry only when there is still room for one more digit */
   if (carry != 0 && a->used != FP_SIZE) {
      c->dp[c->used] = carry;
      c->used += 1;
      i += 1;
   }

   /* clear whatever c held beyond the digits just written */
   while (i < prev_used) {
      c->dp[i] = 0;
      i += 1;
   }

   fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,22 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* d = a * b (mod c)
 *
 * Forms the full product of a and b in a local temporary, then reduces it
 * modulo c into d.  The return value is whatever fp_mod reports.
 */
int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
{
   fp_int product;
   int    status;

   fp_zero(&product);
   fp_mul(a, b, &product);

   status = fp_mod(&product, c, d);
   return status;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,70 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Load an unsigned integer from a byte string into a.
 *
 * a: destination; it is reset with fp_zero before anything is read.
 * b: source bytes, most significant byte first.
 * c: number of bytes available at b.
 *
 * When c exceeds the capacity of a (FP_SIZE * sizeof(fp_digit) bytes) the
 * leading, most significant, excess bytes are skipped and only the trailing
 * bytes that fit are loaded.  A length of zero or less yields the value zero
 * and b is never dereferenced.
 */
void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
{
  /* zero the int */
  fp_zero (a);

  /* Nothing to load.  Returning here matters for the ENDIAN_BIG path below:
     with c == 0 its unrolled loop would enter at "case 0" with idx == -4 and
     copy four bytes to pd[-4..-1], i.e. in front of a->dp.  A negative c would
     also pass the (unsigned) capacity test and step b backwards. */
  if (c <= 0) {
    fp_clamp (a);
    return;
  }

  /* input is larger than a can hold: drop the most significant bytes */
  if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
    int excess = c - (FP_SIZE * sizeof(fp_digit));
    c -= excess;
    b += excess;
  }

  /* If we know the endianness of this architecture, and we're using
     32-bit fp_digits, we can optimize this */
#if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(FP_64BIT)
  /* But not for both simultaneously */
#if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
#error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
#endif
  {
    /* write the digit array bytewise */
    unsigned char *pd = (unsigned char *)a->dp;

    /* number of digits touched, rounding a partial digit up */
    a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
    /* read the bytes in */
#ifdef ENDIAN_BIG
    {
      /* Use Duff's device to unroll the loop. */
      /* idx is the byte offset of the highest digit that receives data; the
         switch jumps into the middle of the first pass so that a partial
         leading group of c % 4 bytes lands in the low-order bytes of it. */
      int idx = (c - 1) & ~3;
      switch (c % 4) {
      case 0: do { pd[idx+0] = *b++;
      case 3:      pd[idx+1] = *b++;
      case 2:      pd[idx+2] = *b++;
      case 1:      pd[idx+3] = *b++;
                   idx -= 4;
              } while ((c -= 4) > 0);
      }
    }
#else
    /* little endian: the first source byte goes to the highest byte offset */
    for (c -= 1; c >= 0; c -= 1) {
      pd[c] = *b++;
    }
#endif
  }
#else
  /* read the bytes in */
  /* portable path: shift the accumulated value up 8 bits per source byte */
  for (; c > 0; c--) {
    fp_mul_2d (a, 8, a);
    a->dp[0] |= *b++;
    if (a->used == 0) {
      a->used = 1;
    }
  }
#endif
  fp_clamp (a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,31 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* reverse an array, used for radix code
 *
 * Swaps the bytes of s[0..len-1] end for end, in place.  A len of one or
 * less leaves the buffer untouched.
 */
void fp_reverse (unsigned char *s, int len)
{
   int lo, hi;

   /* walk inwards from both ends until the two cursors meet */
   for (lo = 0, hi = len - 1; lo < hi; ++lo, --hi) {
      unsigned char held = s[lo];
      s[lo] = s[hi];
      s[hi] = held;
   }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

40
src/crypto/tfm/fp_rshd.c Normal file
View File

@ -0,0 +1,40 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Shift a right by x whole digits, in place.
 *
 * The x lowest digits are discarded and the remaining digits move down.
 * When x is at least a->used the value becomes zero.
 */
void fp_rshd(fp_int *a, int x)
{
   int i, keep;

   /* too many digits just zero and return */
   if (x >= a->used) {
      fp_zero(a);
      return;
   }

   /* number of digits that survive the shift */
   keep = a->used - x;

   /* slide the surviving digits down to position 0 */
   for (i = 0; i < keep; i++) {
      a->dp[i] = a->dp[i + x];
   }

   /* wipe the vacated top positions */
   for (i = keep; i < a->used; i++) {
      a->dp[i] = 0;
   }

   /* new digit count */
   a->used = keep;
   fp_clamp(a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

21
src/crypto/tfm/fp_set.c Normal file
View File

@ -0,0 +1,21 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Set a to the single-digit value b.
 *
 * a is cleared with fp_zero first; the digit count ends up as 1 for a
 * non-zero b and 0 for b == 0.
 */
void fp_set(fp_int *a, fp_digit b)
{
   fp_zero(a);
   a->dp[0] = b;

   if (b != 0) {
      a->used = 1;
   } else {
      a->used = 0;
   }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

126
src/crypto/tfm/fp_sqr.c Normal file
View File

@ -0,0 +1,126 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* b = a*a */
/* Squares A into B.  The work is handed to the first size-specialised comba
 * routine whose build macro (TFM_SQRn or TFM_SMALL_SET) is defined and whose
 * digit-count test matches A->used; the TFM_SQRn blocks additionally require
 * FP_SIZE to be large enough for the 2n-digit result.  If none matches, the
 * generic fp_sqr_comba is used.  Afterwards any digits B held above its new
 * length are cleared.
 */
void fp_sqr(fp_int *A, fp_int *B)
{
int y, old_used;
/* digit count of B before the call, needed for the clean-up loop at "clean" */
old_used = B->used;
/* call generic if we're out of range */
if (A->used + A->used > FP_SIZE) {
fp_sqr_comba(A, B);
goto clean;
}
/* y is the operand size the dispatch ladder below tests against */
y = A->used;
#if defined(TFM_SQR3) && FP_SIZE >= 6
if (y <= 3) {
fp_sqr_comba3(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR4) && FP_SIZE >= 8
if (y == 4) {
fp_sqr_comba4(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR6) && FP_SIZE >= 12
if (y <= 6) {
fp_sqr_comba6(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR7) && FP_SIZE >= 14
if (y == 7) {
fp_sqr_comba7(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR8) && FP_SIZE >= 16
if (y == 8) {
fp_sqr_comba8(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR9) && FP_SIZE >= 18
if (y == 9) {
fp_sqr_comba9(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR12) && FP_SIZE >= 24
if (y <= 12) {
fp_sqr_comba12(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR17) && FP_SIZE >= 34
if (y <= 17) {
fp_sqr_comba17(A,B);
goto clean;
}
#endif
/* NOTE(review): unlike the TFM_SQRn blocks this one has no FP_SIZE guard --
   confirm that is intended */
#if defined(TFM_SMALL_SET)
if (y <= 16) {
fp_sqr_comba_small(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR20) && FP_SIZE >= 40
if (y <= 20) {
fp_sqr_comba20(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR24) && FP_SIZE >= 48
if (y <= 24) {
fp_sqr_comba24(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR28) && FP_SIZE >= 56
if (y <= 28) {
fp_sqr_comba28(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR32) && FP_SIZE >= 64
if (y <= 32) {
fp_sqr_comba32(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR48) && FP_SIZE >= 96
if (y <= 48) {
fp_sqr_comba48(A,B);
goto clean;
}
#endif
#if defined(TFM_SQR64) && FP_SIZE >= 128
if (y <= 64) {
fp_sqr_comba64(A,B);
goto clean;
}
#endif
/* no specialised routine applied: use the generic comba squaring */
fp_sqr_comba(A, B);
clean:
/* zero the digits between B's new length and the length it had on entry */
for (y = B->used; y < old_used; y++) {
B->dp[y] = 0;
}
}
/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr.c,v $ */
/* $Revision: 1.1 $ */
/* $Date: 2006/12/31 21:25:53 $ */

View File

@ -0,0 +1,677 @@
/*
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* When both TFM_PRESCOTT and TFM_SSE2 are requested, drop TFM_SSE2 and select
   the plain x86-32 macro set (TFM_X86) instead. */
#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
#undef TFM_SSE2
#define TFM_X86
#endif
#if defined(TFM_X86)
/* x86-32 optimized */
/* All macros operate on variables the using function must declare: the
   three-digit column accumulator c2:c1:c0 and the secondary accumulator
   sc2:sc1:sc0. */
#define COMBA_START
/* reset the column accumulator */
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
/* write the low accumulator digit to x */
#define COMBA_STORE(x) \
x = c0;
/* write the middle accumulator digit to x */
#define COMBA_STORE2(x) \
x = c1;
/* advance to the next column: shift the accumulator down one digit */
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* c2:c1:c0 += i*i (only i is referenced by the asm; j is unused) */
#define SQRADD(i, j) \
asm( \
"movl %6,%%eax \n\t" \
"mull %%eax \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
/* c2:c1:c0 += 2*i*j (the product i*j is added twice) */
#define SQRADD2(i, j) \
asm( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
/* start a secondary sum: sc1:sc0 = i*j, sc2 = 0 */
#define SQRADDSC(i, j) \
asm( \
"movl %3,%%eax \n\t" \
"mull %4 \n\t" \
"movl %%eax,%0 \n\t" \
"movl %%edx,%1 \n\t" \
"xorl %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
/* sc2:sc1:sc0 += i*j */
#define SQRADDAC(i, j) \
asm( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary sum is added twice) */
#define SQRADDDB \
asm( \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
#elif defined(TFM_X86_64)
/* x86-64 optimized */
/* All macros operate on variables the using function must declare: the
   three-digit column accumulator c2:c1:c0 and the secondary accumulator
   sc2:sc1:sc0. */
#define COMBA_START
/* reset the column accumulator */
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
/* write the low accumulator digit to x */
#define COMBA_STORE(x) \
x = c0;
/* write the middle accumulator digit to x */
#define COMBA_STORE2(x) \
x = c1;
/* advance to the next column: shift the accumulator down one digit */
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* c2:c1:c0 += i*i (only i is referenced by the asm; j is unused).
   NOTE(review): i is passed with an "x" constraint here while the other
   macros of this branch use "g" -- confirm this is intended. */
#define SQRADD(i, j) \
asm( \
"movq %6,%%rax \n\t" \
"mulq %%rax \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc");
/* c2:c1:c0 += 2*i*j (the product i*j is added twice) */
#define SQRADD2(i, j) \
asm( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
/* start a secondary sum: sc1:sc0 = i*j, sc2 = 0 */
#define SQRADDSC(i, j) \
asm( \
"movq %3,%%rax \n\t" \
"mulq %4 \n\t" \
"movq %%rax,%0 \n\t" \
"movq %%rdx,%1 \n\t" \
"xorq %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");
/* sc2:sc1:sc0 += i*j */
#define SQRADDAC(i, j) \
asm( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary sum is added twice) */
#define SQRADDDB \
asm( \
"addq %6,%0 \n\t" \
"adcq %7,%1 \n\t" \
"adcq %8,%2 \n\t" \
"addq %6,%0 \n\t" \
"adcq %7,%1 \n\t" \
"adcq %8,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
#elif defined(TFM_SSE2)
/* SSE2 Optimized */
/* All macros operate on variables the using function must declare: the
   three-digit column accumulator c2:c1:c0 and the secondary accumulator
   sc2:sc1:sc0.  Products are formed with pmuludq in the mm0/mm1 registers. */
#define COMBA_START
/* reset the column accumulator */
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
/* write the low accumulator digit to x */
#define COMBA_STORE(x) \
x = c0;
/* write the middle accumulator digit to x */
#define COMBA_STORE2(x) \
x = c1;
/* advance to the next column: shift the accumulator down one digit */
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* issue emms once the mm-register work of the routine is finished */
#define COMBA_FINI \
asm("emms");
/* c2:c1:c0 += i*i (only i is referenced by the asm; j is unused) */
#define SQRADD(i, j) \
asm( \
"movd %6,%%mm0 \n\t" \
"pmuludq %%mm0,%%mm0\n\t" \
"movd %%mm0,%%eax \n\t" \
"psrlq $32,%%mm0 \n\t" \
"addl %%eax,%0 \n\t" \
"movd %%mm0,%%eax \n\t" \
"adcl %%eax,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
/* c2:c1:c0 += 2*i*j (the product i*j is added twice) */
#define SQRADD2(i, j) \
asm( \
"movd %6,%%mm0 \n\t" \
"movd %7,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
"movd %%mm0,%%eax \n\t" \
"psrlq $32,%%mm0 \n\t" \
"movd %%mm0,%%edx \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
/* start a secondary sum: sc1:sc0 = i*j, sc2 = 0 */
#define SQRADDSC(i, j) \
asm( \
"movd %6,%%mm0 \n\t" \
"movd %7,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
"movd %%mm0,%0 \n\t" \
"psrlq $32,%%mm0 \n\t" \
"movd %%mm0,%1 \n\t" \
"xorl %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j));
/* sc2:sc1:sc0 += i*j */
#define SQRADDAC(i, j) \
asm( \
"movd %6,%%mm0 \n\t" \
"movd %7,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
"movd %%mm0,%%eax \n\t" \
"psrlq $32,%%mm0 \n\t" \
"movd %%mm0,%%edx \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary sum is added twice) */
#define SQRADDDB \
asm( \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
#elif defined(TFM_ARM)
/* ARM code */
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
asm( \
" UMULL r0,r1,%6,%6 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
asm( \
" UMULL r0,r1,%6,%7 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
#define SQRADDSC(i, j) \
asm( \
" UMULL %0,%1,%6,%7 \n\t" \
" SUB %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \
" UMULL r0,r1,%6,%7 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
#define SQRADDDB \
asm( \
" ADDS %0,%0,%3 \n\t" \
" ADCS %1,%1,%4 \n\t" \
" ADC %2,%2,%5 \n\t" \
" ADDS %0,%0,%3 \n\t" \
" ADCS %1,%1,%4 \n\t" \
" ADC %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_PPC32)
/* PPC32 */
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
asm( \
" mullw 16,%6,%6 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%6 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
asm( \
" mullw 16,%6,%7 \n\t" \
" mulhwu 17,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \
" addc %0,%0,16 \n\t" \
" adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
#define SQRADDSC(i, j) \
asm( \
" mullw %0,%6,%7 \n\t" \
" mulhwu %1,%6,%7 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
#define SQRADDDB \
asm( \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_PPC64)
/* PPC64 */
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
asm( \
" mulld r16,%6,%6 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhdu r16,%6,%6 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
asm( \
" mulld r16,%6,%7 \n\t" \
" mulhdu r17,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \
" addc %0,%0,r16 \n\t" \
" adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc");
#define SQRADDSC(i, j) \
asm( \
" mulld %0,%6,%7 \n\t" \
" mulhdu %1,%6,%7 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \
" mulld r16,%6,%7 \n\t" \
" addc %0,%0,r16 \n\t" \
" mulhdu r16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc");
#define SQRADDDB \
asm( \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_AVR32)
/* AVR32 */
/* All macros operate on variables the using function must declare: the
   three-digit column accumulator c2:c1:c0 and the secondary accumulator
   sc2:sc1:sc0.  r2 and r3 are used as scratch and listed as clobbers.
   Every asm that executes add/adc/acr also lists "cc" as clobbered, matching
   SQRADDDB. */
#define COMBA_START
/* reset the column accumulator */
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
/* write the low accumulator digit to x */
#define COMBA_STORE(x) \
x = c0;
/* write the middle accumulator digit to x */
#define COMBA_STORE2(x) \
x = c1;
/* advance to the next column: shift the accumulator down one digit */
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* c2:c1:c0 += i*i (only i is referenced by the asm; j is unused) */
#define SQRADD(i, j) \
asm( \
" mulu.d r2,%6,%6 \n\t" \
" add %0,%0,r2 \n\t" \
" adc %1,%1,r3 \n\t" \
" acr %2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3","cc");
/* for squaring some of the terms are doubled... */
/* c2:c1:c0 += 2*i*j (the product i*j is added twice).  The acr lines take a
   single operand, as in SQRADD and SQRADDAC; a stray trailing comma after %2
   was removed. */
#define SQRADD2(i, j) \
asm( \
" mulu.d r2,%6,%7 \n\t" \
" add %0,%0,r2 \n\t" \
" adc %1,%1,r3 \n\t" \
" acr %2 \n\t" \
" add %0,%0,r2 \n\t" \
" adc %1,%1,r3 \n\t" \
" acr %2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3", "cc");
/* start a secondary sum: sc1:sc0 = i*j, sc2 = 0 */
#define SQRADDSC(i, j) \
asm( \
" mulu.d r2,%6,%7 \n\t" \
" mov %0,r2 \n\t" \
" mov %1,r3 \n\t" \
" eor %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3", "cc");
/* sc2:sc1:sc0 += i*j */
#define SQRADDAC(i, j) \
asm( \
" mulu.d r2,%6,%7 \n\t" \
" add %0,%0,r2 \n\t" \
" adc %1,%1,r3 \n\t" \
" acr %2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3", "cc");
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the secondary sum is added twice) */
#define SQRADDDB \
asm( \
" add %0,%0,%3 \n\t" \
" adc %1,%1,%4 \n\t" \
" adc %2,%2,%5 \n\t" \
" add %0,%0,%3 \n\t" \
" adc %1,%1,%4 \n\t" \
" adc %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_MIPS)
/* MIPS */
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
asm( \
" multu %6,%6 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu %0,%0,$12 \n\t" \
" sltu $12,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $13,%1,$13 \n\t" \
" addu %1,%1,$12 \n\t" \
" sltu $12,%1,$12 \n\t" \
" addu %2,%2,$13 \n\t" \
" addu %2,%2,$12 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
asm( \
" multu %6,%7 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
\
" addu %0,%0,$12 \n\t" \
" sltu $14,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $15,%1,$13 \n\t" \
" addu %1,%1,$14 \n\t" \
" sltu $14,%1,$14 \n\t" \
" addu %2,%2,$15 \n\t" \
" addu %2,%2,$14 \n\t" \
\
" addu %0,%0,$12 \n\t" \
" sltu $14,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $15,%1,$13 \n\t" \
" addu %1,%1,$14 \n\t" \
" sltu $14,%1,$14 \n\t" \
" addu %2,%2,$15 \n\t" \
" addu %2,%2,$14 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
#define SQRADDSC(i, j) \
asm( \
" multu %6,%7 \n\t" \
" mflo %0 \n\t" \
" mfhi %1 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \
" multu %6,%7 \n\t" \
" mflo $12 \n\t" \
" mfhi $13 \n\t" \
" addu %0,%0,$12 \n\t" \
" sltu $12,%0,$12 \n\t" \
" addu %1,%1,$13 \n\t" \
" sltu $13,%1,$13 \n\t" \
" addu %1,%1,$12 \n\t" \
" sltu $12,%1,$12 \n\t" \
" addu %2,%2,$13 \n\t" \
" addu %2,%2,$12 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
#define SQRADDDB \
asm( \
" addu %0,%0,%3 \n\t" \
" sltu $10,%0,%3 \n\t" \
" addu %1,%1,$10 \n\t" \
" sltu $10,%1,$10 \n\t" \
" addu %1,%1,%4 \n\t" \
" sltu $11,%1,%4 \n\t" \
" addu %2,%2,$10 \n\t" \
" addu %2,%2,$11 \n\t" \
" addu %2,%2,%5 \n\t" \
\
" addu %0,%0,%3 \n\t" \
" sltu $10,%0,%3 \n\t" \
" addu %1,%1,$10 \n\t" \
" sltu $10,%1,$10 \n\t" \
" addu %1,%1,%4 \n\t" \
" sltu $11,%1,%4 \n\t" \
" addu %2,%2,$10 \n\t" \
" addu %2,%2,$11 \n\t" \
" addu %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
#else
#define TFM_ISO
/* ISO C portable code */
#define COMBA_START
#define CLEAR_CARRY \
c0 = c1 = c2 = 0;
#define COMBA_STORE(x) \
x = c0;
#define COMBA_STORE2(x) \
x = c1;
#define CARRY_FORWARD \
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
do { fp_word t; \
t = c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \
t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \
} while (0);
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
do { fp_word t; \
t = ((fp_word)i) * ((fp_word)j); \
tt = (fp_word)c0 + t; c0 = tt; \
tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \
tt = (fp_word)c0 + t; c0 = tt; \
tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \
} while (0);
#define SQRADDSC(i, j) \
do { fp_word t; \
t = ((fp_word)i) * ((fp_word)j); \
sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
} while (0);
#define SQRADDAC(i, j) \
do { fp_word t; \
t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \
t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \
} while (0);
#define SQRADDDB \
do { fp_word t; \
t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \
t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \
c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \
} while (0);
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,224 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#if defined(TFM_SQR20) && FP_SIZE >= 40
/* Fully unrolled comba squaring for a 20-digit operand: B = A * A.
 *
 * The 40 result digits are produced one output column at a time.  For column
 * k the products a[i]*a[j] with i + j == k and i < j are counted twice and
 * a[k/2]^2 (even k only) once.  Short columns use SQRADD2 per cross term;
 * longer ones sum the cross terms into sc2:sc1:sc0 with SQRADDSC/SQRADDAC and
 * double that sum once with SQRADDDB.
 *
 * a[0]..a[19] are read regardless of A->used -- presumably digits above
 * A->used are zero; confirm with callers.  The result is assembled in the
 * local array b and copied into B->dp at the end; B->used is set to 40,
 * B->sign to FP_ZPOS, and B is clamped.
 */
void fp_sqr_comba20(fp_int *A, fp_int *B)
{
fp_digit *a, b[40], c0, c1, c2, sc0, sc1, sc2;
/* the portable SQRADD2 macro needs an fp_word temporary named tt */
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp;
COMBA_START;
/* clear carries */
CLEAR_CARRY;
/* output 0 */
SQRADD(a[0],a[0]);
COMBA_STORE(b[0]);
/* output 1 */
CARRY_FORWARD;
SQRADD2(a[0], a[1]);
COMBA_STORE(b[1]);
/* output 2 */
CARRY_FORWARD;
SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
COMBA_STORE(b[2]);
/* output 3 */
CARRY_FORWARD;
SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
COMBA_STORE(b[3]);
/* output 4 */
CARRY_FORWARD;
SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
COMBA_STORE(b[4]);
/* output 5 */
CARRY_FORWARD;
SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
COMBA_STORE(b[5]);
/* output 6 */
CARRY_FORWARD;
SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
COMBA_STORE(b[6]);
/* output 7 */
CARRY_FORWARD;
SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
COMBA_STORE(b[7]);
/* output 8 */
CARRY_FORWARD;
SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
COMBA_STORE(b[8]);
/* output 9 */
CARRY_FORWARD;
SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
COMBA_STORE(b[9]);
/* output 10 */
CARRY_FORWARD;
SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
COMBA_STORE(b[10]);
/* output 11 */
CARRY_FORWARD;
SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
COMBA_STORE(b[11]);
/* output 12 */
CARRY_FORWARD;
SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
COMBA_STORE(b[12]);
/* output 13 */
CARRY_FORWARD;
SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
COMBA_STORE(b[13]);
/* output 14 */
CARRY_FORWARD;
SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
COMBA_STORE(b[14]);
/* output 15 */
CARRY_FORWARD;
SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
COMBA_STORE(b[15]);
/* output 16 */
CARRY_FORWARD;
SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
COMBA_STORE(b[16]);
/* output 17 */
CARRY_FORWARD;
SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
COMBA_STORE(b[17]);
/* output 18 */
CARRY_FORWARD;
SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
COMBA_STORE(b[18]);
/* output 19 */
CARRY_FORWARD;
SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
COMBA_STORE(b[19]);
/* from here on the columns shrink: the lower index starts above 0 because
   the upper index is capped at 19 */
/* output 20 */
CARRY_FORWARD;
SQRADDSC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
COMBA_STORE(b[20]);
/* output 21 */
CARRY_FORWARD;
SQRADDSC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
COMBA_STORE(b[21]);
/* output 22 */
CARRY_FORWARD;
SQRADDSC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
COMBA_STORE(b[22]);
/* output 23 */
CARRY_FORWARD;
SQRADDSC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
COMBA_STORE(b[23]);
/* output 24 */
CARRY_FORWARD;
SQRADDSC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
COMBA_STORE(b[24]);
/* output 25 */
CARRY_FORWARD;
SQRADDSC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
COMBA_STORE(b[25]);
/* output 26 */
CARRY_FORWARD;
SQRADDSC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
COMBA_STORE(b[26]);
/* output 27 */
CARRY_FORWARD;
SQRADDSC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
COMBA_STORE(b[27]);
/* output 28 */
CARRY_FORWARD;
SQRADDSC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
COMBA_STORE(b[28]);
/* output 29 */
CARRY_FORWARD;
SQRADDSC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
COMBA_STORE(b[29]);
/* output 30 */
CARRY_FORWARD;
SQRADDSC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
COMBA_STORE(b[30]);
/* output 31 */
CARRY_FORWARD;
SQRADDSC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
COMBA_STORE(b[31]);
/* output 32 */
CARRY_FORWARD;
SQRADDSC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
COMBA_STORE(b[32]);
/* output 33 */
CARRY_FORWARD;
SQRADDSC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
COMBA_STORE(b[33]);
/* output 34 */
CARRY_FORWARD;
SQRADD2(a[15], a[19]); SQRADD2(a[16], a[18]); SQRADD(a[17], a[17]);
COMBA_STORE(b[34]);
/* output 35 */
CARRY_FORWARD;
SQRADD2(a[16], a[19]); SQRADD2(a[17], a[18]);
COMBA_STORE(b[35]);
/* output 36 */
CARRY_FORWARD;
SQRADD2(a[17], a[19]); SQRADD(a[18], a[18]);
COMBA_STORE(b[36]);
/* output 37 */
CARRY_FORWARD;
SQRADD2(a[18], a[19]);
COMBA_STORE(b[37]);
/* output 38 */
CARRY_FORWARD;
SQRADD(a[19], a[19]);
COMBA_STORE(b[38]);
/* the carry out of the last column becomes the top digit */
COMBA_STORE2(b[39]);
COMBA_FINI;
/* publish the result: a square is non-negative, and clamping trims any
   leading zero digits */
B->used = 40;
B->sign = FP_ZPOS;
memcpy(B->dp, b, 40 * sizeof(fp_digit));
fp_clamp(B);
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,264 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#if defined(TFM_SQR24) && FP_SIZE >= 48
/*
 * fp_sqr_comba24 - fully unrolled, column-by-column squaring of a 24-digit
 * operand.
 *
 * A: input.  Digits A->dp[0] .. A->dp[23] are read unconditionally; A->used
 *    is never consulted here.
 * B: output.  B->used is set to 48, B->sign to FP_ZPOS, 48 digits are copied
 *    into B->dp and fp_clamp(B) is called.
 *
 * Output column k collects every product a[i]*a[j] with i + j == k.  Each
 * off-diagonal pair is listed once (i < j); the diagonal term a[k/2]*a[k/2]
 * appears only for even k.
 *
 * The SQRADD*, COMBA_* and CARRY_FORWARD macros are not defined in this file.
 * NOTE(review): presumably they come from fp_sqr_comba.c (included above with
 * TFM_DEFINES), SQRADD2 adds a product twice, and the
 * SQRADDSC / SQRADDAC / SQRADDDB trio sums the pairs in sc0..sc2 and then
 * adds that sum doubled - confirm against the macro definitions.
 *
 * Columns are written to the local array b[] and only copied to B->dp after
 * the last read of a[].
 */
void fp_sqr_comba24(fp_int *A, fp_int *B)
{
fp_digit *a, b[48], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
/* extra temporary, declared only for TFM_ISO builds */
fp_word tt;
#endif
a = A->dp;
COMBA_START;
/* clear carries */
CLEAR_CARRY;
/* output 0 */
SQRADD(a[0],a[0]);
COMBA_STORE(b[0]);
/* output 1 */
CARRY_FORWARD;
SQRADD2(a[0], a[1]);
COMBA_STORE(b[1]);
/* output 2 */
CARRY_FORWARD;
SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
COMBA_STORE(b[2]);
/* output 3 */
CARRY_FORWARD;
SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
COMBA_STORE(b[3]);
/* output 4 */
CARRY_FORWARD;
SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
COMBA_STORE(b[4]);
/* output 5 */
/* first column with three off-diagonal pairs: switches to the SC/AC/DB form */
CARRY_FORWARD;
SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
COMBA_STORE(b[5]);
/* output 6 */
CARRY_FORWARD;
SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
COMBA_STORE(b[6]);
/* output 7 */
CARRY_FORWARD;
SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
COMBA_STORE(b[7]);
/* output 8 */
CARRY_FORWARD;
SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
COMBA_STORE(b[8]);
/* output 9 */
CARRY_FORWARD;
SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
COMBA_STORE(b[9]);
/* output 10 */
CARRY_FORWARD;
SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
COMBA_STORE(b[10]);
/* output 11 */
CARRY_FORWARD;
SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
COMBA_STORE(b[11]);
/* output 12 */
CARRY_FORWARD;
SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
COMBA_STORE(b[12]);
/* output 13 */
CARRY_FORWARD;
SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
COMBA_STORE(b[13]);
/* output 14 */
CARRY_FORWARD;
SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
COMBA_STORE(b[14]);
/* output 15 */
CARRY_FORWARD;
SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
COMBA_STORE(b[15]);
/* output 16 */
CARRY_FORWARD;
SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
COMBA_STORE(b[16]);
/* output 17 */
CARRY_FORWARD;
SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
COMBA_STORE(b[17]);
/* output 18 */
CARRY_FORWARD;
SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
COMBA_STORE(b[18]);
/* output 19 */
CARRY_FORWARD;
SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
COMBA_STORE(b[19]);
/* output 20 */
CARRY_FORWARD;
SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
COMBA_STORE(b[20]);
/* output 21 */
CARRY_FORWARD;
SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
COMBA_STORE(b[21]);
/* output 22 */
CARRY_FORWARD;
SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
COMBA_STORE(b[22]);
/* output 23 */
/* widest column: twelve off-diagonal pairs, a[0]*a[23] .. a[11]*a[12] */
CARRY_FORWARD;
SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
COMBA_STORE(b[23]);
/* output 24 */
/* from here on the highest index is pinned at a[23], so column k starts at a[k-23] */
CARRY_FORWARD;
SQRADDSC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
COMBA_STORE(b[24]);
/* output 25 */
CARRY_FORWARD;
SQRADDSC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
COMBA_STORE(b[25]);
/* output 26 */
CARRY_FORWARD;
SQRADDSC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
COMBA_STORE(b[26]);
/* output 27 */
CARRY_FORWARD;
SQRADDSC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
COMBA_STORE(b[27]);
/* output 28 */
CARRY_FORWARD;
SQRADDSC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
COMBA_STORE(b[28]);
/* output 29 */
CARRY_FORWARD;
SQRADDSC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
COMBA_STORE(b[29]);
/* output 30 */
CARRY_FORWARD;
SQRADDSC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
COMBA_STORE(b[30]);
/* output 31 */
CARRY_FORWARD;
SQRADDSC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
COMBA_STORE(b[31]);
/* output 32 */
CARRY_FORWARD;
SQRADDSC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
COMBA_STORE(b[32]);
/* output 33 */
CARRY_FORWARD;
SQRADDSC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
COMBA_STORE(b[33]);
/* output 34 */
CARRY_FORWARD;
SQRADDSC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
COMBA_STORE(b[34]);
/* output 35 */
CARRY_FORWARD;
SQRADDSC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
COMBA_STORE(b[35]);
/* output 36 */
CARRY_FORWARD;
SQRADDSC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
COMBA_STORE(b[36]);
/* output 37 */
CARRY_FORWARD;
SQRADDSC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
COMBA_STORE(b[37]);
/* output 38 */
CARRY_FORWARD;
SQRADDSC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
COMBA_STORE(b[38]);
/* output 39 */
CARRY_FORWARD;
SQRADDSC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
COMBA_STORE(b[39]);
/* output 40 */
CARRY_FORWARD;
SQRADDSC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
COMBA_STORE(b[40]);
/* output 41 */
CARRY_FORWARD;
SQRADDSC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
COMBA_STORE(b[41]);
/* output 42 */
/* columns with at most two off-diagonal pairs go back to plain SQRADD2 */
CARRY_FORWARD;
SQRADD2(a[19], a[23]); SQRADD2(a[20], a[22]); SQRADD(a[21], a[21]);
COMBA_STORE(b[42]);
/* output 43 */
CARRY_FORWARD;
SQRADD2(a[20], a[23]); SQRADD2(a[21], a[22]);
COMBA_STORE(b[43]);
/* output 44 */
CARRY_FORWARD;
SQRADD2(a[21], a[23]); SQRADD(a[22], a[22]);
COMBA_STORE(b[44]);
/* output 45 */
CARRY_FORWARD;
SQRADD2(a[22], a[23]);
COMBA_STORE(b[45]);
/* output 46 */
CARRY_FORWARD;
SQRADD(a[23], a[23]);
COMBA_STORE(b[46]);
/* digit 47 has no products of its own; NOTE(review): COMBA_STORE2 presumably writes the remaining carry word - confirm */
COMBA_STORE2(b[47]);
COMBA_FINI;
/* publish the result: full 48-digit length first, fp_clamp() is then called on B */
B->used = 48;
B->sign = FP_ZPOS;
memcpy(B->dp, b, 48 * sizeof(fp_digit));
fp_clamp(B);
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,304 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#if defined(TFM_SQR28) && FP_SIZE >= 56
/*
 * fp_sqr_comba28 - fully unrolled, column-by-column squaring of a 28-digit
 * operand.
 *
 * A: input.  Digits A->dp[0] .. A->dp[27] are read unconditionally; A->used
 *    is never consulted here.
 * B: output.  B->used is set to 56, B->sign to FP_ZPOS, 56 digits are copied
 *    into B->dp and fp_clamp(B) is called.
 *
 * Output column k collects every product a[i]*a[j] with i + j == k.  Each
 * off-diagonal pair is listed once (i < j); the diagonal term a[k/2]*a[k/2]
 * appears only for even k.
 *
 * The SQRADD*, COMBA_* and CARRY_FORWARD macros are not defined in this file.
 * NOTE(review): presumably they come from fp_sqr_comba.c (included above with
 * TFM_DEFINES), SQRADD2 adds a product twice, and the
 * SQRADDSC / SQRADDAC / SQRADDDB trio sums the pairs in sc0..sc2 and then
 * adds that sum doubled - confirm against the macro definitions.
 *
 * Columns are written to the local array b[] and only copied to B->dp after
 * the last read of a[].
 */
void fp_sqr_comba28(fp_int *A, fp_int *B)
{
fp_digit *a, b[56], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
/* extra temporary, declared only for TFM_ISO builds */
fp_word tt;
#endif
a = A->dp;
COMBA_START;
/* clear carries */
CLEAR_CARRY;
/* output 0 */
SQRADD(a[0],a[0]);
COMBA_STORE(b[0]);
/* output 1 */
CARRY_FORWARD;
SQRADD2(a[0], a[1]);
COMBA_STORE(b[1]);
/* output 2 */
CARRY_FORWARD;
SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
COMBA_STORE(b[2]);
/* output 3 */
CARRY_FORWARD;
SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
COMBA_STORE(b[3]);
/* output 4 */
CARRY_FORWARD;
SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
COMBA_STORE(b[4]);
/* output 5 */
/* first column with three off-diagonal pairs: switches to the SC/AC/DB form */
CARRY_FORWARD;
SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
COMBA_STORE(b[5]);
/* output 6 */
CARRY_FORWARD;
SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
COMBA_STORE(b[6]);
/* output 7 */
CARRY_FORWARD;
SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
COMBA_STORE(b[7]);
/* output 8 */
CARRY_FORWARD;
SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
COMBA_STORE(b[8]);
/* output 9 */
CARRY_FORWARD;
SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
COMBA_STORE(b[9]);
/* output 10 */
CARRY_FORWARD;
SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
COMBA_STORE(b[10]);
/* output 11 */
CARRY_FORWARD;
SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
COMBA_STORE(b[11]);
/* output 12 */
CARRY_FORWARD;
SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
COMBA_STORE(b[12]);
/* output 13 */
CARRY_FORWARD;
SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
COMBA_STORE(b[13]);
/* output 14 */
CARRY_FORWARD;
SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
COMBA_STORE(b[14]);
/* output 15 */
CARRY_FORWARD;
SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
COMBA_STORE(b[15]);
/* output 16 */
CARRY_FORWARD;
SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
COMBA_STORE(b[16]);
/* output 17 */
CARRY_FORWARD;
SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
COMBA_STORE(b[17]);
/* output 18 */
CARRY_FORWARD;
SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
COMBA_STORE(b[18]);
/* output 19 */
CARRY_FORWARD;
SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
COMBA_STORE(b[19]);
/* output 20 */
CARRY_FORWARD;
SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
COMBA_STORE(b[20]);
/* output 21 */
CARRY_FORWARD;
SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
COMBA_STORE(b[21]);
/* output 22 */
CARRY_FORWARD;
SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
COMBA_STORE(b[22]);
/* output 23 */
CARRY_FORWARD;
SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
COMBA_STORE(b[23]);
/* output 24 */
CARRY_FORWARD;
SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
COMBA_STORE(b[24]);
/* output 25 */
CARRY_FORWARD;
SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
COMBA_STORE(b[25]);
/* output 26 */
CARRY_FORWARD;
SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
COMBA_STORE(b[26]);
/* output 27 */
/* widest column: fourteen off-diagonal pairs, a[0]*a[27] .. a[13]*a[14] */
CARRY_FORWARD;
SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
COMBA_STORE(b[27]);
/* output 28 */
/* from here on the highest index is pinned at a[27], so column k starts at a[k-27] */
CARRY_FORWARD;
SQRADDSC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
COMBA_STORE(b[28]);
/* output 29 */
CARRY_FORWARD;
SQRADDSC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
COMBA_STORE(b[29]);
/* output 30 */
CARRY_FORWARD;
SQRADDSC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
COMBA_STORE(b[30]);
/* output 31 */
CARRY_FORWARD;
SQRADDSC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
COMBA_STORE(b[31]);
/* output 32 */
CARRY_FORWARD;
SQRADDSC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
COMBA_STORE(b[32]);
/* output 33 */
CARRY_FORWARD;
SQRADDSC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
COMBA_STORE(b[33]);
/* output 34 */
CARRY_FORWARD;
SQRADDSC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
COMBA_STORE(b[34]);
/* output 35 */
CARRY_FORWARD;
SQRADDSC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
COMBA_STORE(b[35]);
/* output 36 */
CARRY_FORWARD;
SQRADDSC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
COMBA_STORE(b[36]);
/* output 37 */
CARRY_FORWARD;
SQRADDSC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
COMBA_STORE(b[37]);
/* output 38 */
CARRY_FORWARD;
SQRADDSC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
COMBA_STORE(b[38]);
/* output 39 */
CARRY_FORWARD;
SQRADDSC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
COMBA_STORE(b[39]);
/* output 40 */
CARRY_FORWARD;
SQRADDSC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
COMBA_STORE(b[40]);
/* output 41 */
CARRY_FORWARD;
SQRADDSC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
COMBA_STORE(b[41]);
/* output 42 */
CARRY_FORWARD;
SQRADDSC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]);
COMBA_STORE(b[42]);
/* output 43 */
CARRY_FORWARD;
SQRADDSC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB;
COMBA_STORE(b[43]);
/* output 44 */
CARRY_FORWARD;
SQRADDSC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]);
COMBA_STORE(b[44]);
/* output 45 */
CARRY_FORWARD;
SQRADDSC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB;
COMBA_STORE(b[45]);
/* output 46 */
CARRY_FORWARD;
SQRADDSC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]);
COMBA_STORE(b[46]);
/* output 47 */
CARRY_FORWARD;
SQRADDSC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB;
COMBA_STORE(b[47]);
/* output 48 */
CARRY_FORWARD;
SQRADDSC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]);
COMBA_STORE(b[48]);
/* output 49 */
CARRY_FORWARD;
SQRADDSC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB;
COMBA_STORE(b[49]);
/* output 50 */
/* columns with at most two off-diagonal pairs go back to plain SQRADD2 */
CARRY_FORWARD;
SQRADD2(a[23], a[27]); SQRADD2(a[24], a[26]); SQRADD(a[25], a[25]);
COMBA_STORE(b[50]);
/* output 51 */
CARRY_FORWARD;
SQRADD2(a[24], a[27]); SQRADD2(a[25], a[26]);
COMBA_STORE(b[51]);
/* output 52 */
CARRY_FORWARD;
SQRADD2(a[25], a[27]); SQRADD(a[26], a[26]);
COMBA_STORE(b[52]);
/* output 53 */
CARRY_FORWARD;
SQRADD2(a[26], a[27]);
COMBA_STORE(b[53]);
/* output 54 */
CARRY_FORWARD;
SQRADD(a[27], a[27]);
COMBA_STORE(b[54]);
/* digit 55 has no products of its own; NOTE(review): COMBA_STORE2 presumably writes the remaining carry word - confirm */
COMBA_STORE2(b[55]);
COMBA_FINI;
/* publish the result: full 56-digit length first, fp_clamp() is then called on B */
B->used = 56;
B->sign = FP_ZPOS;
memcpy(B->dp, b, 56 * sizeof(fp_digit));
fp_clamp(B);
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,344 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#if defined(TFM_SQR32) && FP_SIZE >= 64
void fp_sqr_comba32(fp_int *A, fp_int *B)
{
fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
#ifdef TFM_ISO
fp_word tt;
#endif
a = A->dp;
COMBA_START;
/* clear carries */
CLEAR_CARRY;
/* output 0 */
SQRADD(a[0],a[0]);
COMBA_STORE(b[0]);
/* output 1 */
CARRY_FORWARD;
SQRADD2(a[0], a[1]);
COMBA_STORE(b[1]);
/* output 2 */
CARRY_FORWARD;
SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
COMBA_STORE(b[2]);
/* output 3 */
CARRY_FORWARD;
SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
COMBA_STORE(b[3]);
/* output 4 */
CARRY_FORWARD;
SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
COMBA_STORE(b[4]);
/* output 5 */
CARRY_FORWARD;
SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
COMBA_STORE(b[5]);
/* output 6 */
CARRY_FORWARD;
SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
COMBA_STORE(b[6]);
/* output 7 */
CARRY_FORWARD;
SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
COMBA_STORE(b[7]);
/* output 8 */
CARRY_FORWARD;
SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
COMBA_STORE(b[8]);
/* output 9 */
CARRY_FORWARD;
SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
COMBA_STORE(b[9]);
/* output 10 */
CARRY_FORWARD;
SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
COMBA_STORE(b[10]);
/* output 11 */
CARRY_FORWARD;
SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
COMBA_STORE(b[11]);
/* output 12 */
CARRY_FORWARD;
SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
COMBA_STORE(b[12]);
/* output 13 */
CARRY_FORWARD;
SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
COMBA_STORE(b[13]);
/* output 14 */
CARRY_FORWARD;
SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
COMBA_STORE(b[14]);
/* output 15 */
CARRY_FORWARD;
SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
COMBA_STORE(b[15]);
/* output 16 */
CARRY_FORWARD;
SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
COMBA_STORE(b[16]);
/* output 17 */
CARRY_FORWARD;
SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
COMBA_STORE(b[17]);
/* output 18 */
CARRY_FORWARD;
SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
COMBA_STORE(b[18]);
/* output 19 */
CARRY_FORWARD;
SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
COMBA_STORE(b[19]);
/* output 20 */
CARRY_FORWARD;
SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
COMBA_STORE(b[20]);
/* output 21 */
CARRY_FORWARD;
SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
COMBA_STORE(b[21]);
/* output 22 */
CARRY_FORWARD;
SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
COMBA_STORE(b[22]);
/* output 23 */
CARRY_FORWARD;
SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
COMBA_STORE(b[23]);
/* output 24 */
CARRY_FORWARD;
SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
COMBA_STORE(b[24]);
/* output 25 */
CARRY_FORWARD;
SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
COMBA_STORE(b[25]);
/* output 26 */
CARRY_FORWARD;
SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
COMBA_STORE(b[26]);
/* output 27 */
CARRY_FORWARD;
SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
COMBA_STORE(b[27]);
/* output 28 */
CARRY_FORWARD;
SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
COMBA_STORE(b[28]);
/* output 29 */
CARRY_FORWARD;
SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
COMBA_STORE(b[29]);
/* output 30 */
CARRY_FORWARD;
SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
COMBA_STORE(b[30]);
/* output 31 */
CARRY_FORWARD;
SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
COMBA_STORE(b[31]);
/* output 32 */
CARRY_FORWARD;
SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
COMBA_STORE(b[32]);
/* output 33 */
CARRY_FORWARD;
SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
COMBA_STORE(b[33]);
/* output 34 */
CARRY_FORWARD;
SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
COMBA_STORE(b[34]);
/* output 35 */
CARRY_FORWARD;
SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
COMBA_STORE(b[35]);
/* output 36 */
CARRY_FORWARD;
SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
COMBA_STORE(b[36]);
/* output 37 */
CARRY_FORWARD;
SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
COMBA_STORE(b[37]);
/* output 38 */
CARRY_FORWARD;
SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
COMBA_STORE(b[38]);
/* output 39 */
CARRY_FORWARD;
SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
COMBA_STORE(b[39]);
/* output 40 */
CARRY_FORWARD;
SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
COMBA_STORE(b[40]);
/* output 41 */
CARRY_FORWARD;
SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
COMBA_STORE(b[41]);
/* output 42 */
CARRY_FORWARD;
SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]);
COMBA_STORE(b[42]);
/* output 43 */
CARRY_FORWARD;
SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB;
COMBA_STORE(b[43]);
/* output 44 */
CARRY_FORWARD;
SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]);
COMBA_STORE(b[44]);
/* output 45 */
CARRY_FORWARD;
SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB;
COMBA_STORE(b[45]);
/* output 46 */
CARRY_FORWARD;
SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]);
COMBA_STORE(b[46]);
/* output 47 */
CARRY_FORWARD;
SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB;
COMBA_STORE(b[47]);
/* output 48 */
CARRY_FORWARD;
SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]);
COMBA_STORE(b[48]);
/* output 49 */
CARRY_FORWARD;
SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB;
COMBA_STORE(b[49]);
/* output 50 */
CARRY_FORWARD;
SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]);
COMBA_STORE(b[50]);
/* output 51 */
CARRY_FORWARD;
SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB;
COMBA_STORE(b[51]);
/* output 52 */
CARRY_FORWARD;
SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]);
COMBA_STORE(b[52]);
/* output 53 */
CARRY_FORWARD;
SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB;
COMBA_STORE(b[53]);
/* output 54 */
CARRY_FORWARD;
SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]);
COMBA_STORE(b[54]);
/* output 55 */
CARRY_FORWARD;
SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB;
COMBA_STORE(b[55]);
/* output 56 */
CARRY_FORWARD;
SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]);
COMBA_STORE(b[56]);
/* output 57 */
CARRY_FORWARD;
SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB;
COMBA_STORE(b[57]);
/* output 58 */
CARRY_FORWARD;
SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]);
COMBA_STORE(b[58]);
/* output 59 */
CARRY_FORWARD;
SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]);
COMBA_STORE(b[59]);
/* output 60 */
CARRY_FORWARD;
SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]);
COMBA_STORE(b[60]);
/* output 61 */
CARRY_FORWARD;
SQRADD2(a[30], a[31]);
COMBA_STORE(b[61]);
/* output 62 */
CARRY_FORWARD;
SQRADD(a[31], a[31]);
COMBA_STORE(b[62]);
COMBA_STORE2(b[63]);
COMBA_FINI;
B->used = 64;
B->sign = FP_ZPOS;
memcpy(B->dp, b, 64 * sizeof(fp_digit));
fp_clamp(B);
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,98 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#define TFM_DEFINES
#include "fp_sqr_comba.c"
/* generic comba squarer
 *
 * Computes B = A * A one output column (result digit) at a time.
 *
 * A - value to square
 * B - receives the result; may be the same object as A, in which case the
 *     product is accumulated in a local temporary and copied to B at the end
 *
 * At most FP_SIZE-1 result digits are produced; higher columns are not
 * computed.
 *
 * NOTE(review): COMBA_START, CLEAR_CARRY, CARRY_FORWARD, SQRADD, SQRADD2,
 * COMBA_STORE and COMBA_FINI are not defined in this file.  Presumably they
 * come from the fp_sqr_comba.c include above (pulled in with TFM_DEFINES)
 * and operate on the locals c0/c1/c2 (and tt under TFM_ISO) -- confirm there.
 */
void fp_sqr_comba(fp_int *A, fp_int *B)
{
int pa, ix, iz;
/* not referenced by name below; presumably the accumulator used by the COMBA/SQRADD macros */
fp_digit c0, c1, c2;
fp_int tmp, *dst;
#ifdef TFM_ISO
fp_word tt;
#endif
/* get size of output (twice the input length) and trim it to FP_SIZE-1 digits */
pa = A->used + A->used;
if (pa >= FP_SIZE) {
pa = FP_SIZE-1;
}
/* pa is now the number of output digits to produce */
COMBA_START;
CLEAR_CARRY;
/* when squaring in place, build the result in tmp so A's digits stay intact while they are read */
if (A == B) {
fp_zero(&tmp);
dst = &tmp;
} else {
fp_zero(B);
dst = B;
}
/* one iteration per output column ix */
for (ix = 0; ix < pa; ix++) {
int tx, ty, iy;
fp_digit *tmpy, *tmpx;
/* get offsets into the two bignums: tx + ty == ix, with ty as large as possible */
ty = MIN(A->used-1, ix);
tx = ix - ty;
/* setup temp aliases: tmpx walks upwards from A->dp[tx], tmpy downwards from A->dp[ty] */
tmpx = A->dp + tx;
tmpy = A->dp + ty;
/* this is the number of times the loop will iterate,
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(A->used-tx, ty+1);
/* now for squaring tx can never equal ty
* we halve the distance since they approach
* at a rate of 2x and we have to round because
* odd cases need to be executed
*/
iy = MIN(iy, (ty-tx+1)>>1);
/* forward carries */
CARRY_FORWARD;
/* execute loop: each pair of distinct digits is passed to SQRADD2 once */
for (iz = 0; iz < iy; iz++) {
fp_digit _tmpx = *tmpx++;
fp_digit _tmpy = *tmpy--;
SQRADD2(_tmpx, _tmpy);
}
/* even columns have the square term in them: digit ix/2 paired with itself */
if ((ix&1) == 0) {
fp_digit _a_dp = A->dp[ix>>1];
SQRADD(_a_dp, A->dp[ix>>1]);
}
/* store it */
COMBA_STORE(dst->dp[ix]);
}
COMBA_FINI;
/* setup dest: record the length, then strip leading zero digits */
dst->used = pa;
fp_clamp (dst);
/* in-place case: move the finished product from the temporary into B */
if (dst != B) {
fp_copy(dst, B);
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

File diff suppressed because it is too large Load Diff

50
src/crypto/tfm/fp_sub.c Normal file
View File

@ -0,0 +1,50 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* c = a - b
 *
 * Signed subtraction expressed through the magnitude-only helpers
 * s_fp_add() and s_fp_sub().  The signs of both operands are sampled
 * before c is written.
 */
void fp_sub(fp_int *a, fp_int *b, fp_int *c)
{
  const int sign_a = a->sign;
  const int sign_b = b->sign;

  if (sign_a != sign_b) {
    /* Operands have opposite signs: the magnitudes add up and the
     * result carries the sign of the first operand. */
    c->sign = sign_a;
    s_fp_add(a, b, c);
    return;
  }

  /* Operands share a sign: subtract the smaller magnitude from the larger. */
  if (fp_cmp_mag(a, b) == FP_LT) {
    /* |a| < |b|, so the result takes the opposite of a's sign. */
    c->sign = (sign_a != FP_ZPOS) ? FP_ZPOS : FP_NEG;
    s_fp_sub(b, a, c);
  } else {
    /* |a| >= |b|, so the result keeps a's sign. */
    c->sign = sign_a;
    s_fp_sub(a, b, c);
  }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,29 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Write the magnitude of a into b as a byte string.
 *
 * a - value to serialise (a private copy is consumed, a itself is not modified)
 * b - output buffer; nothing is written when a is zero
 *
 * Bytes are generated low byte first and then handed to fp_reverse(), so
 * the most significant byte ends up at b[0].
 */
void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
{
  fp_int work;
  int    len = 0;

  fp_init_copy(&work, a);

  /* emit the low 8 bits, then shift the working copy right by one byte */
  for (; fp_iszero(&work) == FP_NO; ++len) {
    b[len] = (unsigned char) (work.dp[0] & 255);
    fp_div_2d(&work, 8, &work, NULL);
  }

  fp_reverse(b, len);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,20 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Number of bytes needed to hold the fp_count_bits(a) bits of a. */
int fp_unsigned_bin_size(fp_int *a)
{
  const int bits  = fp_count_bits(a);
  int       bytes = bits / 8;

  /* a partially filled top byte still occupies a whole byte */
  if ((bits & 7) != 0) {
    bytes += 1;
  }
  return bytes;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

42
src/crypto/tfm/s_fp_add.c Normal file
View File

@ -0,0 +1,42 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* unsigned addition: adds the magnitudes of a and b into c
 *
 * The operand lengths and c's previous length are read before c is
 * written.  A final carry is only stored while there is still room
 * (index < FP_SIZE).
 */
void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
{
  int              ix, ndigits, prev_used;
  register fp_word acc;

  ndigits   = MAX(a->used, b->used);
  prev_used = MIN(c->used, FP_SIZE);
  c->used   = ndigits;

  /* add digit by digit; acc holds the running sum and, after the shift, the carry */
  acc = 0;
  ix  = 0;
  while (ix < ndigits) {
    acc      += ((fp_word)a->dp[ix]) + ((fp_word)b->dp[ix]);
    c->dp[ix] = (fp_digit)acc;
    acc     >>= DIGIT_BIT;
    ix++;
  }

  /* append the final carry as an extra digit if it fits */
  if (acc != 0 && ix < FP_SIZE) {
    c->dp[c->used++] = (fp_digit)acc;
    ix++;
  }
  c->used = ix;

  /* wipe digits left over from c's previous, longer value */
  while (ix < prev_used) {
    c->dp[ix] = 0;
    ix++;
  }
  fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

40
src/crypto/tfm/s_fp_sub.c Normal file
View File

@ -0,0 +1,40 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* unsigned subtraction ||a|| >= ||b|| ALWAYS!
 *
 * Stores the difference of the magnitudes of a and b in c.
 */
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
{
  /* lengths are captured before c->used is overwritten, so the loop
   * bounds stay valid even when c is the same object as b */
  const int prev_used = c->used;
  const int b_used    = b->used;
  int       ix        = 0;
  fp_word   borrow    = 0;

  c->used = a->used;

  /* digits present in both operands */
  while (ix < b_used) {
    borrow    = ((fp_word)a->dp[ix]) - (((fp_word)b->dp[ix]) + borrow);
    c->dp[ix] = (fp_digit)borrow;
    /* the bit just above the digit signals a borrow for the next position */
    borrow    = (borrow >> DIGIT_BIT) & 1;
    ++ix;
  }

  /* remaining digits of a: only the borrow is left to propagate */
  while (ix < a->used) {
    borrow    = ((fp_word)a->dp[ix]) - borrow;
    c->dp[ix] = (fp_digit)borrow;
    borrow    = (borrow >> DIGIT_BIT) & 1;
    ++ix;
  }

  /* wipe digits left over from c's previous, longer value */
  while (ix < prev_used) {
    c->dp[ix] = 0;
    ++ix;
  }
  fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

498
src/crypto/tfm/tfm.h Normal file
View File

@ -0,0 +1,498 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#ifndef TFM_H_
#define TFM_H_
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
/* Library version, packed one byte per field as 0xMaMiPaDe:
 *   Ma - Major
 *   Mi - Minor
 *   Pa - Patch
 *   De - Development: 00=release, 01=in-development
 * 0x000D0101 therefore reads 0.13.1, in development, matching TFM_VERSION_S.
 */
#define TFM_VERSION 0x000D0101
#define TFM_VERSION_S "v0.13.1-next"
/* MIN/MAX are only defined when the including code has not already done so.
 * Each argument appears twice in the expansion, so avoid arguments with side effects.
 */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
/* Default build configuration.
 * Define TFM_ALREADY_SET externally to skip this whole section and supply
 * your own settings, e.g. from the make process.
 */
#ifndef TFM_ALREADY_SET
/* do we want the large set of small multiplications ?
Enable these if you are going to be doing a lot of small (<= 16 digit) multiplications say in ECC
Or if you're on a 64-bit machine doing RSA as a 1024-bit integer == 16 digits ;-)
*/
#define TFM_SMALL_SET
/* do we want huge code
Enable these if you are doing 20, 24, 28, 32, 48, 64 digit multiplications (useful for RSA)
Less important on 64-bit machines as 32 digits == 2048 bits
Each TFM_MULn symbol enables the matching fp_mul_combaN() declaration in tfm_private.h.
*/
/* fixed-size multipliers for 3..17 digits: disabled by default (the ECC helpers further down may enable some of them) */
#if 0
#define TFM_MUL3
#define TFM_MUL4
#define TFM_MUL6
#define TFM_MUL7
#define TFM_MUL8
#define TFM_MUL9
#define TFM_MUL12
#define TFM_MUL17
#endif
/* fixed-size multipliers for 20..64 digits: enabled by default */
#define TFM_MUL20
#define TFM_MUL24
#define TFM_MUL28
#define TFM_MUL32
#define TFM_MUL48
#define TFM_MUL64
/* fixed-size squarers for 3..17 digits: disabled by default; each TFM_SQRn enables fp_sqr_combaN() in tfm_private.h */
#if 0
#define TFM_SQR3
#define TFM_SQR4
#define TFM_SQR6
#define TFM_SQR7
#define TFM_SQR8
#define TFM_SQR9
#define TFM_SQR12
#define TFM_SQR17
#endif
/* fixed-size squarers for 20..64 digits: enabled by default */
#define TFM_SQR20
#define TFM_SQR24
#define TFM_SQR28
#define TFM_SQR32
#define TFM_SQR48
#define TFM_SQR64
/* do we want some overflow checks
Not required if you make sure your numbers are within range (e.g. by default a modulus for fp_exptmod() can only be up to 2048 bits long)
*/
/* #define TFM_CHECK */
/* Is the target a P4 Prescott
*/
/* #define TFM_PRESCOTT */
/* Do we want timing resistant fp_exptmod() ?
* This makes it slower but also timing invariant with respect to the exponent
*/
/* #define TFM_TIMING_RESISTANT */
#endif
/* Max size of any number in bits. Basically the largest size you will be multiplying
 * should be half [or smaller] of FP_MAX_SIZE minus four digits.
 *
 * You can externally define this; otherwise it defaults to 4096 bits plus eight
 * spare digits [allowing multiplications up to 2048x2048 bits].
 */
#ifndef FP_MAX_SIZE
#define FP_MAX_SIZE (4096+(8*DIGIT_BIT))
#endif
/* will this lib work? */
#if (CHAR_BIT & 7)
#error CHAR_BIT must be a multiple of eight.
#endif
/* NOTE(review): with the default FP_MAX_SIZE above, DIGIT_BIT is only defined
 * further down in this header.  Unless it was defined externally, the
 * preprocessor treats it as 0 here, so this test effectively checks
 * 4096 % CHAR_BIT.
 */
#if FP_MAX_SIZE % CHAR_BIT
#error FP_MAX_SIZE must be a multiple of CHAR_BIT
#endif
/* Digit width and assembler back-end selection.
 *
 * Order matters in this section:
 *   1. FP_64BIT is switched on when long is 8 bytes or the x86-64 back end is chosen.
 *   2. x86-64 / x86-32 back ends are auto-selected from compiler macros unless
 *      TFM_NO_ASM is defined.
 *   3. FP_64BIT is dropped again for the 32-bit-only back ends.
 *   4. Exactly one back end may define TFM_ASM; a second one triggers #error.
 *   5. TFM_NO_ASM finally undefines every back-end symbol.
 */
/* use 64-bit digits when the compiler reports an 8-byte long */
#if __SIZEOF_LONG__ == 8
#define FP_64BIT
#endif
/* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */
#if defined(__x86_64__)
#if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)
#error x86-64 detected, x86-32/SSE2/ARM optimizations are not valid!
#endif
#if !defined(TFM_X86_64) && !defined(TFM_NO_ASM)
#define TFM_X86_64
#endif
#endif
#if defined(TFM_X86_64)
#if !defined(FP_64BIT)
#define FP_64BIT
#endif
#endif
/* try to detect x86-32 */
#if defined(__i386__) && !defined(TFM_SSE2)
#if defined(TFM_X86_64) || defined(TFM_ARM)
#error x86-32 detected, x86-64/ARM optimizations are not valid!
#endif
#if !defined(TFM_X86) && !defined(TFM_NO_ASM)
#define TFM_X86
#endif
#endif
/* make sure we're 32-bit for x86-32/sse/arm/ppc32 */
#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) || defined(TFM_PPC32)) && defined(FP_64BIT)
#warning x86-32, SSE2 and ARM, PPC32 optimizations require 32-bit digits (undefining)
#undef FP_64BIT
#endif
/* multi asms? Each selected back end defines TFM_ASM; finding it already defined means two back ends were requested. */
#ifdef TFM_X86
#define TFM_ASM
#endif
#ifdef TFM_X86_64
#ifdef TFM_ASM
#error TFM_ASM already defined!
#endif
#define TFM_ASM
#endif
#ifdef TFM_SSE2
#ifdef TFM_ASM
#error TFM_ASM already defined!
#endif
#define TFM_ASM
#endif
#ifdef TFM_ARM
#ifdef TFM_ASM
#error TFM_ASM already defined!
#endif
#define TFM_ASM
#endif
#ifdef TFM_PPC32
#ifdef TFM_ASM
#error TFM_ASM already defined!
#endif
#define TFM_ASM
#endif
#ifdef TFM_PPC64
#ifdef TFM_ASM
#error TFM_ASM already defined!
#endif
#define TFM_ASM
#endif
#ifdef TFM_AVR32
#ifdef TFM_ASM
#error TFM_ASM already defined!
#endif
#define TFM_ASM
#endif
/* we want no asm? Then drop every back-end selection made above. */
#ifdef TFM_NO_ASM
#undef TFM_X86
#undef TFM_X86_64
#undef TFM_SSE2
#undef TFM_ARM
#undef TFM_PPC32
#undef TFM_PPC64
#undef TFM_AVR32
#undef TFM_ASM
#endif
/* ECC helpers
 * Defining one of the TFM_ECCnnn symbols enables the fixed-size multiplier
 * and squarer (TFM_MULn / TFM_SQRn) for that field size.  The digit count n
 * depends on whether digits are 64-bit (FP_64BIT) or 32-bit.
 */
/* 192-bit: 3 x 64-bit digits or 6 x 32-bit digits */
#ifdef TFM_ECC192
#ifdef FP_64BIT
#define TFM_MUL3
#define TFM_SQR3
#else
#define TFM_MUL6
#define TFM_SQR6
#endif
#endif
/* 224-bit: 4 x 64-bit digits or 7 x 32-bit digits */
#ifdef TFM_ECC224
#ifdef FP_64BIT
#define TFM_MUL4
#define TFM_SQR4
#else
#define TFM_MUL7
#define TFM_SQR7
#endif
#endif
/* 256-bit: 4 x 64-bit digits or 8 x 32-bit digits */
#ifdef TFM_ECC256
#ifdef FP_64BIT
#define TFM_MUL4
#define TFM_SQR4
#else
#define TFM_MUL8
#define TFM_SQR8
#endif
#endif
/* 384-bit: 6 x 64-bit digits or 12 x 32-bit digits */
#ifdef TFM_ECC384
#ifdef FP_64BIT
#define TFM_MUL6
#define TFM_SQR6
#else
#define TFM_MUL12
#define TFM_SQR12
#endif
#endif
/* 521-bit: 9 x 64-bit digits or 17 x 32-bit digits */
#ifdef TFM_ECC521
#ifdef FP_64BIT
#define TFM_MUL9
#define TFM_SQR9
#else
#define TFM_MUL17
#define TFM_SQR17
#endif
#endif
/* Random source selection.
 * FP_GEN_RANDOM() yields one random value and FP_GEN_RANDOM_MAX is the largest
 * value it can return.  Both can be pre-defined externally; the fall-back below
 * only applies when FP_GEN_RANDOM is still undefined.
 */
/* use arc4random on platforms that support it */
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
#define FP_GEN_RANDOM() arc4random()
#define FP_GEN_RANDOM_MAX 0xffffffff
#endif
/* use rand() as fall-back if there's no better rand function */
#ifndef FP_GEN_RANDOM
#define FP_GEN_RANDOM() rand()
#define FP_GEN_RANDOM_MAX RAND_MAX
#endif
/* some default configurations.
 *
 * fp_digit is the storage type of one digit and fp_word is a wider type
 * used for intermediate results.  SIZEOF_FP_DIGIT is the digit size in bytes.
 */
#if defined(FP_64BIT)
/* for GCC only on supported platforms */
/* ulong64 is only declared here when CRYPT is not defined; presumably LibTomCrypt supplies it otherwise -- confirm */
#ifndef CRYPT
typedef unsigned long long ulong64;
#endif /* CRYPT */
typedef ulong64 fp_digit;
#define SIZEOF_FP_DIGIT 8
/* double-width word, requested through GCC's TI machine-mode attribute */
typedef unsigned long fp_word __attribute__ ((mode(TI)));
#else
/* this is to make porting into LibTomCrypt easier :-) */
#ifndef CRYPT
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef unsigned __int64 ulong64;
typedef signed __int64 long64;
#else
typedef unsigned long long ulong64;
typedef signed long long long64;
#endif /* defined(_MSC_VER) ... */
#endif /* CRYPT */
/* 32-bit digits with a 64-bit word for intermediate results */
typedef unsigned int fp_digit;
#define SIZEOF_FP_DIGIT 4
typedef ulong64 fp_word;
#endif /* FP_64BIT */
/* DIGIT_BIT: number of bits in one fp_digit
 * FP_MASK:   an fp_digit with every bit set
 * FP_SIZE:   number of digits needed to hold FP_MAX_SIZE bits
 */
#define DIGIT_BIT ((CHAR_BIT) * SIZEOF_FP_DIGIT)
#define FP_MASK (fp_digit)(-1)
#define FP_SIZE (FP_MAX_SIZE/DIGIT_BIT)
/* signs */
#define FP_ZPOS 0
#define FP_NEG 1
/* return codes */
#define FP_OKAY 0
#define FP_VAL 1
#define FP_MEM 2
/* equalities (results of the comparison functions) */
#define FP_LT -1 /* less than */
#define FP_EQ 0 /* equal to */
#define FP_GT 1 /* greater than */
/* replies */
#define FP_YES 1 /* yes response */
#define FP_NO 0 /* no response */
/* a FP type: multi-precision integer with a fixed capacity of FP_SIZE digits */
typedef struct {
   fp_digit dp[FP_SIZE]; /* the digits, least significant digit at dp[0] */
   int      used;        /* number of digits of dp currently in use (0 means the value is zero) */
   int      sign;        /* FP_ZPOS or FP_NEG */
} fp_int;
/* functions */
/* returns a TFM ident string useful for debugging... */
const char *fp_ident(void);
/* initialize [or zero] an fp int: clears the whole structure, so used == 0 and sign == FP_ZPOS */
#define fp_init(a) (void)memset((a), 0, sizeof(fp_int))
#define fp_zero(a) fp_init(a)
/* zero/even/odd ? Each yields FP_YES or FP_NO.
 * fp_iszero tests used == 0; fp_iseven/fp_isodd look at the lowest bit of dp[0].
 * fp_iseven accepts used == 0, so a zeroed value reports even.
 */
#define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
#define fp_iseven(a) (((a)->used >= 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
#define fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO)
/* set to a small digit */
void fp_set(fp_int *a, fp_digit b);
/* makes a pseudo-random int of a given size */
void fp_rand(fp_int *a, int digits);
/* copy from a to b (whole structure; nothing is done when a and b are the same object) */
#define fp_copy(a, b) (void)(((a) != (b)) && memcpy((b), (a), sizeof(fp_int)))
/* same copy with the arguments swapped: a is the destination, b the source */
#define fp_init_copy(a, b) fp_copy(b, a)
/* clamp digits: drop leading zero digits from used and force the sign to FP_ZPOS when the value is zero.
 * NOTE: fp_clamp, fp_neg and fp_abs expand to a braced block, not an expression.
 * With a trailing semicolon they cannot be the unbraced body of an if that has an else.
 * Their arguments are expanded more than once.
 */
#define fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : FP_ZPOS; }
/* negate and absolute: b = -a and b = |a| */
#define fp_neg(a, b) { fp_copy(a, b); (b)->sign ^= 1; fp_clamp(b); }
#define fp_abs(a, b) { fp_copy(a, b); (b)->sign = 0; }
/* right shift x digits */
void fp_rshd(fp_int *a, int x);
/* left shift x digits */
void fp_lshd(fp_int *a, int x);
/* signed comparison */
int fp_cmp(fp_int *a, fp_int *b);
/* unsigned comparison (magnitudes only) */
int fp_cmp_mag(fp_int *a, fp_int *b);
/* power of 2 operations */
void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d);
void fp_mod_2d(fp_int *a, int b, fp_int *c);
void fp_mul_2d(fp_int *a, int b, fp_int *c);
void fp_2expt (fp_int *a, int b);
void fp_mul_2(fp_int *a, fp_int *c);
void fp_div_2(fp_int *a, fp_int *c);
/* Counts the number of least significant bits which are zero before the first set bit */
int fp_cnt_lsb(fp_int *a);
/* c = a + b */
void fp_add(fp_int *a, fp_int *b, fp_int *c);
/* c = a - b */
void fp_sub(fp_int *a, fp_int *b, fp_int *c);
/* c = a * b */
void fp_mul(fp_int *a, fp_int *b, fp_int *c);
/* b = a*a */
void fp_sqr(fp_int *a, fp_int *b);
/* a/b => cb + d == a */
int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
/* c = a mod b, 0 <= c < b */
int fp_mod(fp_int *a, fp_int *b, fp_int *c);
/* ---> single digit variants <--- */
/* compare against a single digit */
int fp_cmp_d(fp_int *a, fp_digit b);
/* c = a + b */
void fp_add_d(fp_int *a, fp_digit b, fp_int *c);
/* c = a - b */
void fp_sub_d(fp_int *a, fp_digit b, fp_int *c);
/* c = a * b */
void fp_mul_d(fp_int *a, fp_digit b, fp_int *c);
/* a/b => cb + d == a */
int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d);
/* c = a mod b, 0 <= c < b */
int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c);
/* ---> number theory <--- */
/* d = a + b (mod c) */
int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
/* d = a - b (mod c) */
int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
/* d = a * b (mod c) */
int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
/* c = a * a (mod b) */
int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c);
/* c = 1/a (mod b) */
int fp_invmod(fp_int *a, fp_int *b, fp_int *c);
/* c = (a, b), the greatest common divisor */
void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
/* c = [a, b], the least common multiple */
void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
/* sets up the Montgomery reduction; the per-modulus digit is returned through mp */
int fp_montgomery_setup(fp_int *a, fp_digit *mp);
/* computes a = B**n mod b without division or multiplication useful for
 * normalizing numbers in a Montgomery system.
 */
void fp_montgomery_calc_normalization(fp_int *a, fp_int *b);
/* computes x/R == x (mod N) via Montgomery Reduction */
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp);
/* d = a**b (mod c) */
int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
/* primality stuff */
/* perform a Miller-Rabin test of a to the base b and store result in "result" */
void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result);
/* number of trial divisions performed by fp_isprime (see the comment below) */
#define FP_PRIME_SIZE 256
/* 256 trial divisions + 8 Miller-Rabins, returns FP_YES if probable prime */
int fp_isprime(fp_int *a);
/* extended version of fp_isprime, do 't' Miller-Rabins instead of only 8 */
int fp_isprime_ex(fp_int *a, int t);
/* Primality generation flags (bit mask passed as "flags" to fp_prime_random_ex) */
#define TFM_PRIME_BBS 0x0001 /* BBS style prime */
#define TFM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */
#define TFM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */
#define TFM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */
/* callback for fp_prime_random, should fill dst with random bytes and return how many read [up to len] */
typedef int tfm_prime_callback(unsigned char *dst, int len, void *dat);
/* Convenience wrapper around fp_prime_random_ex():
 *   - passes (size * 8) + 1 as the size argument (presumably converting a byte
 *     count into a bit count -- confirm against fp_prime_random_ex)
 *   - maps bbs == 1 to TFM_PRIME_BBS and any other value to no flags
 * The bbs parameter is parenthesised so that an expression argument such as
 * "x | 1" is compared against 1 as a whole, not parsed as "x | (1 == 1)".
 */
#define fp_prime_random(a, t, size, bbs, cb, dat) fp_prime_random_ex(a, t, ((size) * 8) + 1, (((bbs) == 1) ? TFM_PRIME_BBS : 0), cb, dat)
/* prime generator behind the fp_prime_random() macro; flags is a mask of TFM_PRIME_* values, cb supplies the random bytes */
int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat);
/* radix conversions */
/* number of bits in a */
int fp_count_bits(fp_int *a);
/* unsigned binary form: size in bytes, read from a c-byte buffer, write to a buffer */
int fp_unsigned_bin_size(fp_int *a);
void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c);
void fp_to_unsigned_bin(fp_int *a, unsigned char *b);
/* signed binary form: same trio as above */
int fp_signed_bin_size(fp_int *a);
void fp_read_signed_bin(fp_int *a, const unsigned char *b, int c);
void fp_to_signed_bin(fp_int *a, unsigned char *b);
/* text form in a given radix */
int fp_read_radix(fp_int *a, const char *str, int radix);
int fp_radix_size(fp_int *a, int radix, int *size);
int fp_toradix(fp_int *a, char *str, int radix);
int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen);
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -0,0 +1,125 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#ifndef TFM_PRIVATE_H_
#define TFM_PRIVATE_H_
#include "tfm.h"
/* VARIOUS LOW LEVEL STUFFS */
/* magnitude-only add and subtract (s_fp_sub requires |a| >= |b|) */
void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
/* helper applied to the len-byte buffer s by the binary output routines */
void fp_reverse(unsigned char *s, int len);
/* comba multipliers, C = A * B: the generic version first, then one
 * declaration per fixed digit count, each only visible when the matching
 * TFM_MULn (or TFM_SMALL_SET) symbol from tfm.h is defined
 */
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
#ifdef TFM_SMALL_SET
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL3
void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL4
void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL6
void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL7
void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL8
void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL9
void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL12
void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL17
void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL20
void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL24
void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL28
void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
#endif
/* comba squarers, B = A * A: the generic version first, then one
 * declaration per fixed digit count, each only visible when the matching
 * TFM_SQRn (or TFM_SMALL_SET) symbol from tfm.h is defined
 */
void fp_sqr_comba(fp_int *A, fp_int *B);
#ifdef TFM_SMALL_SET
void fp_sqr_comba_small(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR3
void fp_sqr_comba3(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR4
void fp_sqr_comba4(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR6
void fp_sqr_comba6(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR7
void fp_sqr_comba7(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR8
void fp_sqr_comba8(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR9
void fp_sqr_comba9(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR12
void fp_sqr_comba12(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR17
void fp_sqr_comba17(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR20
void fp_sqr_comba20(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR24
void fp_sqr_comba24(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR28
void fp_sqr_comba28(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B);
#endif
/* NOTE(review): defined elsewhere; presumably the digit-to-character table used by the radix conversion routines -- confirm */
extern const char *fp_s_rmap;
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */