$search
00001 /* 00002 * Minimal code for RSA support from LibTomMath 0.41 00003 * http://libtom.org/ 00004 * http://libtom.org/files/ltm-0.41.tar.bz2 00005 * This library was released in public domain by Tom St Denis. 00006 * 00007 * The combination in this file may not use all of the optimized algorithms 00008 * from LibTomMath and may be considerable slower than the LibTomMath with its 00009 * default settings. The main purpose of having this version here is to make it 00010 * easier to build bignum.c wrapper without having to install and build an 00011 * external library. 00012 * 00013 * If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this 00014 * libtommath.c file instead of using the external LibTomMath library. 00015 */ 00016 00017 #ifndef CHAR_BIT 00018 #define CHAR_BIT 8 00019 #endif 00020 00021 #define BN_MP_INVMOD_C 00022 #define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would 00023 * require BN_MP_EXPTMOD_FAST_C instead */ 00024 #define BN_S_MP_MUL_DIGS_C 00025 #define BN_MP_INVMOD_SLOW_C 00026 #define BN_S_MP_SQR_C 00027 #define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this 00028 * would require other than mp_reduce */ 00029 00030 #ifdef LTM_FAST 00031 00032 /* Use faster div at the cost of about 1 kB */ 00033 #define BN_MP_MUL_D_C 00034 00035 /* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */ 00036 #define BN_MP_EXPTMOD_FAST_C 00037 #define BN_MP_MONTGOMERY_SETUP_C 00038 #define BN_FAST_MP_MONTGOMERY_REDUCE_C 00039 #define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C 00040 #define BN_MP_MUL_2_C 00041 00042 /* Include faster sqr at the cost of about 0.5 kB in code */ 00043 #define BN_FAST_S_MP_SQR_C 00044 00045 #else /* LTM_FAST */ 00046 00047 #define BN_MP_DIV_SMALL 00048 #define BN_MP_INIT_MULTI_C 00049 #define BN_MP_CLEAR_MULTI_C 00050 #define BN_MP_ABS_C 00051 #endif /* LTM_FAST */ 00052 00053 /* Current uses do not require support for negative exponent in exptmod, so we 00054 * can save about 1.5 kB in leaving out invmod. */ 00055 #define LTM_NO_NEG_EXP 00056 00057 /* from tommath.h */ 00058 00059 #ifndef MIN 00060 #define MIN(x,y) ((x)<(y)?(x):(y)) 00061 #endif 00062 00063 #ifndef MAX 00064 #define MAX(x,y) ((x)>(y)?(x):(y)) 00065 #endif 00066 00067 #define OPT_CAST(x) 00068 00069 typedef unsigned long mp_digit; 00070 typedef u64 mp_word; 00071 00072 #define DIGIT_BIT 28 00073 #define MP_28BIT 00074 00075 00076 #define XMALLOC os_malloc 00077 #define XFREE os_free 00078 #define XREALLOC os_realloc 00079 00080 00081 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 00082 00083 #define MP_LT -1 /* less than */ 00084 #define MP_EQ 0 /* equal to */ 00085 #define MP_GT 1 /* greater than */ 00086 00087 #define MP_ZPOS 0 /* positive integer */ 00088 #define MP_NEG 1 /* negative */ 00089 00090 #define MP_OKAY 0 /* ok result */ 00091 #define MP_MEM -2 /* out of mem */ 00092 #define MP_VAL -3 /* invalid input */ 00093 00094 #define MP_YES 1 /* yes response */ 00095 #define MP_NO 0 /* no response */ 00096 00097 typedef int mp_err; 00098 00099 /* define this to use lower memory usage routines (exptmods mostly) */ 00100 #define MP_LOW_MEM 00101 00102 /* default precision */ 00103 #ifndef MP_PREC 00104 #ifndef MP_LOW_MEM 00105 #define MP_PREC 32 /* default digits of precision */ 00106 #else 00107 #define MP_PREC 8 /* default digits of precision */ 00108 #endif 00109 #endif 00110 00111 /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */ 00112 #define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1)) 00113 00114 /* the infamous mp_int structure */ 00115 typedef struct { 00116 int used, alloc, sign; 00117 mp_digit *dp; 00118 } mp_int; 00119 00120 00121 /* ---> Basic Manipulations <--- */ 00122 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO) 00123 #define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO) 00124 #define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO) 00125 00126 00127 /* prototypes for copied functions */ 00128 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) 00129 static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode); 00130 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs); 00131 static int s_mp_sqr(mp_int * a, mp_int * b); 00132 static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs); 00133 00134 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs); 00135 00136 #ifdef BN_MP_INIT_MULTI_C 00137 static int mp_init_multi(mp_int *mp, ...); 00138 #endif 00139 #ifdef BN_MP_CLEAR_MULTI_C 00140 static void mp_clear_multi(mp_int *mp, ...); 00141 #endif 00142 static int mp_lshd(mp_int * a, int b); 00143 static void mp_set(mp_int * a, mp_digit b); 00144 static void mp_clamp(mp_int * a); 00145 static void mp_exch(mp_int * a, mp_int * b); 00146 static void mp_rshd(mp_int * a, int b); 00147 static void mp_zero(mp_int * a); 00148 static int mp_mod_2d(mp_int * a, int b, mp_int * c); 00149 static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d); 00150 static int mp_init_copy(mp_int * a, mp_int * b); 00151 static int mp_mul_2d(mp_int * a, int b, mp_int * c); 00152 #ifndef LTM_NO_NEG_EXP 00153 static int mp_div_2(mp_int * a, mp_int * b); 00154 static int mp_invmod(mp_int * a, mp_int * b, mp_int * c); 00155 static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c); 00156 #endif /* LTM_NO_NEG_EXP */ 00157 static int mp_copy(mp_int * a, mp_int * b); 00158 static int mp_count_bits(mp_int * a); 00159 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d); 00160 static int mp_mod(mp_int * a, mp_int * b, mp_int * c); 00161 static int mp_grow(mp_int * a, int size); 00162 static int mp_cmp_mag(mp_int * a, mp_int * b); 00163 #ifdef BN_MP_ABS_C 00164 static int mp_abs(mp_int * a, mp_int * b); 00165 #endif 00166 static int mp_sqr(mp_int * a, mp_int * b); 00167 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d); 00168 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d); 00169 static int mp_2expt(mp_int * a, int b); 00170 static int mp_reduce_setup(mp_int * a, mp_int * b); 00171 static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu); 00172 static int mp_init_size(mp_int * a, int size); 00173 #ifdef BN_MP_EXPTMOD_FAST_C 00174 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode); 00175 #endif /* BN_MP_EXPTMOD_FAST_C */ 00176 #ifdef BN_FAST_S_MP_SQR_C 00177 static int fast_s_mp_sqr (mp_int * a, mp_int * b); 00178 #endif /* BN_FAST_S_MP_SQR_C */ 00179 #ifdef BN_MP_MUL_D_C 00180 static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c); 00181 #endif /* BN_MP_MUL_D_C */ 00182 00183 00184 00185 /* functions from bn_<func name>.c */ 00186 00187 00188 /* reverse an array, used for radix code */ 00189 static void bn_reverse (unsigned char *s, int len) 00190 { 00191 int ix, iy; 00192 unsigned char t; 00193 00194 ix = 0; 00195 iy = len - 1; 00196 while (ix < iy) { 00197 t = s[ix]; 00198 s[ix] = s[iy]; 00199 s[iy] = t; 00200 ++ix; 00201 --iy; 00202 } 00203 } 00204 00205 00206 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 00207 static int s_mp_add (mp_int * a, mp_int * b, mp_int * c) 00208 { 00209 mp_int *x; 00210 int olduse, res, min, max; 00211 00212 /* find sizes, we let |a| <= |b| which means we have to sort 00213 * them. "x" will point to the input with the most digits 00214 */ 00215 if (a->used > b->used) { 00216 min = b->used; 00217 max = a->used; 00218 x = a; 00219 } else { 00220 min = a->used; 00221 max = b->used; 00222 x = b; 00223 } 00224 00225 /* init result */ 00226 if (c->alloc < max + 1) { 00227 if ((res = mp_grow (c, max + 1)) != MP_OKAY) { 00228 return res; 00229 } 00230 } 00231 00232 /* get old used digit count and set new one */ 00233 olduse = c->used; 00234 c->used = max + 1; 00235 00236 { 00237 register mp_digit u, *tmpa, *tmpb, *tmpc; 00238 register int i; 00239 00240 /* alias for digit pointers */ 00241 00242 /* first input */ 00243 tmpa = a->dp; 00244 00245 /* second input */ 00246 tmpb = b->dp; 00247 00248 /* destination */ 00249 tmpc = c->dp; 00250 00251 /* zero the carry */ 00252 u = 0; 00253 for (i = 0; i < min; i++) { 00254 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 00255 *tmpc = *tmpa++ + *tmpb++ + u; 00256 00257 /* U = carry bit of T[i] */ 00258 u = *tmpc >> ((mp_digit)DIGIT_BIT); 00259 00260 /* take away carry bit from T[i] */ 00261 *tmpc++ &= MP_MASK; 00262 } 00263 00264 /* now copy higher words if any, that is in A+B 00265 * if A or B has more digits add those in 00266 */ 00267 if (min != max) { 00268 for (; i < max; i++) { 00269 /* T[i] = X[i] + U */ 00270 *tmpc = x->dp[i] + u; 00271 00272 /* U = carry bit of T[i] */ 00273 u = *tmpc >> ((mp_digit)DIGIT_BIT); 00274 00275 /* take away carry bit from T[i] */ 00276 *tmpc++ &= MP_MASK; 00277 } 00278 } 00279 00280 /* add carry */ 00281 *tmpc++ = u; 00282 00283 /* clear digits above oldused */ 00284 for (i = c->used; i < olduse; i++) { 00285 *tmpc++ = 0; 00286 } 00287 } 00288 00289 mp_clamp (c); 00290 return MP_OKAY; 00291 } 00292 00293 00294 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 00295 static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c) 00296 { 00297 int olduse, res, min, max; 00298 00299 /* find sizes */ 00300 min = b->used; 00301 max = a->used; 00302 00303 /* init result */ 00304 if (c->alloc < max) { 00305 if ((res = mp_grow (c, max)) != MP_OKAY) { 00306 return res; 00307 } 00308 } 00309 olduse = c->used; 00310 c->used = max; 00311 00312 { 00313 register mp_digit u, *tmpa, *tmpb, *tmpc; 00314 register int i; 00315 00316 /* alias for digit pointers */ 00317 tmpa = a->dp; 00318 tmpb = b->dp; 00319 tmpc = c->dp; 00320 00321 /* set carry to zero */ 00322 u = 0; 00323 for (i = 0; i < min; i++) { 00324 /* T[i] = A[i] - B[i] - U */ 00325 *tmpc = *tmpa++ - *tmpb++ - u; 00326 00327 /* U = carry bit of T[i] 00328 * Note this saves performing an AND operation since 00329 * if a carry does occur it will propagate all the way to the 00330 * MSB. As a result a single shift is enough to get the carry 00331 */ 00332 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 00333 00334 /* Clear carry from T[i] */ 00335 *tmpc++ &= MP_MASK; 00336 } 00337 00338 /* now copy higher words if any, e.g. if A has more digits than B */ 00339 for (; i < max; i++) { 00340 /* T[i] = A[i] - U */ 00341 *tmpc = *tmpa++ - u; 00342 00343 /* U = carry bit of T[i] */ 00344 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 00345 00346 /* Clear carry from T[i] */ 00347 *tmpc++ &= MP_MASK; 00348 } 00349 00350 /* clear digits above used (since we may not have grown result above) */ 00351 for (i = c->used; i < olduse; i++) { 00352 *tmpc++ = 0; 00353 } 00354 } 00355 00356 mp_clamp (c); 00357 return MP_OKAY; 00358 } 00359 00360 00361 /* init a new mp_int */ 00362 static int mp_init (mp_int * a) 00363 { 00364 int i; 00365 00366 /* allocate memory required and clear it */ 00367 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC); 00368 if (a->dp == NULL) { 00369 return MP_MEM; 00370 } 00371 00372 /* set the digits to zero */ 00373 for (i = 0; i < MP_PREC; i++) { 00374 a->dp[i] = 0; 00375 } 00376 00377 /* set the used to zero, allocated digits to the default precision 00378 * and sign to positive */ 00379 a->used = 0; 00380 a->alloc = MP_PREC; 00381 a->sign = MP_ZPOS; 00382 00383 return MP_OKAY; 00384 } 00385 00386 00387 /* clear one (frees) */ 00388 static void mp_clear (mp_int * a) 00389 { 00390 int i; 00391 00392 /* only do anything if a hasn't been freed previously */ 00393 if (a->dp != NULL) { 00394 /* first zero the digits */ 00395 for (i = 0; i < a->used; i++) { 00396 a->dp[i] = 0; 00397 } 00398 00399 /* free ram */ 00400 XFREE(a->dp); 00401 00402 /* reset members to make debugging easier */ 00403 a->dp = NULL; 00404 a->alloc = a->used = 0; 00405 a->sign = MP_ZPOS; 00406 } 00407 } 00408 00409 00410 /* high level addition (handles signs) */ 00411 static int mp_add (mp_int * a, mp_int * b, mp_int * c) 00412 { 00413 int sa, sb, res; 00414 00415 /* get sign of both inputs */ 00416 sa = a->sign; 00417 sb = b->sign; 00418 00419 /* handle two cases, not four */ 00420 if (sa == sb) { 00421 /* both positive or both negative */ 00422 /* add their magnitudes, copy the sign */ 00423 c->sign = sa; 00424 res = s_mp_add (a, b, c); 00425 } else { 00426 /* one positive, the other negative */ 00427 /* subtract the one with the greater magnitude from */ 00428 /* the one of the lesser magnitude. The result gets */ 00429 /* the sign of the one with the greater magnitude. */ 00430 if (mp_cmp_mag (a, b) == MP_LT) { 00431 c->sign = sb; 00432 res = s_mp_sub (b, a, c); 00433 } else { 00434 c->sign = sa; 00435 res = s_mp_sub (a, b, c); 00436 } 00437 } 00438 return res; 00439 } 00440 00441 00442 /* high level subtraction (handles signs) */ 00443 static int mp_sub (mp_int * a, mp_int * b, mp_int * c) 00444 { 00445 int sa, sb, res; 00446 00447 sa = a->sign; 00448 sb = b->sign; 00449 00450 if (sa != sb) { 00451 /* subtract a negative from a positive, OR */ 00452 /* subtract a positive from a negative. */ 00453 /* In either case, ADD their magnitudes, */ 00454 /* and use the sign of the first number. */ 00455 c->sign = sa; 00456 res = s_mp_add (a, b, c); 00457 } else { 00458 /* subtract a positive from a positive, OR */ 00459 /* subtract a negative from a negative. */ 00460 /* First, take the difference between their */ 00461 /* magnitudes, then... */ 00462 if (mp_cmp_mag (a, b) != MP_LT) { 00463 /* Copy the sign from the first */ 00464 c->sign = sa; 00465 /* The first has a larger or equal magnitude */ 00466 res = s_mp_sub (a, b, c); 00467 } else { 00468 /* The result has the *opposite* sign from */ 00469 /* the first number. */ 00470 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 00471 /* The second has a larger magnitude */ 00472 res = s_mp_sub (b, a, c); 00473 } 00474 } 00475 return res; 00476 } 00477 00478 00479 /* high level multiplication (handles sign) */ 00480 static int mp_mul (mp_int * a, mp_int * b, mp_int * c) 00481 { 00482 int res, neg; 00483 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 00484 00485 /* use Toom-Cook? */ 00486 #ifdef BN_MP_TOOM_MUL_C 00487 if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) { 00488 res = mp_toom_mul(a, b, c); 00489 } else 00490 #endif 00491 #ifdef BN_MP_KARATSUBA_MUL_C 00492 /* use Karatsuba? */ 00493 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 00494 res = mp_karatsuba_mul (a, b, c); 00495 } else 00496 #endif 00497 { 00498 /* can we use the fast multiplier? 00499 * 00500 * The fast multiplier can be used if the output will 00501 * have less than MP_WARRAY digits and the number of 00502 * digits won't affect carry propagation 00503 */ 00504 #ifdef BN_FAST_S_MP_MUL_DIGS_C 00505 int digs = a->used + b->used + 1; 00506 00507 if ((digs < MP_WARRAY) && 00508 MIN(a->used, b->used) <= 00509 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 00510 res = fast_s_mp_mul_digs (a, b, c, digs); 00511 } else 00512 #endif 00513 #ifdef BN_S_MP_MUL_DIGS_C 00514 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ 00515 #else 00516 #error mp_mul could fail 00517 res = MP_VAL; 00518 #endif 00519 00520 } 00521 c->sign = (c->used > 0) ? neg : MP_ZPOS; 00522 return res; 00523 } 00524 00525 00526 /* d = a * b (mod c) */ 00527 static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) 00528 { 00529 int res; 00530 mp_int t; 00531 00532 if ((res = mp_init (&t)) != MP_OKAY) { 00533 return res; 00534 } 00535 00536 if ((res = mp_mul (a, b, &t)) != MP_OKAY) { 00537 mp_clear (&t); 00538 return res; 00539 } 00540 res = mp_mod (&t, c, d); 00541 mp_clear (&t); 00542 return res; 00543 } 00544 00545 00546 /* c = a mod b, 0 <= c < b */ 00547 static int mp_mod (mp_int * a, mp_int * b, mp_int * c) 00548 { 00549 mp_int t; 00550 int res; 00551 00552 if ((res = mp_init (&t)) != MP_OKAY) { 00553 return res; 00554 } 00555 00556 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) { 00557 mp_clear (&t); 00558 return res; 00559 } 00560 00561 if (t.sign != b->sign) { 00562 res = mp_add (b, &t, c); 00563 } else { 00564 res = MP_OKAY; 00565 mp_exch (&t, c); 00566 } 00567 00568 mp_clear (&t); 00569 return res; 00570 } 00571 00572 00573 /* this is a shell function that calls either the normal or Montgomery 00574 * exptmod functions. Originally the call to the montgomery code was 00575 * embedded in the normal function but that wasted alot of stack space 00576 * for nothing (since 99% of the time the Montgomery code would be called) 00577 */ 00578 static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) 00579 { 00580 int dr; 00581 00582 /* modulus P must be positive */ 00583 if (P->sign == MP_NEG) { 00584 return MP_VAL; 00585 } 00586 00587 /* if exponent X is negative we have to recurse */ 00588 if (X->sign == MP_NEG) { 00589 #ifdef LTM_NO_NEG_EXP 00590 return MP_VAL; 00591 #else /* LTM_NO_NEG_EXP */ 00592 #ifdef BN_MP_INVMOD_C 00593 mp_int tmpG, tmpX; 00594 int err; 00595 00596 /* first compute 1/G mod P */ 00597 if ((err = mp_init(&tmpG)) != MP_OKAY) { 00598 return err; 00599 } 00600 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) { 00601 mp_clear(&tmpG); 00602 return err; 00603 } 00604 00605 /* now get |X| */ 00606 if ((err = mp_init(&tmpX)) != MP_OKAY) { 00607 mp_clear(&tmpG); 00608 return err; 00609 } 00610 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { 00611 mp_clear_multi(&tmpG, &tmpX, NULL); 00612 return err; 00613 } 00614 00615 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 00616 err = mp_exptmod(&tmpG, &tmpX, P, Y); 00617 mp_clear_multi(&tmpG, &tmpX, NULL); 00618 return err; 00619 #else 00620 #error mp_exptmod would always fail 00621 /* no invmod */ 00622 return MP_VAL; 00623 #endif 00624 #endif /* LTM_NO_NEG_EXP */ 00625 } 00626 00627 /* modified diminished radix reduction */ 00628 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C) 00629 if (mp_reduce_is_2k_l(P) == MP_YES) { 00630 return s_mp_exptmod(G, X, P, Y, 1); 00631 } 00632 #endif 00633 00634 #ifdef BN_MP_DR_IS_MODULUS_C 00635 /* is it a DR modulus? */ 00636 dr = mp_dr_is_modulus(P); 00637 #else 00638 /* default to no */ 00639 dr = 0; 00640 #endif 00641 00642 #ifdef BN_MP_REDUCE_IS_2K_C 00643 /* if not, is it a unrestricted DR modulus? */ 00644 if (dr == 0) { 00645 dr = mp_reduce_is_2k(P) << 1; 00646 } 00647 #endif 00648 00649 /* if the modulus is odd or dr != 0 use the montgomery method */ 00650 #ifdef BN_MP_EXPTMOD_FAST_C 00651 if (mp_isodd (P) == 1 || dr != 0) { 00652 return mp_exptmod_fast (G, X, P, Y, dr); 00653 } else { 00654 #endif 00655 #ifdef BN_S_MP_EXPTMOD_C 00656 /* otherwise use the generic Barrett reduction technique */ 00657 return s_mp_exptmod (G, X, P, Y, 0); 00658 #else 00659 #error mp_exptmod could fail 00660 /* no exptmod for evens */ 00661 return MP_VAL; 00662 #endif 00663 #ifdef BN_MP_EXPTMOD_FAST_C 00664 } 00665 #endif 00666 } 00667 00668 00669 /* compare two ints (signed)*/ 00670 static int mp_cmp (mp_int * a, mp_int * b) 00671 { 00672 /* compare based on sign */ 00673 if (a->sign != b->sign) { 00674 if (a->sign == MP_NEG) { 00675 return MP_LT; 00676 } else { 00677 return MP_GT; 00678 } 00679 } 00680 00681 /* compare digits */ 00682 if (a->sign == MP_NEG) { 00683 /* if negative compare opposite direction */ 00684 return mp_cmp_mag(b, a); 00685 } else { 00686 return mp_cmp_mag(a, b); 00687 } 00688 } 00689 00690 00691 /* compare a digit */ 00692 static int mp_cmp_d(mp_int * a, mp_digit b) 00693 { 00694 /* compare based on sign */ 00695 if (a->sign == MP_NEG) { 00696 return MP_LT; 00697 } 00698 00699 /* compare based on magnitude */ 00700 if (a->used > 1) { 00701 return MP_GT; 00702 } 00703 00704 /* compare the only digit of a to b */ 00705 if (a->dp[0] > b) { 00706 return MP_GT; 00707 } else if (a->dp[0] < b) { 00708 return MP_LT; 00709 } else { 00710 return MP_EQ; 00711 } 00712 } 00713 00714 00715 #ifndef LTM_NO_NEG_EXP 00716 /* hac 14.61, pp608 */ 00717 static int mp_invmod (mp_int * a, mp_int * b, mp_int * c) 00718 { 00719 /* b cannot be negative */ 00720 if (b->sign == MP_NEG || mp_iszero(b) == 1) { 00721 return MP_VAL; 00722 } 00723 00724 #ifdef BN_FAST_MP_INVMOD_C 00725 /* if the modulus is odd we can use a faster routine instead */ 00726 if (mp_isodd (b) == 1) { 00727 return fast_mp_invmod (a, b, c); 00728 } 00729 #endif 00730 00731 #ifdef BN_MP_INVMOD_SLOW_C 00732 return mp_invmod_slow(a, b, c); 00733 #endif 00734 00735 #ifndef BN_FAST_MP_INVMOD_C 00736 #ifndef BN_MP_INVMOD_SLOW_C 00737 #error mp_invmod would always fail 00738 #endif 00739 #endif 00740 return MP_VAL; 00741 } 00742 #endif /* LTM_NO_NEG_EXP */ 00743 00744 00745 /* get the size for an unsigned equivalent */ 00746 static int mp_unsigned_bin_size (mp_int * a) 00747 { 00748 int size = mp_count_bits (a); 00749 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 00750 } 00751 00752 00753 #ifndef LTM_NO_NEG_EXP 00754 /* hac 14.61, pp608 */ 00755 static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c) 00756 { 00757 mp_int x, y, u, v, A, B, C, D; 00758 int res; 00759 00760 /* b cannot be negative */ 00761 if (b->sign == MP_NEG || mp_iszero(b) == 1) { 00762 return MP_VAL; 00763 } 00764 00765 /* init temps */ 00766 if ((res = mp_init_multi(&x, &y, &u, &v, 00767 &A, &B, &C, &D, NULL)) != MP_OKAY) { 00768 return res; 00769 } 00770 00771 /* x = a, y = b */ 00772 if ((res = mp_mod(a, b, &x)) != MP_OKAY) { 00773 goto LBL_ERR; 00774 } 00775 if ((res = mp_copy (b, &y)) != MP_OKAY) { 00776 goto LBL_ERR; 00777 } 00778 00779 /* 2. [modified] if x,y are both even then return an error! */ 00780 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) { 00781 res = MP_VAL; 00782 goto LBL_ERR; 00783 } 00784 00785 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 00786 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 00787 goto LBL_ERR; 00788 } 00789 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 00790 goto LBL_ERR; 00791 } 00792 mp_set (&A, 1); 00793 mp_set (&D, 1); 00794 00795 top: 00796 /* 4. while u is even do */ 00797 while (mp_iseven (&u) == 1) { 00798 /* 4.1 u = u/2 */ 00799 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 00800 goto LBL_ERR; 00801 } 00802 /* 4.2 if A or B is odd then */ 00803 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) { 00804 /* A = (A+y)/2, B = (B-x)/2 */ 00805 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { 00806 goto LBL_ERR; 00807 } 00808 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 00809 goto LBL_ERR; 00810 } 00811 } 00812 /* A = A/2, B = B/2 */ 00813 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { 00814 goto LBL_ERR; 00815 } 00816 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 00817 goto LBL_ERR; 00818 } 00819 } 00820 00821 /* 5. while v is even do */ 00822 while (mp_iseven (&v) == 1) { 00823 /* 5.1 v = v/2 */ 00824 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 00825 goto LBL_ERR; 00826 } 00827 /* 5.2 if C or D is odd then */ 00828 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) { 00829 /* C = (C+y)/2, D = (D-x)/2 */ 00830 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { 00831 goto LBL_ERR; 00832 } 00833 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 00834 goto LBL_ERR; 00835 } 00836 } 00837 /* C = C/2, D = D/2 */ 00838 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { 00839 goto LBL_ERR; 00840 } 00841 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 00842 goto LBL_ERR; 00843 } 00844 } 00845 00846 /* 6. if u >= v then */ 00847 if (mp_cmp (&u, &v) != MP_LT) { 00848 /* u = u - v, A = A - C, B = B - D */ 00849 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 00850 goto LBL_ERR; 00851 } 00852 00853 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { 00854 goto LBL_ERR; 00855 } 00856 00857 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 00858 goto LBL_ERR; 00859 } 00860 } else { 00861 /* v - v - u, C = C - A, D = D - B */ 00862 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 00863 goto LBL_ERR; 00864 } 00865 00866 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { 00867 goto LBL_ERR; 00868 } 00869 00870 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 00871 goto LBL_ERR; 00872 } 00873 } 00874 00875 /* if not zero goto step 4 */ 00876 if (mp_iszero (&u) == 0) 00877 goto top; 00878 00879 /* now a = C, b = D, gcd == g*v */ 00880 00881 /* if v != 1 then there is no inverse */ 00882 if (mp_cmp_d (&v, 1) != MP_EQ) { 00883 res = MP_VAL; 00884 goto LBL_ERR; 00885 } 00886 00887 /* if its too low */ 00888 while (mp_cmp_d(&C, 0) == MP_LT) { 00889 if ((res = mp_add(&C, b, &C)) != MP_OKAY) { 00890 goto LBL_ERR; 00891 } 00892 } 00893 00894 /* too big */ 00895 while (mp_cmp_mag(&C, b) != MP_LT) { 00896 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { 00897 goto LBL_ERR; 00898 } 00899 } 00900 00901 /* C is now the inverse */ 00902 mp_exch (&C, c); 00903 res = MP_OKAY; 00904 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); 00905 return res; 00906 } 00907 #endif /* LTM_NO_NEG_EXP */ 00908 00909 00910 /* compare maginitude of two ints (unsigned) */ 00911 static int mp_cmp_mag (mp_int * a, mp_int * b) 00912 { 00913 int n; 00914 mp_digit *tmpa, *tmpb; 00915 00916 /* compare based on # of non-zero digits */ 00917 if (a->used > b->used) { 00918 return MP_GT; 00919 } 00920 00921 if (a->used < b->used) { 00922 return MP_LT; 00923 } 00924 00925 /* alias for a */ 00926 tmpa = a->dp + (a->used - 1); 00927 00928 /* alias for b */ 00929 tmpb = b->dp + (a->used - 1); 00930 00931 /* compare based on digits */ 00932 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 00933 if (*tmpa > *tmpb) { 00934 return MP_GT; 00935 } 00936 00937 if (*tmpa < *tmpb) { 00938 return MP_LT; 00939 } 00940 } 00941 return MP_EQ; 00942 } 00943 00944 00945 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 00946 static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c) 00947 { 00948 int res; 00949 00950 /* make sure there are at least two digits */ 00951 if (a->alloc < 2) { 00952 if ((res = mp_grow(a, 2)) != MP_OKAY) { 00953 return res; 00954 } 00955 } 00956 00957 /* zero the int */ 00958 mp_zero (a); 00959 00960 /* read the bytes in */ 00961 while (c-- > 0) { 00962 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { 00963 return res; 00964 } 00965 00966 #ifndef MP_8BIT 00967 a->dp[0] |= *b++; 00968 a->used += 1; 00969 #else 00970 a->dp[0] = (*b & MP_MASK); 00971 a->dp[1] |= ((*b++ >> 7U) & 1); 00972 a->used += 2; 00973 #endif 00974 } 00975 mp_clamp (a); 00976 return MP_OKAY; 00977 } 00978 00979 00980 /* store in unsigned [big endian] format */ 00981 static int mp_to_unsigned_bin (mp_int * a, unsigned char *b) 00982 { 00983 int x, res; 00984 mp_int t; 00985 00986 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 00987 return res; 00988 } 00989 00990 x = 0; 00991 while (mp_iszero (&t) == 0) { 00992 #ifndef MP_8BIT 00993 b[x++] = (unsigned char) (t.dp[0] & 255); 00994 #else 00995 b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7)); 00996 #endif 00997 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) { 00998 mp_clear (&t); 00999 return res; 01000 } 01001 } 01002 bn_reverse (b, x); 01003 mp_clear (&t); 01004 return MP_OKAY; 01005 } 01006 01007 01008 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 01009 static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) 01010 { 01011 mp_digit D, r, rr; 01012 int x, res; 01013 mp_int t; 01014 01015 01016 /* if the shift count is <= 0 then we do no work */ 01017 if (b <= 0) { 01018 res = mp_copy (a, c); 01019 if (d != NULL) { 01020 mp_zero (d); 01021 } 01022 return res; 01023 } 01024 01025 if ((res = mp_init (&t)) != MP_OKAY) { 01026 return res; 01027 } 01028 01029 /* get the remainder */ 01030 if (d != NULL) { 01031 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { 01032 mp_clear (&t); 01033 return res; 01034 } 01035 } 01036 01037 /* copy */ 01038 if ((res = mp_copy (a, c)) != MP_OKAY) { 01039 mp_clear (&t); 01040 return res; 01041 } 01042 01043 /* shift by as many digits in the bit count */ 01044 if (b >= (int)DIGIT_BIT) { 01045 mp_rshd (c, b / DIGIT_BIT); 01046 } 01047 01048 /* shift any bit count < DIGIT_BIT */ 01049 D = (mp_digit) (b % DIGIT_BIT); 01050 if (D != 0) { 01051 register mp_digit *tmpc, mask, shift; 01052 01053 /* mask */ 01054 mask = (((mp_digit)1) << D) - 1; 01055 01056 /* shift for lsb */ 01057 shift = DIGIT_BIT - D; 01058 01059 /* alias */ 01060 tmpc = c->dp + (c->used - 1); 01061 01062 /* carry */ 01063 r = 0; 01064 for (x = c->used - 1; x >= 0; x--) { 01065 /* get the lower bits of this word in a temp */ 01066 rr = *tmpc & mask; 01067 01068 /* shift the current word and mix in the carry bits from the previous word */ 01069 *tmpc = (*tmpc >> D) | (r << shift); 01070 --tmpc; 01071 01072 /* set the carry to the carry bits of the current word found above */ 01073 r = rr; 01074 } 01075 } 01076 mp_clamp (c); 01077 if (d != NULL) { 01078 mp_exch (&t, d); 01079 } 01080 mp_clear (&t); 01081 return MP_OKAY; 01082 } 01083 01084 01085 static int mp_init_copy (mp_int * a, mp_int * b) 01086 { 01087 int res; 01088 01089 if ((res = mp_init (a)) != MP_OKAY) { 01090 return res; 01091 } 01092 return mp_copy (b, a); 01093 } 01094 01095 01096 /* set to zero */ 01097 static void mp_zero (mp_int * a) 01098 { 01099 int n; 01100 mp_digit *tmp; 01101 01102 a->sign = MP_ZPOS; 01103 a->used = 0; 01104 01105 tmp = a->dp; 01106 for (n = 0; n < a->alloc; n++) { 01107 *tmp++ = 0; 01108 } 01109 } 01110 01111 01112 /* copy, b = a */ 01113 static int mp_copy (mp_int * a, mp_int * b) 01114 { 01115 int res, n; 01116 01117 /* if dst == src do nothing */ 01118 if (a == b) { 01119 return MP_OKAY; 01120 } 01121 01122 /* grow dest */ 01123 if (b->alloc < a->used) { 01124 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 01125 return res; 01126 } 01127 } 01128 01129 /* zero b and copy the parameters over */ 01130 { 01131 register mp_digit *tmpa, *tmpb; 01132 01133 /* pointer aliases */ 01134 01135 /* source */ 01136 tmpa = a->dp; 01137 01138 /* destination */ 01139 tmpb = b->dp; 01140 01141 /* copy all the digits */ 01142 for (n = 0; n < a->used; n++) { 01143 *tmpb++ = *tmpa++; 01144 } 01145 01146 /* clear high digits */ 01147 for (; n < b->used; n++) { 01148 *tmpb++ = 0; 01149 } 01150 } 01151 01152 /* copy used count and sign */ 01153 b->used = a->used; 01154 b->sign = a->sign; 01155 return MP_OKAY; 01156 } 01157 01158 01159 /* shift right a certain amount of digits */ 01160 static void mp_rshd (mp_int * a, int b) 01161 { 01162 int x; 01163 01164 /* if b <= 0 then ignore it */ 01165 if (b <= 0) { 01166 return; 01167 } 01168 01169 /* if b > used then simply zero it and return */ 01170 if (a->used <= b) { 01171 mp_zero (a); 01172 return; 01173 } 01174 01175 { 01176 register mp_digit *bottom, *top; 01177 01178 /* shift the digits down */ 01179 01180 /* bottom */ 01181 bottom = a->dp; 01182 01183 /* top [offset into digits] */ 01184 top = a->dp + b; 01185 01186 /* this is implemented as a sliding window where 01187 * the window is b-digits long and digits from 01188 * the top of the window are copied to the bottom 01189 * 01190 * e.g. 01191 01192 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 01193 /\ | ----> 01194 \-------------------/ ----> 01195 */ 01196 for (x = 0; x < (a->used - b); x++) { 01197 *bottom++ = *top++; 01198 } 01199 01200 /* zero the top digits */ 01201 for (; x < a->used; x++) { 01202 *bottom++ = 0; 01203 } 01204 } 01205 01206 /* remove excess digits */ 01207 a->used -= b; 01208 } 01209 01210 01211 /* swap the elements of two integers, for cases where you can't simply swap the 01212 * mp_int pointers around 01213 */ 01214 static void mp_exch (mp_int * a, mp_int * b) 01215 { 01216 mp_int t; 01217 01218 t = *a; 01219 *a = *b; 01220 *b = t; 01221 } 01222 01223 01224 /* trim unused digits 01225 * 01226 * This is used to ensure that leading zero digits are 01227 * trimed and the leading "used" digit will be non-zero 01228 * Typically very fast. Also fixes the sign if there 01229 * are no more leading digits 01230 */ 01231 static void mp_clamp (mp_int * a) 01232 { 01233 /* decrease used while the most significant digit is 01234 * zero. 01235 */ 01236 while (a->used > 0 && a->dp[a->used - 1] == 0) { 01237 --(a->used); 01238 } 01239 01240 /* reset the sign flag if used == 0 */ 01241 if (a->used == 0) { 01242 a->sign = MP_ZPOS; 01243 } 01244 } 01245 01246 01247 /* grow as required */ 01248 static int mp_grow (mp_int * a, int size) 01249 { 01250 int i; 01251 mp_digit *tmp; 01252 01253 /* if the alloc size is smaller alloc more ram */ 01254 if (a->alloc < size) { 01255 /* ensure there are always at least MP_PREC digits extra on top */ 01256 size += (MP_PREC * 2) - (size % MP_PREC); 01257 01258 /* reallocate the array a->dp 01259 * 01260 * We store the return in a temporary variable 01261 * in case the operation failed we don't want 01262 * to overwrite the dp member of a. 01263 */ 01264 tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size); 01265 if (tmp == NULL) { 01266 /* reallocation failed but "a" is still valid [can be freed] */ 01267 return MP_MEM; 01268 } 01269 01270 /* reallocation succeeded so set a->dp */ 01271 a->dp = tmp; 01272 01273 /* zero excess digits */ 01274 i = a->alloc; 01275 a->alloc = size; 01276 for (; i < a->alloc; i++) { 01277 a->dp[i] = 0; 01278 } 01279 } 01280 return MP_OKAY; 01281 } 01282 01283 01284 #ifdef BN_MP_ABS_C 01285 /* b = |a| 01286 * 01287 * Simple function copies the input and fixes the sign to positive 01288 */ 01289 static int mp_abs (mp_int * a, mp_int * b) 01290 { 01291 int res; 01292 01293 /* copy a to b */ 01294 if (a != b) { 01295 if ((res = mp_copy (a, b)) != MP_OKAY) { 01296 return res; 01297 } 01298 } 01299 01300 /* force the sign of b to positive */ 01301 b->sign = MP_ZPOS; 01302 01303 return MP_OKAY; 01304 } 01305 #endif 01306 01307 01308 /* set to a digit */ 01309 static void mp_set (mp_int * a, mp_digit b) 01310 { 01311 mp_zero (a); 01312 a->dp[0] = b & MP_MASK; 01313 a->used = (a->dp[0] != 0) ? 1 : 0; 01314 } 01315 01316 01317 #ifndef LTM_NO_NEG_EXP 01318 /* b = a/2 */ 01319 static int mp_div_2(mp_int * a, mp_int * b) 01320 { 01321 int x, res, oldused; 01322 01323 /* copy */ 01324 if (b->alloc < a->used) { 01325 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 01326 return res; 01327 } 01328 } 01329 01330 oldused = b->used; 01331 b->used = a->used; 01332 { 01333 register mp_digit r, rr, *tmpa, *tmpb; 01334 01335 /* source alias */ 01336 tmpa = a->dp + b->used - 1; 01337 01338 /* dest alias */ 01339 tmpb = b->dp + b->used - 1; 01340 01341 /* carry */ 01342 r = 0; 01343 for (x = b->used - 1; x >= 0; x--) { 01344 /* get the carry for the next iteration */ 01345 rr = *tmpa & 1; 01346 01347 /* shift the current digit, add in carry and store */ 01348 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 01349 01350 /* forward carry to next iteration */ 01351 r = rr; 01352 } 01353 01354 /* zero excess digits */ 01355 tmpb = b->dp + b->used; 01356 for (x = b->used; x < oldused; x++) { 01357 *tmpb++ = 0; 01358 } 01359 } 01360 b->sign = a->sign; 01361 mp_clamp (b); 01362 return MP_OKAY; 01363 } 01364 #endif /* LTM_NO_NEG_EXP */ 01365 01366 01367 /* shift left by a certain bit count */ 01368 static int mp_mul_2d (mp_int * a, int b, mp_int * c) 01369 { 01370 mp_digit d; 01371 int res; 01372 01373 /* copy */ 01374 if (a != c) { 01375 if ((res = mp_copy (a, c)) != MP_OKAY) { 01376 return res; 01377 } 01378 } 01379 01380 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 01381 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 01382 return res; 01383 } 01384 } 01385 01386 /* shift by as many digits in the bit count */ 01387 if (b >= (int)DIGIT_BIT) { 01388 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { 01389 return res; 01390 } 01391 } 01392 01393 /* shift any bit count < DIGIT_BIT */ 01394 d = (mp_digit) (b % DIGIT_BIT); 01395 if (d != 0) { 01396 register mp_digit *tmpc, shift, mask, r, rr; 01397 register int x; 01398 01399 /* bitmask for carries */ 01400 mask = (((mp_digit)1) << d) - 1; 01401 01402 /* shift for msbs */ 01403 shift = DIGIT_BIT - d; 01404 01405 /* alias */ 01406 tmpc = c->dp; 01407 01408 /* carry */ 01409 r = 0; 01410 for (x = 0; x < c->used; x++) { 01411 /* get the higher bits of the current word */ 01412 rr = (*tmpc >> shift) & mask; 01413 01414 /* shift the current word and OR in the carry */ 01415 *tmpc = ((*tmpc << d) | r) & MP_MASK; 01416 ++tmpc; 01417 01418 /* set the carry to the carry bits of the current word */ 01419 r = rr; 01420 } 01421 01422 /* set final carry */ 01423 if (r != 0) { 01424 c->dp[(c->used)++] = r; 01425 } 01426 } 01427 mp_clamp (c); 01428 return MP_OKAY; 01429 } 01430 01431 01432 #ifdef BN_MP_INIT_MULTI_C 01433 static int mp_init_multi(mp_int *mp, ...) 01434 { 01435 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 01436 int n = 0; /* Number of ok inits */ 01437 mp_int* cur_arg = mp; 01438 va_list args; 01439 01440 va_start(args, mp); /* init args to next argument from caller */ 01441 while (cur_arg != NULL) { 01442 if (mp_init(cur_arg) != MP_OKAY) { 01443 /* Oops - error! Back-track and mp_clear what we already 01444 succeeded in init-ing, then return error. 01445 */ 01446 va_list clean_args; 01447 01448 /* end the current list */ 01449 va_end(args); 01450 01451 /* now start cleaning up */ 01452 cur_arg = mp; 01453 va_start(clean_args, mp); 01454 while (n--) { 01455 mp_clear(cur_arg); 01456 cur_arg = va_arg(clean_args, mp_int*); 01457 } 01458 va_end(clean_args); 01459 res = MP_MEM; 01460 break; 01461 } 01462 n++; 01463 cur_arg = va_arg(args, mp_int*); 01464 } 01465 va_end(args); 01466 return res; /* Assumed ok, if error flagged above. */ 01467 } 01468 #endif 01469 01470 01471 #ifdef BN_MP_CLEAR_MULTI_C 01472 static void mp_clear_multi(mp_int *mp, ...) 01473 { 01474 mp_int* next_mp = mp; 01475 va_list args; 01476 va_start(args, mp); 01477 while (next_mp != NULL) { 01478 mp_clear(next_mp); 01479 next_mp = va_arg(args, mp_int*); 01480 } 01481 va_end(args); 01482 } 01483 #endif 01484 01485 01486 /* shift left a certain amount of digits */ 01487 static int mp_lshd (mp_int * a, int b) 01488 { 01489 int x, res; 01490 01491 /* if its less than zero return */ 01492 if (b <= 0) { 01493 return MP_OKAY; 01494 } 01495 01496 /* grow to fit the new digits */ 01497 if (a->alloc < a->used + b) { 01498 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { 01499 return res; 01500 } 01501 } 01502 01503 { 01504 register mp_digit *top, *bottom; 01505 01506 /* increment the used by the shift amount then copy upwards */ 01507 a->used += b; 01508 01509 /* top */ 01510 top = a->dp + a->used - 1; 01511 01512 /* base */ 01513 bottom = a->dp + a->used - 1 - b; 01514 01515 /* much like mp_rshd this is implemented using a sliding window 01516 * except the window goes the otherway around. Copying from 01517 * the bottom to the top. see bn_mp_rshd.c for more info. 01518 */ 01519 for (x = a->used - 1; x >= b; x--) { 01520 *top-- = *bottom--; 01521 } 01522 01523 /* zero the lower digits */ 01524 top = a->dp; 01525 for (x = 0; x < b; x++) { 01526 *top++ = 0; 01527 } 01528 } 01529 return MP_OKAY; 01530 } 01531 01532 01533 /* returns the number of bits in an int */ 01534 static int mp_count_bits (mp_int * a) 01535 { 01536 int r; 01537 mp_digit q; 01538 01539 /* shortcut */ 01540 if (a->used == 0) { 01541 return 0; 01542 } 01543 01544 /* get number of digits and add that */ 01545 r = (a->used - 1) * DIGIT_BIT; 01546 01547 /* take the last digit and count the bits in it */ 01548 q = a->dp[a->used - 1]; 01549 while (q > ((mp_digit) 0)) { 01550 ++r; 01551 q >>= ((mp_digit) 1); 01552 } 01553 return r; 01554 } 01555 01556 01557 /* calc a value mod 2**b */ 01558 static int mp_mod_2d (mp_int * a, int b, mp_int * c) 01559 { 01560 int x, res; 01561 01562 /* if b is <= 0 then zero the int */ 01563 if (b <= 0) { 01564 mp_zero (c); 01565 return MP_OKAY; 01566 } 01567 01568 /* if the modulus is larger than the value than return */ 01569 if (b >= (int) (a->used * DIGIT_BIT)) { 01570 res = mp_copy (a, c); 01571 return res; 01572 } 01573 01574 /* copy */ 01575 if ((res = mp_copy (a, c)) != MP_OKAY) { 01576 return res; 01577 } 01578 01579 /* zero digits above the last digit of the modulus */ 01580 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 01581 c->dp[x] = 0; 01582 } 01583 /* clear the digit that is not completely outside/inside the modulus */ 01584 c->dp[b / DIGIT_BIT] &= 01585 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 01586 mp_clamp (c); 01587 return MP_OKAY; 01588 } 01589 01590 01591 #ifdef BN_MP_DIV_SMALL 01592 01593 /* slower bit-bang division... also smaller */ 01594 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d) 01595 { 01596 mp_int ta, tb, tq, q; 01597 int res, n, n2; 01598 01599 /* is divisor zero ? */ 01600 if (mp_iszero (b) == 1) { 01601 return MP_VAL; 01602 } 01603 01604 /* if a < b then q=0, r = a */ 01605 if (mp_cmp_mag (a, b) == MP_LT) { 01606 if (d != NULL) { 01607 res = mp_copy (a, d); 01608 } else { 01609 res = MP_OKAY; 01610 } 01611 if (c != NULL) { 01612 mp_zero (c); 01613 } 01614 return res; 01615 } 01616 01617 /* init our temps */ 01618 if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) { 01619 return res; 01620 } 01621 01622 01623 mp_set(&tq, 1); 01624 n = mp_count_bits(a) - mp_count_bits(b); 01625 if (((res = mp_abs(a, &ta)) != MP_OKAY) || 01626 ((res = mp_abs(b, &tb)) != MP_OKAY) || 01627 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) || 01628 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) { 01629 goto LBL_ERR; 01630 } 01631 01632 while (n-- >= 0) { 01633 if (mp_cmp(&tb, &ta) != MP_GT) { 01634 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) || 01635 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) { 01636 goto LBL_ERR; 01637 } 01638 } 01639 if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) || 01640 ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) { 01641 goto LBL_ERR; 01642 } 01643 } 01644 01645 /* now q == quotient and ta == remainder */ 01646 n = a->sign; 01647 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG); 01648 if (c != NULL) { 01649 mp_exch(c, &q); 01650 c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2; 01651 } 01652 if (d != NULL) { 01653 mp_exch(d, &ta); 01654 d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n; 01655 } 01656 LBL_ERR: 01657 mp_clear_multi(&ta, &tb, &tq, &q, NULL); 01658 return res; 01659 } 01660 01661 #else 01662 01663 /* integer signed division. 01664 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 01665 * HAC pp.598 Algorithm 14.20 01666 * 01667 * Note that the description in HAC is horribly 01668 * incomplete. For example, it doesn't consider 01669 * the case where digits are removed from 'x' in 01670 * the inner loop. It also doesn't consider the 01671 * case that y has fewer than three digits, etc.. 01672 * 01673 * The overall algorithm is as described as 01674 * 14.20 from HAC but fixed to treat these cases. 01675 */ 01676 static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d) 01677 { 01678 mp_int q, x, y, t1, t2; 01679 int res, n, t, i, norm, neg; 01680 01681 /* is divisor zero ? */ 01682 if (mp_iszero (b) == 1) { 01683 return MP_VAL; 01684 } 01685 01686 /* if a < b then q=0, r = a */ 01687 if (mp_cmp_mag (a, b) == MP_LT) { 01688 if (d != NULL) { 01689 res = mp_copy (a, d); 01690 } else { 01691 res = MP_OKAY; 01692 } 01693 if (c != NULL) { 01694 mp_zero (c); 01695 } 01696 return res; 01697 } 01698 01699 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { 01700 return res; 01701 } 01702 q.used = a->used + 2; 01703 01704 if ((res = mp_init (&t1)) != MP_OKAY) { 01705 goto LBL_Q; 01706 } 01707 01708 if ((res = mp_init (&t2)) != MP_OKAY) { 01709 goto LBL_T1; 01710 } 01711 01712 if ((res = mp_init_copy (&x, a)) != MP_OKAY) { 01713 goto LBL_T2; 01714 } 01715 01716 if ((res = mp_init_copy (&y, b)) != MP_OKAY) { 01717 goto LBL_X; 01718 } 01719 01720 /* fix the sign */ 01721 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 01722 x.sign = y.sign = MP_ZPOS; 01723 01724 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 01725 norm = mp_count_bits(&y) % DIGIT_BIT; 01726 if (norm < (int)(DIGIT_BIT-1)) { 01727 norm = (DIGIT_BIT-1) - norm; 01728 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { 01729 goto LBL_Y; 01730 } 01731 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { 01732 goto LBL_Y; 01733 } 01734 } else { 01735 norm = 0; 01736 } 01737 01738 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 01739 n = x.used - 1; 01740 t = y.used - 1; 01741 01742 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 01743 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 01744 goto LBL_Y; 01745 } 01746 01747 while (mp_cmp (&x, &y) != MP_LT) { 01748 ++(q.dp[n - t]); 01749 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { 01750 goto LBL_Y; 01751 } 01752 } 01753 01754 /* reset y by shifting it back down */ 01755 mp_rshd (&y, n - t); 01756 01757 /* step 3. for i from n down to (t + 1) */ 01758 for (i = n; i >= (t + 1); i--) { 01759 if (i > x.used) { 01760 continue; 01761 } 01762 01763 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 01764 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 01765 if (x.dp[i] == y.dp[t]) { 01766 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 01767 } else { 01768 mp_word tmp; 01769 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 01770 tmp |= ((mp_word) x.dp[i - 1]); 01771 tmp /= ((mp_word) y.dp[t]); 01772 if (tmp > (mp_word) MP_MASK) 01773 tmp = MP_MASK; 01774 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 01775 } 01776 01777 /* while (q{i-t-1} * (yt * b + y{t-1})) > 01778 xi * b**2 + xi-1 * b + xi-2 01779 01780 do q{i-t-1} -= 1; 01781 */ 01782 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 01783 do { 01784 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 01785 01786 /* find left hand */ 01787 mp_zero (&t1); 01788 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 01789 t1.dp[1] = y.dp[t]; 01790 t1.used = 2; 01791 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 01792 goto LBL_Y; 01793 } 01794 01795 /* find right hand */ 01796 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 01797 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 01798 t2.dp[2] = x.dp[i]; 01799 t2.used = 3; 01800 } while (mp_cmp_mag(&t1, &t2) == MP_GT); 01801 01802 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 01803 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 01804 goto LBL_Y; 01805 } 01806 01807 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 01808 goto LBL_Y; 01809 } 01810 01811 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { 01812 goto LBL_Y; 01813 } 01814 01815 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 01816 if (x.sign == MP_NEG) { 01817 if ((res = mp_copy (&y, &t1)) != MP_OKAY) { 01818 goto LBL_Y; 01819 } 01820 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 01821 goto LBL_Y; 01822 } 01823 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { 01824 goto LBL_Y; 01825 } 01826 01827 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 01828 } 01829 } 01830 01831 /* now q is the quotient and x is the remainder 01832 * [which we have to normalize] 01833 */ 01834 01835 /* get sign before writing to c */ 01836 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 01837 01838 if (c != NULL) { 01839 mp_clamp (&q); 01840 mp_exch (&q, c); 01841 c->sign = neg; 01842 } 01843 01844 if (d != NULL) { 01845 mp_div_2d (&x, norm, &x, NULL); 01846 mp_exch (&x, d); 01847 } 01848 01849 res = MP_OKAY; 01850 01851 LBL_Y:mp_clear (&y); 01852 LBL_X:mp_clear (&x); 01853 LBL_T2:mp_clear (&t2); 01854 LBL_T1:mp_clear (&t1); 01855 LBL_Q:mp_clear (&q); 01856 return res; 01857 } 01858 01859 #endif 01860 01861 01862 #ifdef MP_LOW_MEM 01863 #define TAB_SIZE 32 01864 #else 01865 #define TAB_SIZE 256 01866 #endif 01867 01868 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 01869 { 01870 mp_int M[TAB_SIZE], res, mu; 01871 mp_digit buf; 01872 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 01873 int (*redux)(mp_int*,mp_int*,mp_int*); 01874 01875 /* find window size */ 01876 x = mp_count_bits (X); 01877 if (x <= 7) { 01878 winsize = 2; 01879 } else if (x <= 36) { 01880 winsize = 3; 01881 } else if (x <= 140) { 01882 winsize = 4; 01883 } else if (x <= 450) { 01884 winsize = 5; 01885 } else if (x <= 1303) { 01886 winsize = 6; 01887 } else if (x <= 3529) { 01888 winsize = 7; 01889 } else { 01890 winsize = 8; 01891 } 01892 01893 #ifdef MP_LOW_MEM 01894 if (winsize > 5) { 01895 winsize = 5; 01896 } 01897 #endif 01898 01899 /* init M array */ 01900 /* init first cell */ 01901 if ((err = mp_init(&M[1])) != MP_OKAY) { 01902 return err; 01903 } 01904 01905 /* now init the second half of the array */ 01906 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 01907 if ((err = mp_init(&M[x])) != MP_OKAY) { 01908 for (y = 1<<(winsize-1); y < x; y++) { 01909 mp_clear (&M[y]); 01910 } 01911 mp_clear(&M[1]); 01912 return err; 01913 } 01914 } 01915 01916 /* create mu, used for Barrett reduction */ 01917 if ((err = mp_init (&mu)) != MP_OKAY) { 01918 goto LBL_M; 01919 } 01920 01921 if (redmode == 0) { 01922 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { 01923 goto LBL_MU; 01924 } 01925 redux = mp_reduce; 01926 } else { 01927 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { 01928 goto LBL_MU; 01929 } 01930 redux = mp_reduce_2k_l; 01931 } 01932 01933 /* create M table 01934 * 01935 * The M table contains powers of the base, 01936 * e.g. M[x] = G**x mod P 01937 * 01938 * The first half of the table is not 01939 * computed though accept for M[0] and M[1] 01940 */ 01941 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { 01942 goto LBL_MU; 01943 } 01944 01945 /* compute the value at M[1<<(winsize-1)] by squaring 01946 * M[1] (winsize-1) times 01947 */ 01948 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 01949 goto LBL_MU; 01950 } 01951 01952 for (x = 0; x < (winsize - 1); x++) { 01953 /* square it */ 01954 if ((err = mp_sqr (&M[1 << (winsize - 1)], 01955 &M[1 << (winsize - 1)])) != MP_OKAY) { 01956 goto LBL_MU; 01957 } 01958 01959 /* reduce modulo P */ 01960 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 01961 goto LBL_MU; 01962 } 01963 } 01964 01965 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 01966 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 01967 */ 01968 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 01969 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 01970 goto LBL_MU; 01971 } 01972 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) { 01973 goto LBL_MU; 01974 } 01975 } 01976 01977 /* setup result */ 01978 if ((err = mp_init (&res)) != MP_OKAY) { 01979 goto LBL_MU; 01980 } 01981 mp_set (&res, 1); 01982 01983 /* set initial mode and bit cnt */ 01984 mode = 0; 01985 bitcnt = 1; 01986 buf = 0; 01987 digidx = X->used - 1; 01988 bitcpy = 0; 01989 bitbuf = 0; 01990 01991 for (;;) { 01992 /* grab next digit as required */ 01993 if (--bitcnt == 0) { 01994 /* if digidx == -1 we are out of digits */ 01995 if (digidx == -1) { 01996 break; 01997 } 01998 /* read next digit and reset the bitcnt */ 01999 buf = X->dp[digidx--]; 02000 bitcnt = (int) DIGIT_BIT; 02001 } 02002 02003 /* grab the next msb from the exponent */ 02004 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 02005 buf <<= (mp_digit)1; 02006 02007 /* if the bit is zero and mode == 0 then we ignore it 02008 * These represent the leading zero bits before the first 1 bit 02009 * in the exponent. Technically this opt is not required but it 02010 * does lower the # of trivial squaring/reductions used 02011 */ 02012 if (mode == 0 && y == 0) { 02013 continue; 02014 } 02015 02016 /* if the bit is zero and mode == 1 then we square */ 02017 if (mode == 1 && y == 0) { 02018 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 02019 goto LBL_RES; 02020 } 02021 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 02022 goto LBL_RES; 02023 } 02024 continue; 02025 } 02026 02027 /* else we add it to the window */ 02028 bitbuf |= (y << (winsize - ++bitcpy)); 02029 mode = 2; 02030 02031 if (bitcpy == winsize) { 02032 /* ok window is filled so square as required and multiply */ 02033 /* square first */ 02034 for (x = 0; x < winsize; x++) { 02035 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 02036 goto LBL_RES; 02037 } 02038 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 02039 goto LBL_RES; 02040 } 02041 } 02042 02043 /* then multiply */ 02044 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 02045 goto LBL_RES; 02046 } 02047 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 02048 goto LBL_RES; 02049 } 02050 02051 /* empty window and reset */ 02052 bitcpy = 0; 02053 bitbuf = 0; 02054 mode = 1; 02055 } 02056 } 02057 02058 /* if bits remain then square/multiply */ 02059 if (mode == 2 && bitcpy > 0) { 02060 /* square then multiply if the bit is set */ 02061 for (x = 0; x < bitcpy; x++) { 02062 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 02063 goto LBL_RES; 02064 } 02065 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 02066 goto LBL_RES; 02067 } 02068 02069 bitbuf <<= 1; 02070 if ((bitbuf & (1 << winsize)) != 0) { 02071 /* then multiply */ 02072 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 02073 goto LBL_RES; 02074 } 02075 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 02076 goto LBL_RES; 02077 } 02078 } 02079 } 02080 } 02081 02082 mp_exch (&res, Y); 02083 err = MP_OKAY; 02084 LBL_RES:mp_clear (&res); 02085 LBL_MU:mp_clear (&mu); 02086 LBL_M: 02087 mp_clear(&M[1]); 02088 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 02089 mp_clear (&M[x]); 02090 } 02091 return err; 02092 } 02093 02094 02095 /* computes b = a*a */ 02096 static int mp_sqr (mp_int * a, mp_int * b) 02097 { 02098 int res; 02099 02100 #ifdef BN_MP_TOOM_SQR_C 02101 /* use Toom-Cook? */ 02102 if (a->used >= TOOM_SQR_CUTOFF) { 02103 res = mp_toom_sqr(a, b); 02104 /* Karatsuba? */ 02105 } else 02106 #endif 02107 #ifdef BN_MP_KARATSUBA_SQR_C 02108 if (a->used >= KARATSUBA_SQR_CUTOFF) { 02109 res = mp_karatsuba_sqr (a, b); 02110 } else 02111 #endif 02112 { 02113 #ifdef BN_FAST_S_MP_SQR_C 02114 /* can we use the fast comba multiplier? */ 02115 if ((a->used * 2 + 1) < MP_WARRAY && 02116 a->used < 02117 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { 02118 res = fast_s_mp_sqr (a, b); 02119 } else 02120 #endif 02121 #ifdef BN_S_MP_SQR_C 02122 res = s_mp_sqr (a, b); 02123 #else 02124 #error mp_sqr could fail 02125 res = MP_VAL; 02126 #endif 02127 } 02128 b->sign = MP_ZPOS; 02129 return res; 02130 } 02131 02132 02133 /* reduces a modulo n where n is of the form 2**p - d 02134 This differs from reduce_2k since "d" can be larger 02135 than a single digit. 02136 */ 02137 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 02138 { 02139 mp_int q; 02140 int p, res; 02141 02142 if ((res = mp_init(&q)) != MP_OKAY) { 02143 return res; 02144 } 02145 02146 p = mp_count_bits(n); 02147 top: 02148 /* q = a/2**p, a = a mod 2**p */ 02149 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 02150 goto ERR; 02151 } 02152 02153 /* q = q * d */ 02154 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 02155 goto ERR; 02156 } 02157 02158 /* a = a + q */ 02159 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 02160 goto ERR; 02161 } 02162 02163 if (mp_cmp_mag(a, n) != MP_LT) { 02164 s_mp_sub(a, n, a); 02165 goto top; 02166 } 02167 02168 ERR: 02169 mp_clear(&q); 02170 return res; 02171 } 02172 02173 02174 /* determines the setup value */ 02175 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 02176 { 02177 int res; 02178 mp_int tmp; 02179 02180 if ((res = mp_init(&tmp)) != MP_OKAY) { 02181 return res; 02182 } 02183 02184 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 02185 goto ERR; 02186 } 02187 02188 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { 02189 goto ERR; 02190 } 02191 02192 ERR: 02193 mp_clear(&tmp); 02194 return res; 02195 } 02196 02197 02198 /* computes a = 2**b 02199 * 02200 * Simple algorithm which zeroes the int, grows it then just sets one bit 02201 * as required. 02202 */ 02203 static int mp_2expt (mp_int * a, int b) 02204 { 02205 int res; 02206 02207 /* zero a as per default */ 02208 mp_zero (a); 02209 02210 /* grow a to accomodate the single bit */ 02211 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { 02212 return res; 02213 } 02214 02215 /* set the used count of where the bit will go */ 02216 a->used = b / DIGIT_BIT + 1; 02217 02218 /* put the single bit in its place */ 02219 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 02220 02221 return MP_OKAY; 02222 } 02223 02224 02225 /* pre-calculate the value required for Barrett reduction 02226 * For a given modulus "b" it calulates the value required in "a" 02227 */ 02228 static int mp_reduce_setup (mp_int * a, mp_int * b) 02229 { 02230 int res; 02231 02232 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 02233 return res; 02234 } 02235 return mp_div (a, b, a, NULL); 02236 } 02237 02238 02239 /* reduces x mod m, assumes 0 < x < m**2, mu is 02240 * precomputed via mp_reduce_setup. 02241 * From HAC pp.604 Algorithm 14.42 02242 */ 02243 static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu) 02244 { 02245 mp_int q; 02246 int res, um = m->used; 02247 02248 /* q = x */ 02249 if ((res = mp_init_copy (&q, x)) != MP_OKAY) { 02250 return res; 02251 } 02252 02253 /* q1 = x / b**(k-1) */ 02254 mp_rshd (&q, um - 1); 02255 02256 /* according to HAC this optimization is ok */ 02257 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 02258 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { 02259 goto CLEANUP; 02260 } 02261 } else { 02262 #ifdef BN_S_MP_MUL_HIGH_DIGS_C 02263 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { 02264 goto CLEANUP; 02265 } 02266 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) 02267 if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { 02268 goto CLEANUP; 02269 } 02270 #else 02271 { 02272 #error mp_reduce would always fail 02273 res = MP_VAL; 02274 goto CLEANUP; 02275 } 02276 #endif 02277 } 02278 02279 /* q3 = q2 / b**(k+1) */ 02280 mp_rshd (&q, um + 1); 02281 02282 /* x = x mod b**(k+1), quick (no division) */ 02283 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 02284 goto CLEANUP; 02285 } 02286 02287 /* q = q * m mod b**(k+1), quick (no division) */ 02288 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { 02289 goto CLEANUP; 02290 } 02291 02292 /* x = x - q */ 02293 if ((res = mp_sub (x, &q, x)) != MP_OKAY) { 02294 goto CLEANUP; 02295 } 02296 02297 /* If x < 0, add b**(k+1) to it */ 02298 if (mp_cmp_d (x, 0) == MP_LT) { 02299 mp_set (&q, 1); 02300 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) { 02301 goto CLEANUP; 02302 } 02303 if ((res = mp_add (x, &q, x)) != MP_OKAY) { 02304 goto CLEANUP; 02305 } 02306 } 02307 02308 /* Back off if it's too big */ 02309 while (mp_cmp (x, m) != MP_LT) { 02310 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { 02311 goto CLEANUP; 02312 } 02313 } 02314 02315 CLEANUP: 02316 mp_clear (&q); 02317 02318 return res; 02319 } 02320 02321 02322 /* multiplies |a| * |b| and only computes upto digs digits of result 02323 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 02324 * many digits of output are created. 02325 */ 02326 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 02327 { 02328 mp_int t; 02329 int res, pa, pb, ix, iy; 02330 mp_digit u; 02331 mp_word r; 02332 mp_digit tmpx, *tmpt, *tmpy; 02333 02334 /* can we use the fast multiplier? */ 02335 if (((digs) < MP_WARRAY) && 02336 MIN (a->used, b->used) < 02337 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 02338 return fast_s_mp_mul_digs (a, b, c, digs); 02339 } 02340 02341 if ((res = mp_init_size (&t, digs)) != MP_OKAY) { 02342 return res; 02343 } 02344 t.used = digs; 02345 02346 /* compute the digits of the product directly */ 02347 pa = a->used; 02348 for (ix = 0; ix < pa; ix++) { 02349 /* set the carry to zero */ 02350 u = 0; 02351 02352 /* limit ourselves to making digs digits of output */ 02353 pb = MIN (b->used, digs - ix); 02354 02355 /* setup some aliases */ 02356 /* copy of the digit from a used within the nested loop */ 02357 tmpx = a->dp[ix]; 02358 02359 /* an alias for the destination shifted ix places */ 02360 tmpt = t.dp + ix; 02361 02362 /* an alias for the digits of b */ 02363 tmpy = b->dp; 02364 02365 /* compute the columns of the output and propagate the carry */ 02366 for (iy = 0; iy < pb; iy++) { 02367 /* compute the column as a mp_word */ 02368 r = ((mp_word)*tmpt) + 02369 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 02370 ((mp_word) u); 02371 02372 /* the new column is the lower part of the result */ 02373 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 02374 02375 /* get the carry word from the result */ 02376 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 02377 } 02378 /* set carry if it is placed below digs */ 02379 if (ix + iy < digs) { 02380 *tmpt = u; 02381 } 02382 } 02383 02384 mp_clamp (&t); 02385 mp_exch (&t, c); 02386 02387 mp_clear (&t); 02388 return MP_OKAY; 02389 } 02390 02391 02392 /* Fast (comba) multiplier 02393 * 02394 * This is the fast column-array [comba] multiplier. It is 02395 * designed to compute the columns of the product first 02396 * then handle the carries afterwards. This has the effect 02397 * of making the nested loops that compute the columns very 02398 * simple and schedulable on super-scalar processors. 02399 * 02400 * This has been modified to produce a variable number of 02401 * digits of output so if say only a half-product is required 02402 * you don't have to compute the upper half (a feature 02403 * required for fast Barrett reduction). 02404 * 02405 * Based on Algorithm 14.12 on pp.595 of HAC. 02406 * 02407 */ 02408 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 02409 { 02410 int olduse, res, pa, ix, iz; 02411 mp_digit W[MP_WARRAY]; 02412 register mp_word _W; 02413 02414 /* grow the destination as required */ 02415 if (c->alloc < digs) { 02416 if ((res = mp_grow (c, digs)) != MP_OKAY) { 02417 return res; 02418 } 02419 } 02420 02421 /* number of output digits to produce */ 02422 pa = MIN(digs, a->used + b->used); 02423 02424 /* clear the carry */ 02425 _W = 0; 02426 for (ix = 0; ix < pa; ix++) { 02427 int tx, ty; 02428 int iy; 02429 mp_digit *tmpx, *tmpy; 02430 02431 /* get offsets into the two bignums */ 02432 ty = MIN(b->used-1, ix); 02433 tx = ix - ty; 02434 02435 /* setup temp aliases */ 02436 tmpx = a->dp + tx; 02437 tmpy = b->dp + ty; 02438 02439 /* this is the number of times the loop will iterrate, essentially 02440 while (tx++ < a->used && ty-- >= 0) { ... } 02441 */ 02442 iy = MIN(a->used-tx, ty+1); 02443 02444 /* execute loop */ 02445 for (iz = 0; iz < iy; ++iz) { 02446 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 02447 02448 } 02449 02450 /* store term */ 02451 W[ix] = ((mp_digit)_W) & MP_MASK; 02452 02453 /* make next carry */ 02454 _W = _W >> ((mp_word)DIGIT_BIT); 02455 } 02456 02457 /* setup dest */ 02458 olduse = c->used; 02459 c->used = pa; 02460 02461 { 02462 register mp_digit *tmpc; 02463 tmpc = c->dp; 02464 for (ix = 0; ix < pa+1; ix++) { 02465 /* now extract the previous digit [below the carry] */ 02466 *tmpc++ = W[ix]; 02467 } 02468 02469 /* clear unused digits [that existed in the old copy of c] */ 02470 for (; ix < olduse; ix++) { 02471 *tmpc++ = 0; 02472 } 02473 } 02474 mp_clamp (c); 02475 return MP_OKAY; 02476 } 02477 02478 02479 /* init an mp_init for a given size */ 02480 static int mp_init_size (mp_int * a, int size) 02481 { 02482 int x; 02483 02484 /* pad size so there are always extra digits */ 02485 size += (MP_PREC * 2) - (size % MP_PREC); 02486 02487 /* alloc mem */ 02488 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size); 02489 if (a->dp == NULL) { 02490 return MP_MEM; 02491 } 02492 02493 /* set the members */ 02494 a->used = 0; 02495 a->alloc = size; 02496 a->sign = MP_ZPOS; 02497 02498 /* zero the digits */ 02499 for (x = 0; x < size; x++) { 02500 a->dp[x] = 0; 02501 } 02502 02503 return MP_OKAY; 02504 } 02505 02506 02507 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 02508 static int s_mp_sqr (mp_int * a, mp_int * b) 02509 { 02510 mp_int t; 02511 int res, ix, iy, pa; 02512 mp_word r; 02513 mp_digit u, tmpx, *tmpt; 02514 02515 pa = a->used; 02516 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { 02517 return res; 02518 } 02519 02520 /* default used is maximum possible size */ 02521 t.used = 2*pa + 1; 02522 02523 for (ix = 0; ix < pa; ix++) { 02524 /* first calculate the digit at 2*ix */ 02525 /* calculate double precision result */ 02526 r = ((mp_word) t.dp[2*ix]) + 02527 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 02528 02529 /* store lower part in result */ 02530 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 02531 02532 /* get the carry */ 02533 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 02534 02535 /* left hand side of A[ix] * A[iy] */ 02536 tmpx = a->dp[ix]; 02537 02538 /* alias for where to store the results */ 02539 tmpt = t.dp + (2*ix + 1); 02540 02541 for (iy = ix + 1; iy < pa; iy++) { 02542 /* first calculate the product */ 02543 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 02544 02545 /* now calculate the double precision result, note we use 02546 * addition instead of *2 since it's easier to optimize 02547 */ 02548 r = ((mp_word) *tmpt) + r + r + ((mp_word) u); 02549 02550 /* store lower part */ 02551 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 02552 02553 /* get carry */ 02554 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 02555 } 02556 /* propagate upwards */ 02557 while (u != ((mp_digit) 0)) { 02558 r = ((mp_word) *tmpt) + ((mp_word) u); 02559 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 02560 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 02561 } 02562 } 02563 02564 mp_clamp (&t); 02565 mp_exch (&t, b); 02566 mp_clear (&t); 02567 return MP_OKAY; 02568 } 02569 02570 02571 /* multiplies |a| * |b| and does not compute the lower digs digits 02572 * [meant to get the higher part of the product] 02573 */ 02574 static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 02575 { 02576 mp_int t; 02577 int res, pa, pb, ix, iy; 02578 mp_digit u; 02579 mp_word r; 02580 mp_digit tmpx, *tmpt, *tmpy; 02581 02582 /* can we use the fast multiplier? */ 02583 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C 02584 if (((a->used + b->used + 1) < MP_WARRAY) 02585 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 02586 return fast_s_mp_mul_high_digs (a, b, c, digs); 02587 } 02588 #endif 02589 02590 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { 02591 return res; 02592 } 02593 t.used = a->used + b->used + 1; 02594 02595 pa = a->used; 02596 pb = b->used; 02597 for (ix = 0; ix < pa; ix++) { 02598 /* clear the carry */ 02599 u = 0; 02600 02601 /* left hand side of A[ix] * B[iy] */ 02602 tmpx = a->dp[ix]; 02603 02604 /* alias to the address of where the digits will be stored */ 02605 tmpt = &(t.dp[digs]); 02606 02607 /* alias for where to read the right hand side from */ 02608 tmpy = b->dp + (digs - ix); 02609 02610 for (iy = digs - ix; iy < pb; iy++) { 02611 /* calculate the double precision result */ 02612 r = ((mp_word)*tmpt) + 02613 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 02614 ((mp_word) u); 02615 02616 /* get the lower part */ 02617 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 02618 02619 /* carry the carry */ 02620 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 02621 } 02622 *tmpt = u; 02623 } 02624 mp_clamp (&t); 02625 mp_exch (&t, c); 02626 mp_clear (&t); 02627 return MP_OKAY; 02628 } 02629 02630 02631 #ifdef BN_MP_MONTGOMERY_SETUP_C 02632 /* setups the montgomery reduction stuff */ 02633 static int 02634 mp_montgomery_setup (mp_int * n, mp_digit * rho) 02635 { 02636 mp_digit x, b; 02637 02638 /* fast inversion mod 2**k 02639 * 02640 * Based on the fact that 02641 * 02642 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 02643 * => 2*X*A - X*X*A*A = 1 02644 * => 2*(1) - (1) = 1 02645 */ 02646 b = n->dp[0]; 02647 02648 if ((b & 1) == 0) { 02649 return MP_VAL; 02650 } 02651 02652 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 02653 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 02654 #if !defined(MP_8BIT) 02655 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 02656 #endif 02657 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT)) 02658 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 02659 #endif 02660 #ifdef MP_64BIT 02661 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 02662 #endif 02663 02664 /* rho = -1/m mod b */ 02665 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 02666 02667 return MP_OKAY; 02668 } 02669 #endif 02670 02671 02672 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C 02673 /* computes xR**-1 == x (mod N) via Montgomery Reduction 02674 * 02675 * This is an optimized implementation of montgomery_reduce 02676 * which uses the comba method to quickly calculate the columns of the 02677 * reduction. 02678 * 02679 * Based on Algorithm 14.32 on pp.601 of HAC. 02680 */ 02681 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 02682 { 02683 int ix, res, olduse; 02684 mp_word W[MP_WARRAY]; 02685 02686 /* get old used count */ 02687 olduse = x->used; 02688 02689 /* grow a as required */ 02690 if (x->alloc < n->used + 1) { 02691 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { 02692 return res; 02693 } 02694 } 02695 02696 /* first we have to get the digits of the input into 02697 * an array of double precision words W[...] 02698 */ 02699 { 02700 register mp_word *_W; 02701 register mp_digit *tmpx; 02702 02703 /* alias for the W[] array */ 02704 _W = W; 02705 02706 /* alias for the digits of x*/ 02707 tmpx = x->dp; 02708 02709 /* copy the digits of a into W[0..a->used-1] */ 02710 for (ix = 0; ix < x->used; ix++) { 02711 *_W++ = *tmpx++; 02712 } 02713 02714 /* zero the high words of W[a->used..m->used*2] */ 02715 for (; ix < n->used * 2 + 1; ix++) { 02716 *_W++ = 0; 02717 } 02718 } 02719 02720 /* now we proceed to zero successive digits 02721 * from the least significant upwards 02722 */ 02723 for (ix = 0; ix < n->used; ix++) { 02724 /* mu = ai * m' mod b 02725 * 02726 * We avoid a double precision multiplication (which isn't required) 02727 * by casting the value down to a mp_digit. Note this requires 02728 * that W[ix-1] have the carry cleared (see after the inner loop) 02729 */ 02730 register mp_digit mu; 02731 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 02732 02733 /* a = a + mu * m * b**i 02734 * 02735 * This is computed in place and on the fly. The multiplication 02736 * by b**i is handled by offseting which columns the results 02737 * are added to. 02738 * 02739 * Note the comba method normally doesn't handle carries in the 02740 * inner loop In this case we fix the carry from the previous 02741 * column since the Montgomery reduction requires digits of the 02742 * result (so far) [see above] to work. This is 02743 * handled by fixing up one carry after the inner loop. The 02744 * carry fixups are done in order so after these loops the 02745 * first m->used words of W[] have the carries fixed 02746 */ 02747 { 02748 register int iy; 02749 register mp_digit *tmpn; 02750 register mp_word *_W; 02751 02752 /* alias for the digits of the modulus */ 02753 tmpn = n->dp; 02754 02755 /* Alias for the columns set by an offset of ix */ 02756 _W = W + ix; 02757 02758 /* inner loop */ 02759 for (iy = 0; iy < n->used; iy++) { 02760 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 02761 } 02762 } 02763 02764 /* now fix carry for next digit, W[ix+1] */ 02765 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 02766 } 02767 02768 /* now we have to propagate the carries and 02769 * shift the words downward [all those least 02770 * significant digits we zeroed]. 02771 */ 02772 { 02773 register mp_digit *tmpx; 02774 register mp_word *_W, *_W1; 02775 02776 /* nox fix rest of carries */ 02777 02778 /* alias for current word */ 02779 _W1 = W + ix; 02780 02781 /* alias for next word, where the carry goes */ 02782 _W = W + ++ix; 02783 02784 for (; ix <= n->used * 2 + 1; ix++) { 02785 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 02786 } 02787 02788 /* copy out, A = A/b**n 02789 * 02790 * The result is A/b**n but instead of converting from an 02791 * array of mp_word to mp_digit than calling mp_rshd 02792 * we just copy them in the right order 02793 */ 02794 02795 /* alias for destination word */ 02796 tmpx = x->dp; 02797 02798 /* alias for shifted double precision result */ 02799 _W = W + n->used; 02800 02801 for (ix = 0; ix < n->used + 1; ix++) { 02802 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 02803 } 02804 02805 /* zero oldused digits, if the input a was larger than 02806 * m->used+1 we'll have to clear the digits 02807 */ 02808 for (; ix < olduse; ix++) { 02809 *tmpx++ = 0; 02810 } 02811 } 02812 02813 /* set the max used and clamp */ 02814 x->used = n->used + 1; 02815 mp_clamp (x); 02816 02817 /* if A >= m then A = A - m */ 02818 if (mp_cmp_mag (x, n) != MP_LT) { 02819 return s_mp_sub (x, n, x); 02820 } 02821 return MP_OKAY; 02822 } 02823 #endif 02824 02825 02826 #ifdef BN_MP_MUL_2_C 02827 /* b = a*2 */ 02828 static int mp_mul_2(mp_int * a, mp_int * b) 02829 { 02830 int x, res, oldused; 02831 02832 /* grow to accomodate result */ 02833 if (b->alloc < a->used + 1) { 02834 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) { 02835 return res; 02836 } 02837 } 02838 02839 oldused = b->used; 02840 b->used = a->used; 02841 02842 { 02843 register mp_digit r, rr, *tmpa, *tmpb; 02844 02845 /* alias for source */ 02846 tmpa = a->dp; 02847 02848 /* alias for dest */ 02849 tmpb = b->dp; 02850 02851 /* carry */ 02852 r = 0; 02853 for (x = 0; x < a->used; x++) { 02854 02855 /* get what will be the *next* carry bit from the 02856 * MSB of the current digit 02857 */ 02858 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 02859 02860 /* now shift up this digit, add in the carry [from the previous] */ 02861 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 02862 02863 /* copy the carry that would be from the source 02864 * digit into the next iteration 02865 */ 02866 r = rr; 02867 } 02868 02869 /* new leading digit? */ 02870 if (r != 0) { 02871 /* add a MSB which is always 1 at this point */ 02872 *tmpb = 1; 02873 ++(b->used); 02874 } 02875 02876 /* now zero any excess digits on the destination 02877 * that we didn't write to 02878 */ 02879 tmpb = b->dp + b->used; 02880 for (x = b->used; x < oldused; x++) { 02881 *tmpb++ = 0; 02882 } 02883 } 02884 b->sign = a->sign; 02885 return MP_OKAY; 02886 } 02887 #endif 02888 02889 02890 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C 02891 /* 02892 * shifts with subtractions when the result is greater than b. 02893 * 02894 * The method is slightly modified to shift B unconditionally upto just under 02895 * the leading bit of b. This saves alot of multiple precision shifting. 02896 */ 02897 static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b) 02898 { 02899 int x, bits, res; 02900 02901 /* how many bits of last digit does b use */ 02902 bits = mp_count_bits (b) % DIGIT_BIT; 02903 02904 if (b->used > 1) { 02905 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 02906 return res; 02907 } 02908 } else { 02909 mp_set(a, 1); 02910 bits = 1; 02911 } 02912 02913 02914 /* now compute C = A * B mod b */ 02915 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 02916 if ((res = mp_mul_2 (a, a)) != MP_OKAY) { 02917 return res; 02918 } 02919 if (mp_cmp_mag (a, b) != MP_LT) { 02920 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { 02921 return res; 02922 } 02923 } 02924 } 02925 02926 return MP_OKAY; 02927 } 02928 #endif 02929 02930 02931 #ifdef BN_MP_EXPTMOD_FAST_C 02932 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 02933 * 02934 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 02935 * The value of k changes based on the size of the exponent. 02936 * 02937 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 02938 */ 02939 02940 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 02941 { 02942 mp_int M[TAB_SIZE], res; 02943 mp_digit buf, mp; 02944 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 02945 02946 /* use a pointer to the reduction algorithm. This allows us to use 02947 * one of many reduction algorithms without modding the guts of 02948 * the code with if statements everywhere. 02949 */ 02950 int (*redux)(mp_int*,mp_int*,mp_digit); 02951 02952 /* find window size */ 02953 x = mp_count_bits (X); 02954 if (x <= 7) { 02955 winsize = 2; 02956 } else if (x <= 36) { 02957 winsize = 3; 02958 } else if (x <= 140) { 02959 winsize = 4; 02960 } else if (x <= 450) { 02961 winsize = 5; 02962 } else if (x <= 1303) { 02963 winsize = 6; 02964 } else if (x <= 3529) { 02965 winsize = 7; 02966 } else { 02967 winsize = 8; 02968 } 02969 02970 #ifdef MP_LOW_MEM 02971 if (winsize > 5) { 02972 winsize = 5; 02973 } 02974 #endif 02975 02976 /* init M array */ 02977 /* init first cell */ 02978 if ((err = mp_init(&M[1])) != MP_OKAY) { 02979 return err; 02980 } 02981 02982 /* now init the second half of the array */ 02983 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 02984 if ((err = mp_init(&M[x])) != MP_OKAY) { 02985 for (y = 1<<(winsize-1); y < x; y++) { 02986 mp_clear (&M[y]); 02987 } 02988 mp_clear(&M[1]); 02989 return err; 02990 } 02991 } 02992 02993 /* determine and setup reduction code */ 02994 if (redmode == 0) { 02995 #ifdef BN_MP_MONTGOMERY_SETUP_C 02996 /* now setup montgomery */ 02997 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { 02998 goto LBL_M; 02999 } 03000 #else 03001 err = MP_VAL; 03002 goto LBL_M; 03003 #endif 03004 03005 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 03006 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C 03007 if (((P->used * 2 + 1) < MP_WARRAY) && 03008 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 03009 redux = fast_mp_montgomery_reduce; 03010 } else 03011 #endif 03012 { 03013 #ifdef BN_MP_MONTGOMERY_REDUCE_C 03014 /* use slower baseline Montgomery method */ 03015 redux = mp_montgomery_reduce; 03016 #else 03017 err = MP_VAL; 03018 goto LBL_M; 03019 #endif 03020 } 03021 } else if (redmode == 1) { 03022 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C) 03023 /* setup DR reduction for moduli of the form B**k - b */ 03024 mp_dr_setup(P, &mp); 03025 redux = mp_dr_reduce; 03026 #else 03027 err = MP_VAL; 03028 goto LBL_M; 03029 #endif 03030 } else { 03031 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C) 03032 /* setup DR reduction for moduli of the form 2**k - b */ 03033 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 03034 goto LBL_M; 03035 } 03036 redux = mp_reduce_2k; 03037 #else 03038 err = MP_VAL; 03039 goto LBL_M; 03040 #endif 03041 } 03042 03043 /* setup result */ 03044 if ((err = mp_init (&res)) != MP_OKAY) { 03045 goto LBL_M; 03046 } 03047 03048 /* create M table 03049 * 03050 03051 * 03052 * The first half of the table is not computed though accept for M[0] and M[1] 03053 */ 03054 03055 if (redmode == 0) { 03056 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C 03057 /* now we need R mod m */ 03058 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { 03059 goto LBL_RES; 03060 } 03061 #else 03062 err = MP_VAL; 03063 goto LBL_RES; 03064 #endif 03065 03066 /* now set M[1] to G * R mod m */ 03067 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) { 03068 goto LBL_RES; 03069 } 03070 } else { 03071 mp_set(&res, 1); 03072 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { 03073 goto LBL_RES; 03074 } 03075 } 03076 03077 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 03078 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 03079 goto LBL_RES; 03080 } 03081 03082 for (x = 0; x < (winsize - 1); x++) { 03083 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 03084 goto LBL_RES; 03085 } 03086 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 03087 goto LBL_RES; 03088 } 03089 } 03090 03091 /* create upper table */ 03092 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 03093 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 03094 goto LBL_RES; 03095 } 03096 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { 03097 goto LBL_RES; 03098 } 03099 } 03100 03101 /* set initial mode and bit cnt */ 03102 mode = 0; 03103 bitcnt = 1; 03104 buf = 0; 03105 digidx = X->used - 1; 03106 bitcpy = 0; 03107 bitbuf = 0; 03108 03109 for (;;) { 03110 /* grab next digit as required */ 03111 if (--bitcnt == 0) { 03112 /* if digidx == -1 we are out of digits so break */ 03113 if (digidx == -1) { 03114 break; 03115 } 03116 /* read next digit and reset bitcnt */ 03117 buf = X->dp[digidx--]; 03118 bitcnt = (int)DIGIT_BIT; 03119 } 03120 03121 /* grab the next msb from the exponent */ 03122 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1; 03123 buf <<= (mp_digit)1; 03124 03125 /* if the bit is zero and mode == 0 then we ignore it 03126 * These represent the leading zero bits before the first 1 bit 03127 * in the exponent. Technically this opt is not required but it 03128 * does lower the # of trivial squaring/reductions used 03129 */ 03130 if (mode == 0 && y == 0) { 03131 continue; 03132 } 03133 03134 /* if the bit is zero and mode == 1 then we square */ 03135 if (mode == 1 && y == 0) { 03136 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 03137 goto LBL_RES; 03138 } 03139 if ((err = redux (&res, P, mp)) != MP_OKAY) { 03140 goto LBL_RES; 03141 } 03142 continue; 03143 } 03144 03145 /* else we add it to the window */ 03146 bitbuf |= (y << (winsize - ++bitcpy)); 03147 mode = 2; 03148 03149 if (bitcpy == winsize) { 03150 /* ok window is filled so square as required and multiply */ 03151 /* square first */ 03152 for (x = 0; x < winsize; x++) { 03153 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 03154 goto LBL_RES; 03155 } 03156 if ((err = redux (&res, P, mp)) != MP_OKAY) { 03157 goto LBL_RES; 03158 } 03159 } 03160 03161 /* then multiply */ 03162 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 03163 goto LBL_RES; 03164 } 03165 if ((err = redux (&res, P, mp)) != MP_OKAY) { 03166 goto LBL_RES; 03167 } 03168 03169 /* empty window and reset */ 03170 bitcpy = 0; 03171 bitbuf = 0; 03172 mode = 1; 03173 } 03174 } 03175 03176 /* if bits remain then square/multiply */ 03177 if (mode == 2 && bitcpy > 0) { 03178 /* square then multiply if the bit is set */ 03179 for (x = 0; x < bitcpy; x++) { 03180 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 03181 goto LBL_RES; 03182 } 03183 if ((err = redux (&res, P, mp)) != MP_OKAY) { 03184 goto LBL_RES; 03185 } 03186 03187 /* get next bit of the window */ 03188 bitbuf <<= 1; 03189 if ((bitbuf & (1 << winsize)) != 0) { 03190 /* then multiply */ 03191 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 03192 goto LBL_RES; 03193 } 03194 if ((err = redux (&res, P, mp)) != MP_OKAY) { 03195 goto LBL_RES; 03196 } 03197 } 03198 } 03199 } 03200 03201 if (redmode == 0) { 03202 /* fixup result if Montgomery reduction is used 03203 * recall that any value in a Montgomery system is 03204 * actually multiplied by R mod n. So we have 03205 * to reduce one more time to cancel out the factor 03206 * of R. 03207 */ 03208 if ((err = redux(&res, P, mp)) != MP_OKAY) { 03209 goto LBL_RES; 03210 } 03211 } 03212 03213 /* swap res with Y */ 03214 mp_exch (&res, Y); 03215 err = MP_OKAY; 03216 LBL_RES:mp_clear (&res); 03217 LBL_M: 03218 mp_clear(&M[1]); 03219 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 03220 mp_clear (&M[x]); 03221 } 03222 return err; 03223 } 03224 #endif 03225 03226 03227 #ifdef BN_FAST_S_MP_SQR_C 03228 /* the jist of squaring... 03229 * you do like mult except the offset of the tmpx [one that 03230 * starts closer to zero] can't equal the offset of tmpy. 03231 * So basically you set up iy like before then you min it with 03232 * (ty-tx) so that it never happens. You double all those 03233 * you add in the inner loop 03234 03235 After that loop you do the squares and add them in. 03236 */ 03237 03238 static int fast_s_mp_sqr (mp_int * a, mp_int * b) 03239 { 03240 int olduse, res, pa, ix, iz; 03241 mp_digit W[MP_WARRAY], *tmpx; 03242 mp_word W1; 03243 03244 /* grow the destination as required */ 03245 pa = a->used + a->used; 03246 if (b->alloc < pa) { 03247 if ((res = mp_grow (b, pa)) != MP_OKAY) { 03248 return res; 03249 } 03250 } 03251 03252 /* number of output digits to produce */ 03253 W1 = 0; 03254 for (ix = 0; ix < pa; ix++) { 03255 int tx, ty, iy; 03256 mp_word _W; 03257 mp_digit *tmpy; 03258 03259 /* clear counter */ 03260 _W = 0; 03261 03262 /* get offsets into the two bignums */ 03263 ty = MIN(a->used-1, ix); 03264 tx = ix - ty; 03265 03266 /* setup temp aliases */ 03267 tmpx = a->dp + tx; 03268 tmpy = a->dp + ty; 03269 03270 /* this is the number of times the loop will iterrate, essentially 03271 while (tx++ < a->used && ty-- >= 0) { ... } 03272 */ 03273 iy = MIN(a->used-tx, ty+1); 03274 03275 /* now for squaring tx can never equal ty 03276 * we halve the distance since they approach at a rate of 2x 03277 * and we have to round because odd cases need to be executed 03278 */ 03279 iy = MIN(iy, (ty-tx+1)>>1); 03280 03281 /* execute loop */ 03282 for (iz = 0; iz < iy; iz++) { 03283 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 03284 } 03285 03286 /* double the inner product and add carry */ 03287 _W = _W + _W + W1; 03288 03289 /* even columns have the square term in them */ 03290 if ((ix&1) == 0) { 03291 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); 03292 } 03293 03294 /* store it */ 03295 W[ix] = (mp_digit)(_W & MP_MASK); 03296 03297 /* make next carry */ 03298 W1 = _W >> ((mp_word)DIGIT_BIT); 03299 } 03300 03301 /* setup dest */ 03302 olduse = b->used; 03303 b->used = a->used+a->used; 03304 03305 { 03306 mp_digit *tmpb; 03307 tmpb = b->dp; 03308 for (ix = 0; ix < pa; ix++) { 03309 *tmpb++ = W[ix] & MP_MASK; 03310 } 03311 03312 /* clear unused digits [that existed in the old copy of c] */ 03313 for (; ix < olduse; ix++) { 03314 *tmpb++ = 0; 03315 } 03316 } 03317 mp_clamp (b); 03318 return MP_OKAY; 03319 } 03320 #endif 03321 03322 03323 #ifdef BN_MP_MUL_D_C 03324 /* multiply by a digit */ 03325 static int 03326 mp_mul_d (mp_int * a, mp_digit b, mp_int * c) 03327 { 03328 mp_digit u, *tmpa, *tmpc; 03329 mp_word r; 03330 int ix, res, olduse; 03331 03332 /* make sure c is big enough to hold a*b */ 03333 if (c->alloc < a->used + 1) { 03334 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { 03335 return res; 03336 } 03337 } 03338 03339 /* get the original destinations used count */ 03340 olduse = c->used; 03341 03342 /* set the sign */ 03343 c->sign = a->sign; 03344 03345 /* alias for a->dp [source] */ 03346 tmpa = a->dp; 03347 03348 /* alias for c->dp [dest] */ 03349 tmpc = c->dp; 03350 03351 /* zero carry */ 03352 u = 0; 03353 03354 /* compute columns */ 03355 for (ix = 0; ix < a->used; ix++) { 03356 /* compute product and carry sum for this term */ 03357 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); 03358 03359 /* mask off higher bits to get a single digit */ 03360 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 03361 03362 /* send carry into next iteration */ 03363 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 03364 } 03365 03366 /* store final carry [if any] and increment ix offset */ 03367 *tmpc++ = u; 03368 ++ix; 03369 03370 /* now zero digits above the top */ 03371 while (ix++ < olduse) { 03372 *tmpc++ = 0; 03373 } 03374 03375 /* set used count */ 03376 c->used = a->used + 1; 03377 mp_clamp(c); 03378 03379 return MP_OKAY; 03380 } 03381 #endif