Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

powmod functions from GMP #193

Merged
merged 13 commits into from
Feb 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1281,13 +1281,13 @@ case $host in
penryn-*-*)
path_64="x86_64/core2/penryn x86_64/core2 x86_64" ;;
nehalem-*-*)
path_64="x86_64/nehalem x86_64" ;;
path_64="x86_64/nehalem x86_64/core2 x86_64" ;;
westmere-*-*)
path_64="x86_64/nehalem/westmere x86_64/nehalem x86_64" ;;
path_64="x86_64/nehalem/westmere x86_64/nehalem x86_64/core2 x86_64" ;;
sandybridge-*-*)
path_64="x86_64/sandybridge x86_64" ;;
path_64="x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64" ;;
ivybridge-*-*)
path_64="x86_64/sandybridge/ivybridge x86_64/sandybridge x86_64" ;;
path_64="x86_64/sandybridge/ivybridge x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64" ;;
haswell-*-*)
path_64="x86_64/haswell/avx x86_64/haswell x86_64/sandybridge x86_64" ;;
skylake-*-*)
Expand Down Expand Up @@ -2199,11 +2199,13 @@ gmp_mpn_functions="$extra_functions \
mod_1_1 mod_1_2 mod_1_3 tdiv_q mp_bases fib_table \
mulmid_basecase mulmid mulmid_n toom42_mulmid mulmod_bexpp1 mulmod_2expm1 \
mulmod_2expp1_basecase mul_fft \
mul mul_n mul_basecase sqr_basecase random random2 pow_1 \
mul mul_n mul_basecase sqr_basecase random random2 \
pow_1 powlo powm binvert \
urandomb urandomm randomb rrandom invert \
rootrem sizeinbase sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfect_square_p \
bdivmod gcd gcd_1 gcdext tdiv_qr jacobi_base jacobi jacobi_2 get_d \
mullow_n mulhigh_n mullow_n_basecase mullow_basecase redc_1 redc_2 \
mullow_n mulhigh_n mullow_n_basecase mullow_basecase \
redc_1 redc_2 redc_n \
sb_divappr_q toom3_mul toom3_mul_n toom4_mul toom4_mul_n \
dc_div_q dc_divappr_q sb_div_q sb_div_qr dc_div_qr dc_div_qr_n inv_divappr_q_n \
inv_divappr_q inv_div_q inv_div_qr inv_div_qr_n rootrem_basecase \
Expand Down
1 change: 1 addition & 0 deletions gmp-h.in
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,7 @@ __GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_

#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

Expand Down
59 changes: 54 additions & 5 deletions gmp-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,16 @@ __GMP_DECLSPEC int mpn_is_invert __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t))
#define mpn_invert_trunc __MPN(invert_trunc)
__GMP_DECLSPEC void mpn_invert_trunc __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_binvert __MPN(binvert)
__GMP_DECLSPEC void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_binvert_itch __MPN(binvert_itch)
__GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;

#define mpn_powm __MPN(powm)
__GMP_DECLSPEC void mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_powlo __MPN(powlo)
__GMP_DECLSPEC void mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#ifndef mpn_divrem_euclidean_qr_1 /* if not done with cpuvec in a fat binary */
#define mpn_divrem_euclidean_qr_1 __MPN(divrem_euclidean_qr_1)
__GMP_DECLSPEC mp_limb_t mpn_divrem_euclidean_qr_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t,mp_limb_t));
Expand Down Expand Up @@ -2014,6 +2024,23 @@ __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
#define FFT_MULMOD_2EXPP1_CUTOFF 128
#endif

#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2

#ifndef REDC_1_TO_REDC_2_THRESHOLD
#define REDC_1_TO_REDC_2_THRESHOLD 15
#endif
#ifndef REDC_2_TO_REDC_N_THRESHOLD
#define REDC_2_TO_REDC_N_THRESHOLD 100
#endif

#else

#ifndef REDC_1_TO_REDC_N_THRESHOLD
#define REDC_1_TO_REDC_N_THRESHOLD 100
#endif

#endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */

#ifndef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD (3 * MUL_KARATSUBA_THRESHOLD)
#endif
Expand Down Expand Up @@ -2714,6 +2741,17 @@ __GMP_DECLSPEC void mpz_out_raw_m __GMP_PROTO((mpir_out_ptr, mpz_srcptr));
} \
} while (0)

#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \
do { \
int __cnt; \
mp_bitcnt_t __totbits; \
ASSERT ((size) > 0); \
ASSERT ((ptr)[(size)-1] != 0); \
count_leading_zeros (__cnt, (ptr)[(size)-1]); \
__totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
(result) = (__totbits + (base2exp)-1) / (base2exp); \
} while (0)

/* eliminate mp_bases lookups for base==16 */
#define MPN_SIZEINBASE_16(result, ptr, size) \
do { \
Expand Down Expand Up @@ -4336,6 +4374,10 @@ extern mp_size_t mullow_dc_threshold;
#define MULLOW_MUL_THRESHOLD mullow_mul_threshold
extern mp_size_t mullow_mul_threshold;

#undef MULMID_TOOM42_THRESHOLD
#define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold
extern mp_size_t mulmid_toom42_threshold;

#undef MULHIGH_BASECASE_THRESHOLD
#define MULHIGH_BASECASE_THRESHOLD mulhigh_basecase_threshold
extern mp_size_t mulhigh_basecase_threshold;
Expand Down Expand Up @@ -4394,11 +4436,6 @@ extern mp_size_t dc_divappr_q_threshold;
#define INV_DIVAPPR_Q_THRESHOLD inv_divappr_q_threshold
extern mp_size_t inv_divappr_q_threshold;


#undef POWM_THRESHOLD
#define POWM_THRESHOLD powm_threshold
extern mp_size_t powm_threshold;

#undef ROOTREM_THRESHOLD
#define ROOTREM_THRESHOLD rootrem_threshold
extern mp_size_t rootrem_threshold;
Expand Down Expand Up @@ -4427,6 +4464,18 @@ extern mp_size_t mod_1_2_threshold;
#define MOD_1_3_THRESHOLD mod_1_3_threshold
extern mp_size_t mod_1_3_threshold;

#undef REDC_1_TO_REDC_2_THRESHOLD
#define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
extern mp_size_t redc_1_to_redc_2_threshold;

#undef REDC_2_TO_REDC_N_THRESHOLD
#define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
extern mp_size_t redc_2_to_redc_n_threshold;

#undef REDC_1_TO_REDC_N_THRESHOLD
#define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
extern mp_size_t redc_1_to_redc_n_threshold;

#undef MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
extern mp_size_t matrix22_strassen_threshold;
Expand Down
2 changes: 2 additions & 0 deletions mpn/asm-defs.m4
Original file line number Diff line number Diff line change
Expand Up @@ -1359,6 +1359,8 @@ define_mpn(preinv_divrem_1)
define_mpn(preinv_mod_1)
define_mpn(nand_n)
define_mpn(nior_n)
define_mpn(powm)
define_mpn(powlo)
define_mpn(random)
define_mpn(random2)
define_mpn(redc_1)
Expand Down
125 changes: 125 additions & 0 deletions mpn/generic/binvert.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/* Compute {up,n}^(-1) mod B^n.

Contributed to the GNU project by Torbjorn Granlund.

THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright (C) 2004-2007, 2009, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.

or

* the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any
later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library. If not,
see https://www.gnu.org/licenses/. */

#include "mpir.h"
#include "gmp-impl.h"


/*
r[k+1] = r[k] - r[k] * (u*r[k] - 1)
r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
*/

#if TUNE_PROGRAM_BUILD
#define NPOWS \
((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
#else
#define NPOWS \
((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))
#endif

/* Return the number of scratch limbs mpn_binvert needs for an n-limb
   inversion: one mulmod_bnm1-rounded product area plus the workspace that
   mpn_mulmod_bnm1 itself requires for an n x ceil(n/2) product.  */
mp_size_t
mpn_binvert_itch (mp_size_t n)
{
  mp_size_t prod_alloc, mulmod_alloc;

  prod_alloc = mpn_mulmod_bnm1_next_size (n);
  mulmod_alloc = mpn_mulmod_bnm1_itch (prod_alloc, n, (n + 1) >> 1);

  return prod_alloc + mulmod_alloc;
}

/* Store in {rp,n} the inverse of {up,n} modulo B^n, where B = 2^GMP_NUMB_BITS.
   up[0] must be odd for the inverse to exist (modlimb_invert requires it).
   scratch must provide mpn_binvert_itch(n) limbs.  The algorithm computes a
   small base-case inverse by bdiv, then doubles the precision with Newton
   iterations r' = r + r*(1 - u*r) until n limbs are reached.  */
void
mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr xp;
  mp_size_t rn, newrn;
  mp_size_t sizes[NPOWS], *sizp;
  mp_limb_t di;

  /* Compute the computation precisions from highest to lowest, leaving the
     base case size in 'rn'.  Each Newton step roughly doubles precision, so
     the sizes are n, ceil(n/2), ceil(n/4), ... down past the threshold.  */
  sizp = sizes;
  for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
    *sizp++ = rn;

  xp = scratch;

  /* Compute a base value of rn limbs: invert via exact bdiv of 1 by {up,rn}.  */
  MPN_ZERO (xp, rn);
  xp[0] = 1;
  /* JPF: GMP goes the other way around and has renamed to binvert_limb */
  /* di <- up[0]^(-1) mod B, the single-limb inverse the bdiv routines need.  */
  modlimb_invert (di, up[0]);
  /* JPF: GMP vs MPIR diff; MPIR returns a two limbs overflow and use sub rather than add */
  if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
    mpn_sb_bdiv_q (rp, xp+rn, xp, rn, up, rn, di);
  else
    mpn_dc_bdiv_q (rp, xp, rn, up, rn, di);

  /* Use Newton iterations to get the desired precision.  */
  if (rn == n)
    return;
  newrn = *--sizp;
  for (; newrn < n;)
    {
      mp_size_t m;

      /* X <- UR, computed mod B^m - 1 (m >= newrn), then corrected so the
         low newrn limbs of X equal the low limbs of the full product.  */
      m = mpn_mulmod_bnm1_next_size (newrn);
      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
      mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);

      /* R = R(X/B^rn): extend the inverse by the Newton correction term,
         writing the new high limbs rp[rn..newrn-1] in place.  */
      mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn);
      mpn_neg (rp + rn, rp + rn, newrn - rn);

      rn = newrn;
      newrn = *--sizp;
    }
  /* Last iteration would overflow in the mullow call, so the final step
     writes the mullow result into scratch and negates it into rp instead
     of computing in place.  */
  {
    mp_size_t m;

    /* X <- UR.  */
    m = mpn_mulmod_bnm1_next_size (newrn);
    mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
    mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);

    /* R = R(X/B^rn) */
    mpn_mullow_n (xp + newrn, rp, xp + rn, newrn - rn); /* JPF: would overflow */
    /* At most we need 2*(newrn - rn) limbs at xp + newrn, so need 3*newrn - 2*rn */
    /* As 2rn > newrn, and n == newrn, this gives at max 2*n for xp */
    /* which we already ensure */
    mpn_neg (rp + rn, xp + newrn, newrn - rn);
  }
}
Loading