| Bill Allombert on Sun, 11 Jan 2004 19:22:15 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| Re: Flx_sqr/sqri comparaison |
On Sun, Jan 11, 2004 at 01:14:34PM +0100, Bill Allombert wrote: > Hello PARI-dev, > I have a plan to reuse GMP fast multiplication in Flx_sqr. Well, here a prototype patch that implement that. Dues to the bench above it is also worthwhile to do it with the PARI kernel. This patch deserve some tuning. Here the results : i : old : PARI : GMP 1 : 120 : 140 : 170 2 : 80 : 70 : 80 3 : 80 : 50 : 50 4 : 90 : 60 : 70 5 : 130 : 70 : 100 6 : 190 : 110 : 120 7 : 300 : 150 : 160 8 : 460 : 230 : 280 9 : 700 : 340 : 370 10 : 1060 : 490 : 560 11 : 1620 : 750 : 780 12 : 2450 : 1120 : 960 13 : 3690 : 1690 : 1000 14 : 5600 : 2560 : 1140 15 : 8420 : 3830 : 1220 16 : 12670 : 5780 : 1540 I am still puzzled by straight Flx_sqr being so slow. Cheers, Bill
Index: src/basemath/Flx.c
===================================================================
RCS file: /home/cvs/pari/src/basemath/Flx.c,v
retrieving revision 1.10
diff -u -r1.10 Flx.c
--- src/basemath/Flx.c 11 Jan 2004 00:21:04 -0000 1.10
+++ src/basemath/Flx.c 11 Jan 2004 17:35:14 -0000
@@ -14,6 +14,8 @@
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include "pari.h"
+GEN muliispec(GEN x, GEN y, long nx, long ny);
+GEN sqrispec(GEN x, long nx);
/* Not so fast arithmetic with polynomials with small coefficients. */
@@ -210,15 +212,25 @@
return x;
}
+/*Do not renormalize. Must not use z[0],z[1]*/
+static GEN
+Flx_red_lg_i(GEN z, long l, ulong p)
+{
+ long i;
+ ulong *y=(ulong *)z;
+ GEN x = cgetg(l, t_VECSMALL);
+ for (i=2; i<l; i++)
+ x[i] = (long) y[i]%p;
+ return x;
+}
+
GEN
Flx_red(GEN z, ulong p)
{
- long i,l;
- GEN x;
- l = lg(z); x = cgetg(l, t_VECSMALL);
- for (i=2; i<l; i++)
- x[i] = z[i]%p;
- x[1] = z[1]; return Flx_renormalize(x,l);
+ long l = lg(z);
+ GEN x = Flx_red_lg_i(z,l,p);
+ x[1] = z[1];
+ return Flx_renormalize(x,l);
}
GEN
@@ -356,6 +368,18 @@
return y;
}
+static long
+maxlenghtcoeffpol (ulong p, long n)
+{
+ pari_sp ltop=avma;
+ long l;
+ GEN z=muluu(p-1,p-1);
+ z=mulis(z,n);
+ l=lgefint(z)-2;
+ avma=ltop;
+ return l;
+}
+
INLINE ulong
Flx_mullimb_ok(GEN x, GEN y, ulong p, long a, long b)
{ /* Assume OK_ULONG*/
@@ -367,7 +391,8 @@
p1 += y[i] * x[-i];
if (p1 & HIGHBIT) p1 %= p;
}
- return p1 % p;
+ if (p1>=p) p1 %= p;
+ return p1;
}
INLINE ulong
@@ -382,7 +407,7 @@
}
/* assume nx >= ny > 0 */
-GEN
+static GEN
Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny)
{
long i,lz,nz;
@@ -405,6 +430,13 @@
z -= 2; return Flx_renormalize(z, lz);
}
+static GEN
+Flx_mulspec_mulii(GEN a, GEN b, ulong p, long na, long nb)
+{
+ GEN z=muliispec(a,b,na,nb);
+ return Flx_red_lg_i(z,lgefint(z),p);
+}
+
/* fast product (Karatsuba) of polynomials a,b. These are not real GENs, a+2,
* b+2 were sent instead. na, nb = number of terms of a, b.
* Only c, c0, c1, c2 are genuine GEN.
@@ -422,6 +454,8 @@
if (!nb) return Flx_zero(0);
av = avma;
+ if (maxlenghtcoeffpol(p,nb)==1)
+ return Flx_shiftip(av,Flx_mulspec_mulii(a,b,p,na,nb), v);
if (nb < Flx_MUL_LIMIT)
return Flx_shiftip(av,Flx_mulspec_basecase(a,b,p,na,nb), v);
i=(na>>1); n0=na-i; na=i;
@@ -464,7 +498,7 @@
z[1] = x[1]; return z;
}
-GEN
+static GEN
Flx_sqrspec_basecase(GEN x, ulong p, long nx)
{
long i, lz, nz;
@@ -520,6 +554,13 @@
z[1] = x[1]; return z;
}
+static GEN
+Flx_sqrspec_sqri(GEN a, ulong p, long na)
+{
+ GEN z=sqrispec(a,na);
+ return Flx_red_lg_i(z,lgefint(z),p);
+}
+
GEN
Flx_sqrspec(GEN a, ulong p, long na)
{
@@ -529,6 +570,8 @@
while (na && !a[0]) { a++; na--; v += 2; }
av = avma;
+ if (maxlenghtcoeffpol(p,na)==1)
+ return Flx_shiftip(av, Flx_sqrspec_sqri(a,p,na), v);
if (na < Flx_SQR_LIMIT)
return Flx_shiftip(av, Flx_sqrspec_basecase(a,p,na), v);
i=(na>>1); n0=na-i; na=i;
Index: src/kernel/gmp/mp.c
===================================================================
RCS file: /home/cvs/pari/src/kernel/gmp/mp.c,v
retrieving revision 1.31
diff -u -r1.31 mp.c
--- src/kernel/gmp/mp.c 14 Oct 2003 08:57:35 -0000 1.31
+++ src/kernel/gmp/mp.c 11 Jan 2004 17:35:16 -0000
@@ -791,8 +791,7 @@
z[1] = evalsigne(s) | evalexpo(m+e); return z;
}
-INLINE GEN
-muliispec(GEN x, GEN y, long nx, long ny);
+GEN muliispec(GEN x, GEN y, long nx, long ny);
/* We must have nx>=ny. This lets garbage on the stack.
This handle squares correctly (mpn_mul is optimized
@@ -800,7 +799,7 @@
*/
INLINE GEN
-quickmulii(GEN x, GEN y, long nx, long ny)
+muliispec_mirror(GEN x, GEN y, long nx, long ny)
{
GEN cx=new_chunk(nx),cy;
GEN z;
@@ -826,11 +825,11 @@
long i, l, lz2 = (lz+2)>>1, lz3 = lz-lz2;
GEN lo1, lo2, hi;
- hi = quickmulii(x,y, lz2,lz2);
+ hi = muliispec_mirror(x,y, lz2,lz2);
i = lz2; while (i<lz && !x[i]) i++;
- lo1 = quickmulii(y,x+i, lz2,lz-i);
+ lo1 = muliispec_mirror(y,x+i, lz2,lz-i);
i = lz2; while (i<ly && !y[i]) i++;
- lo2 = quickmulii(x,y+i, lz2,ly-i);
+ lo2 = muliispec_mirror(x,y+i, lz2,ly-i);
if (signe(lo1))
{
if (ly!=lz) { lo2 = addshiftw(lo1,lo2,1); lz3++; }
@@ -861,7 +860,7 @@
#ifdef KARAMULR_VARIANT
GEN hi = karamulrr1(y+2, x+2, lz+flag-2, lz-2);
#else
- GEN hi = quickmulii(y+2, x+2, lz+flag-2, lz-2);
+ GEN hi = muliispec_mirror(y+2, x+2, lz+flag-2, lz-2);
#endif
long i, garde = hi[lz];
if (hi[2] < 0)
@@ -1753,7 +1752,7 @@
/********************************************************************/
/* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
-INLINE GEN
+GEN
muliispec(GEN x, GEN y, long nx, long ny)
{
GEN zd;
@@ -1775,7 +1774,7 @@
return zd;
}
-INLINE GEN
+GEN
sqrispec(GEN x, long nx)
{
GEN zd;
Index: src/kernel/none/mp.c
===================================================================
RCS file: /home/cvs/pari/src/kernel/none/mp.c,v
retrieving revision 1.121
diff -u -r1.121 mp.c
--- src/kernel/none/mp.c 10 Jan 2004 16:33:02 -0000 1.121
+++ src/kernel/none/mp.c 11 Jan 2004 17:35:17 -0000
@@ -720,7 +720,7 @@
z[1] = evalsigne(s) | evalexpo(m+e); return z;
}
-static GEN muliispec(GEN a, GEN b, long na, long nb);
+GEN muliispec(GEN a, GEN b, long na, long nb);
/*#define KARAMULR_VARIANT*/
#ifdef KARAMULR_VARIANT
@@ -1974,7 +1974,7 @@
/* Fast product (Karatsuba) of integers. a and b are "special" GENs
* c,c0,c1,c2 are genuine GENs.
*/
-static GEN
+GEN
muliispec(GEN a, GEN b, long na, long nb)
{
GEN a0,c,c0;
@@ -2100,7 +2100,7 @@
return z;
}
-static GEN
+GEN
sqrispec(GEN a, long na)
{
GEN a0,c;