Bill Allombert on Tue, 20 Jan 2004 00:55:33 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
patch for Montgomery-style reduction in Flxq_pow |
Hello Pari-dev, Here a new patch that implement Montgomery-style reduction for Flxq_pow. This patch ned more tuning, and the inversion function would deserve to not be quadratic. Here the benchmark used: --- install(Flx_invmontgomery,GL); install(Flxq_pow,GGGL); install(FpXQ_pow,GGGG); install(Flx_redmontgomery,GGGL); install(Flx_rem,GGL); install(ZX_Flx,GL); montgo1(P,p)=Flx_ZX(Flx_invmontgomery(ZX_Flx(P,p),p)) montgo2(P,p)=variable(P)/polrecip(P*Mod(1,p))+O(x^poldegree(P)) montgo(P,p)=montgo1(P,p)==montgo2(P,p) { print("i\t:\ttmg\t:\tmon\t:\trem"); for(k=1,15,e=2^k; P=Pol(vector(e,i,random(17))); Pv=ZX_Flx(P,17); gettime(); mg=Flx_invmontgomery(Pv,17); tmg=gettime(); Xv=ZX_Flx(x^(2*e-2),17); gettime(); for(i=1,2^(16-k),Flx_rem(Xv,Pv,17)); trem=gettime(); for(i=1,2^(16-k),Flx_redmontgomery(Xv,mg,Pv,17)); tmon=gettime(); print(k,"\t:\t",tmg,"\t:\t",tmon,"\t:\t",trem); ) } ----- i : tmg : mon : rem 1 : 0 : 50 : 40 2 : 0 : 30 : 30 3 : 0 : 20 : 20 4 : 0 : 20 : 10 5 : 0 : 30 : 20 6 : 0 : 30 : 30 7 : 0 : 50 : 50 8 : 0 : 70 : 90 9 : 0 : 100 : 170 10 : 20 : 150 : 340 11 : 80 : 220 : 670 12 : 310 : 330 : 1320 13 : 1220 : 510 : 2650 14 : 4920 : 760 : 5520 15 : 19750 : 1170 : 11430 (this is with the PARI kernel, GMP would be faster) Cheers, Bill. Index: src/basemath/Flx.c =================================================================== RCS file: /home/cvs/pari/src/basemath/Flx.c,v retrieving revision 1.12 diff -u -r1.12 Flx.c --- src/basemath/Flx.c 17 Jan 2004 18:47:50 -0000 1.12 +++ src/basemath/Flx.c 19 Jan 2004 23:12:08 -0000 @@ -233,6 +233,18 @@ return Flx_renormalize(x,l); } +/*UNCLEAN modify y[2] (FpX_Fp_add do the same)*/ +GEN +Flx_Fl_add(GEN y,ulong x,ulong p) +{ + long v=varn(y); + if (lg(y)==2) + return Fl_Flx(x,v); + y[2] = adduumod((ulong)y[2],x,p); + if (y[2]==0 && degpol(y) == 0) return Flx_zero(v); + return y; +} + GEN Flx_addspec(GEN x, GEN y, ulong p, long lx, long ly) { @@ -261,6 +273,27 @@ } GEN +Flx_subspec(GEN x, GEN y, ulong p, long lx, long ly) +{ + long i,lz; + GEN z; + + if (ly <= lx) + { + lz = lx+2; z = cgetg(lz, t_VECSMALL)+2; + for (i=0; i<ly; i++) z[i] = (long)subuumod((ulong)x[i],(ulong)y[i],p); + for ( ; i<lx; i++) z[i] = x[i]; + } + else + { + lz = ly+2; z = cgetg(lz, t_VECSMALL)+2; + for (i=0; i<lx; i++) z[i] = (long)subuumod((ulong)x[i],(ulong)y[i],p); + for ( ; i<ly; i++) z[i] = y[i]? (long)(p - y[i]): y[i]; + } + return Flx_renormalize(z-2, lz); +} + +GEN Flx_sub(GEN x, GEN y, ulong p) { long i,lz,lx = lg(x), ly = lg(y); @@ -282,12 +315,21 @@ } GEN +Flx_negspec(GEN x, ulong p, long l) +{ + long i; + GEN z = cgetg(l+2, t_VECSMALL) + 2; + for (i=0; i<l; i++) + z[i] = x[i]? (long)p - x[i]: 0; + return z-2; +} + + +GEN Flx_neg(GEN x, ulong p) { - long i, l = lg(x); - GEN z = cgetg(l, t_VECSMALL); + GEN z = Flx_negspec(x+2, p, lgpol(x)); z[1] = x[1]; - for (i=2; i<l; i++) z[i] = x[i]? (long)p - x[i]: 0; return z; } @@ -426,7 +468,7 @@ for ( ; i<nx; i++) z[i] = (long)Flx_mullimb(x+i,y,p,0,ny); for ( ; i<nz; i++) z[i] = (long)Flx_mullimb(x+i,y,p,i-nx+1,ny); } - z -= 2; return Flx_renormalize(z, lz); + z -= 2; return z; } INLINE GEN @@ -541,7 +583,7 @@ z[i] = (long)p1; } } - z -= 2; return Flx_renormalize(z,lz); + z -= 2; return z; } GEN @@ -780,6 +822,80 @@ } GEN +Flx_recipspec(GEN x, long l, long n) +{ + long i; + GEN z=cgetg(n+2,t_VECSMALL)+2; + for(i=0; i<l; i++) + z[n-i-1] = x[i]; + for( ; i<n; i++) + z[n-i-1] = 0; + return Flx_renormalize(z-2,n+2); +} + +GEN +Flx_recip(GEN x) +{ + GEN z=Flx_recipspec(x+2,lgpol(x),lgpol(x)); + z[1]=x[1]; + return z; +} + +/* + * x/polrecip(P)+O(x^n) + */ +GEN Flx_invmontgomery(GEN T, ulong p) +{ + pari_sp ltop=avma; + long i, l=lg(T), k; + long vt=T[1]; + GEN r; + ulong c=T[l-1], ci=1; + if (l<5) return Flx_zero(vt); + if (c!=1) + { + ci=invumod(c,p); + T=Flx_Fl_mul(T, ci, p); + } + r=cgetg(l-1,t_VECSMALL); r[1]=vt; + r[2]=0; r[3]=1; + for (i=4;i<l-1;i++) + { + r[i] = 0; + for (k=3;k<i;k++) + r[i]= (long) subuumod((ulong)r[i],muluumod((ulong)T[l-i+k-1],(ulong)r[k],p),p); + } + r = Flx_renormalize(r,l-1); + if (c!=1) r=Flx_Fl_mul(r,ci,p); + return gerepileupto(ltop, r); +} + +/* Compute x mod T where lg(x)<=2*lg(T)-2 + * and mg is the Montgomery inverse of T. + */ +GEN Flx_redmontgomery(GEN x, GEN mg, GEN T, ulong p) +{ + pari_sp ltop=avma; + GEN z; + long l=lgpol(x); + long lt=degpol(T); /*We discard the leading term*/ + long lead=lt-1; + long ld=l-lt+1; + long lm=min(ld,lgpol(mg)); + if (l<=lt) + return Flx_copy(x); + new_chunk(lt); + z=Flx_recipspec(x+2+lead,ld,ld); /* z = rec(x)*/ + z=Flx_mulspec(z+2,mg+2,p,min(ld,lgpol(z)),lm);/* z = rec(x) * mg */ + z=Flx_recipspec(z+2,lgpol(z),ld); /* z = rec (rec(x) * mg) */ + z=Flx_mulspec(z+2,T+2,p,min(ld,lgpol(z)),lt); /* z *= pol */ + avma=ltop; + z=Flx_subspec(x+2,z+2,p,lt,min(lt,lgpol(z))); /* z = x - z */ + z[1]=T[1]; + return z; +} + +GEN Flx_deriv(GEN z, ulong p) { long i,l = lg(z)-1; @@ -1098,6 +1214,7 @@ typedef struct { GEN pol; + GEN mg; ulong p; } Flxq_muldata; @@ -1106,13 +1223,13 @@ _u_sqr(void *data, GEN x) { Flxq_muldata *D = (Flxq_muldata*)data; - return Flxq_sqr(x, D->pol, D->p); + return Flx_redmontgomery(Flx_sqr(x,D->p),D->mg, D->pol, D->p); } static GEN _u_mul(void *data, GEN x, GEN y) { Flxq_muldata *D = (Flxq_muldata*)data; - return Flxq_mul(x,y, D->pol, D->p); + return Flx_redmontgomery(Flx_mul(x,y,D->p),D->mg, D->pol, D->p); } /* n-Power of x in Z/pZ[X]/(pol), as t_VECSMALL. */ @@ -1125,6 +1242,8 @@ GEN y; D.pol = pol; D.p = p; + D.mg = Flx_invmontgomery(pol,p); + x=Flx_rem(x, pol, p); y = leftright_pow(x, n, (void*)&D, &_u_sqr, &_u_mul); return gerepileupto(av, y); }