Peter Bruin on Wed, 28 Oct 2015 14:37:44 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
Re: Faster digits and fromdigits in base 2^k, and FlxqM_mul_Kronecker |
Hi Bill, > as I understand, x[i] is treated as a ulong, and not as a long. The > meaning of "here $k$ may be any positive \kbd{long}." should be > clarified to be more explicit when k is equal or larger than > BITS_IN_LONG > > But more importantly, the fromdigits_2k_zv should be called > zv_fromdigits_2k, because the input is a zv and not the output. > > And in fact, more accurately, it should be nv_fromdigits_2k because > the entries are treated as unsigned. (the original functions > binary_2k_zv and binary_zv have the same problem) Here are updated patches (the first for the fromdigits_2k and binary_2k functions, the second to use them in FlxqM_mul_Kronecker). Apart from simply renaming fromdigits_2k_zv to nv_fromdigits_2k, I clarified the documentation as follows: --- a/doc/usersch5.tex +++ b/doc/usersch5.tex @@ -2373,9 +2373,11 @@ where each $x[i]$ is a \typ{INT} with $0\leq x[i] < 2^k$, return the integer $\sum_{i = 1}^l x[i] 2^{k(l-i)}$, as a \typ{INT}. -\fun{GEN}{fromdigits_2k_zv}{GEN x, long k} as \tet{fromdigits_2k}, but -with $x$ being a \typ{VECSMALL} and each $x[i]$ being a \kbd{long} -with $0\leq x[i] < 2^k$; here $k$ may be any positive \kbd{long}. +\fun{GEN}{nv_fromdigits_2k}{GEN x, long k} as \tet{fromdigits_2k}, but +with $x$ being a \typ{VECSMALL} and each $x[i]$ being a \kbd{ulong} +with $0\leq x[i] < 2^{\min\{k,\tet{BITS_IN_LONG}\}}$. Here $k$ may be +any positive \kbd{long}, and the $x[i]$ are regarded as $k$-bit +integers by truncating or extending with zeroes. \subsec{Integer valuation} For integers $x$ and $p$, such that $x\neq 0$ and $|p| > 1$, we define I hope this addresses your point. Thanks, Peter
diff --git a/doc/usersch5.tex b/doc/usersch5.tex index 78c4497..069a1a4 100644 --- a/doc/usersch5.tex +++ b/doc/usersch5.tex @@ -2347,7 +2347,7 @@ shifts the mantissa $$f, s[m], s[m+1],\ldots s[M]$$ right by $n$ bits. -\subsec{From \typ{INT} to bits} +\subsec{From \typ{INT} to bits or digits in base $2^k$ and back} \fun{GEN}{binary_zv}{GEN x} given a \typ{INT} $x$, return a \typ{VECSMALL} of bits, from most significant to least significant. @@ -2368,6 +2368,17 @@ proper \kbd{GEN}), return the integer $\sum_{i = 1}^l x[i] 2^{l-i}$, as a \fun{ulong}{bits_to_u}{GEN v, long l} same as \tet{bits_to_int}, where $l < \tet{BITS_IN_LONG}$, so we can return an \kbd{ulong}. +\fun{GEN}{fromdigits_2k}{GEN x, long k} converse of \tet{binary_2k}; +given a \typ{VEC} $x$ of length $l$ and a positive \kbd{long} $k$, +where each $x[i]$ is a \typ{INT} with $0\leq x[i] < 2^k$, return the +integer $\sum_{i = 1}^l x[i] 2^{k(l-i)}$, as a \typ{INT}. + +\fun{GEN}{nv_fromdigits_2k}{GEN x, long k} as \tet{fromdigits_2k}, but +with $x$ being a \typ{VECSMALL} and each $x[i]$ being a \kbd{ulong} +with $0\leq x[i] < 2^{\min\{k,\tet{BITS_IN_LONG}\}}$. Here $k$ may be +any positive \kbd{long}, and the $x[i]$ are regarded as $k$-bit +integers by truncating or extending with zeroes. + \subsec{Integer valuation} For integers $x$ and $p$, such that $x\neq 0$ and $|p| > 1$, we define $v_p(x)$ to be the largest integer exponent $e$ such that $p^e$ divides $x$. diff --git a/src/basemath/arith2.c b/src/basemath/arith2.c index 01726a0..1935132 100644 --- a/src/basemath/arith2.c +++ b/src/basemath/arith2.c @@ -1501,6 +1501,8 @@ fromdigits(GEN x, GEN B) if (typ(x)!=t_VEC || !RgV_is_ZV(x)) pari_err_TYPE("fromdigits",x); B = check_basis(B); if (lg(x)==1) { avma = av; return gen_0; } + if (Z_ispow2(B)) + return fromdigits_2k(x, expi(B)); x = vecreverse(x); return gerepileupto(av, gen_fromdigits(x, B, NULL, &Z_ring)); } diff --git a/src/basemath/bit.c b/src/basemath/bit.c index 6e9e253..183472b 100644 --- a/src/basemath/bit.c +++ b/src/basemath/bit.c @@ -81,6 +81,117 @@ bits_to_u(GEN v, long l) return u; } +/* set BITS_IN_LONG bits starting at word *w plus *r bits, + * clearing subsequent bits in the last word touched */ +INLINE void +int_set_ulong(ulong a, GEN *w, long *r) +{ + if (*r) { + **w |= (a << *r); + *w = int_nextW(*w); + **w = (a >> (BITS_IN_LONG - *r)); + } else { + **w = a; + *w = int_nextW(*w); + } +} + +/* set k bits starting at word *w plus *r bits, + * clearing subsequent bits in the last word touched */ +INLINE void +int_set_bits(ulong a, long k, GEN *w, long *r) +{ + if (*r) { + **w |= a << *r; + a >>= BITS_IN_LONG - *r; + } else { + **w = a; + a = 0; + } + *r += k; + if (*r >= BITS_IN_LONG) { + *w = int_nextW(*w); + *r -= BITS_IN_LONG; + for (; *r >= BITS_IN_LONG; *r -= BITS_IN_LONG) { + **w = a; + a = 0; + *w = int_nextW(*w); + } + if (*r) + **w = a; + } +} + +/* set k bits from z (t_INT) starting at word *w plus *r bits, + * clearing subsequent bits in the last word touched */ +INLINE void +int_set_int(GEN z, long k, GEN *w, long *r) +{ + long l = lgefint(z) - 2; + GEN y; + if (!l) { + int_set_bits(0, k, w, r); + return; + } + y = int_LSW(z); + for (; l > 1; l--) { + int_set_ulong((ulong) *y, w, r); + y = int_nextW(y); + k -= BITS_IN_LONG; + } + if (k) + int_set_bits((ulong) *y, k, w, r); +} + +GEN +nv_fromdigits_2k(GEN x, long k) +{ + long l = lg(x) - 1, r; + GEN w, z; + if (!l) return gen_0; + z = cgetipos(nbits2lg(k * l)); + w = int_LSW(z); + r = 0; + for (; l; l--) + int_set_bits(uel(x, l), k, &w, &r); + return int_normalize(z, 0); +} + +GEN +fromdigits_2k(GEN x, long k) +{ + long l, m; + GEN w, y, z; + for (l = lg(x) - 1; l && !signe(gel(x, 1)); x++, l--); + if (k == 1) return bits_to_int(x, l); + if (!l) return gen_0; + m = expi(gel(x, 1)) + 1; + z = cgetipos(nbits2lg(k * (l - 1) + m)); + w = int_LSW(z); + if (!(k & (BITS_IN_LONG - 1))) { + long i, j, t = k >> TWOPOTBITS_IN_LONG; + for (; l; l--) { + j = lgefint(gel(x, l)) - 2; + y = int_LSW(gel(x, l)); + for (i = 0; i < j; i++) { + *w = *y; + y = int_nextW(y); + w = int_nextW(w); + } + for (; i < t; i++) { + *w = 0; + w = int_nextW(w); + } + } + } else { + long r = 0; + for (; l > 1; l--) + int_set_int(gel(x, l), k, &w, &r); + int_set_int(gel(x, 1), m, &w, &r); + } + return int_normalize(z, 0); +} + GEN binaire(GEN x) { @@ -139,41 +250,101 @@ binaire(GEN x) return y; } +/* extract k bits (as ulong) starting at word *w plus *r bits */ +INLINE ulong +int_get_bits(long k, GEN *w, long *r) +{ + ulong mask = (1UL << k) - 1; + ulong a = (((ulong) **w) >> *r) & mask; + *r += k; + if (*r >= BITS_IN_LONG) { + *r -= BITS_IN_LONG; + *w = int_nextW(*w); + if (*r) + a |= (**w << (k - *r)) & mask; + } + return a; +} + +/* extract BITS_IN_LONG bits starting at word *w plus *r bits */ +INLINE ulong +int_get_ulong(GEN *w, long *r) +{ + ulong a = ((ulong) **w) >> *r; + *w = int_nextW(*w); + if (*r) + a |= (**w << (BITS_IN_LONG - *r)); + return a; +} + +/* extract k bits (as t_INT) starting at word *w plus *r bits */ +INLINE GEN +int_get_int(long k, GEN *w, long *r) +{ + GEN z = cgetipos(nbits2lg(k)); + GEN y = int_LSW(z); + for (; k >= BITS_IN_LONG; k -= BITS_IN_LONG) { + *y = int_get_ulong(w, r); + y = int_nextW(y); + } + if (k) + *y = int_get_bits(k, w, r); + return int_normalize(z, 0); +} + /* assume k < BITS_IN_LONG */ GEN binary_2k_zv(GEN x, long k) { - long iv, j, n, nmodk, nk; - GEN v, vk; + long l, n, r; + GEN v, w; if (k == 1) return binary_zv(x); - if (!signe(x)) return cgetg(1,t_VECSMALL); - v = binary_zv(x); - n = lg(v)-1; - nk = n / k; nmodk = n % k; - if (nmodk) nk++; - vk = cgetg(nk+1, t_VECSMALL); - iv = n - k; - if (!nmodk) nmodk = k; - for (j = nk; j >= 2; j--,iv-=k) vk[j] = bits_to_u(v+iv, k); - vk[1] = bits_to_u(v,nmodk); - return vk; + if (!signe(x)) return cgetg(1, t_VECSMALL); + n = expi(x) + 1; + l = (n + k - 1) / k; + v = cgetg(l + 1, t_VECSMALL); + w = int_LSW(x); + r = 0; + for (; l > 1; l--) { + uel(v, l) = int_get_bits(k, &w, &r); + n -= k; + } + uel(v, 1) = int_get_bits(n, &w, &r); + return v; } + GEN binary_2k(GEN x, long k) { - long iv, j, n, nmodk, nk; - GEN v, vk; - if (!signe(x)) return cgetg(1,t_VEC); - v = binary_zv(x); - n = lg(v)-1; - nk = n / k; nmodk = n % k; - if (nmodk) nk++; - vk = cgetg(nk+1, t_VEC); - iv = n - k; - if (!nmodk) nmodk = k; - for (j = nk; j >= 2; j--,iv-=k) gel(vk,j) = bits_to_int(v+iv, k); - gel(vk,1) = bits_to_int(v, nmodk); - return vk; + long l, n; + GEN v, w, y, z; + if (k == 1) return binaire(x); + if (!signe(x)) return cgetg(1, t_VEC); + n = expi(x) + 1; + l = (n + k - 1) / k; + v = cgetg(l + 1, t_VEC); + w = int_LSW(x); + if (!(k & (BITS_IN_LONG - 1))) { + long m, t = k >> TWOPOTBITS_IN_LONG, u = lgefint(x) - 2; + for (; l; l--) { + m = minss(t, u); + z = cgetipos(m + 2); + y = int_LSW(z); + for (; m; m--) { + *y = *w; + y = int_nextW(y); + w = int_nextW(w); + } + gel(v, l) = int_normalize(z, 0); + u -= t; + } + } else { + long r = 0; + for (; l > 1; l--, n -= k) + gel(v, l) = int_get_int(k, &w, &r); + gel(v, 1) = int_get_int(n, &w, &r); + } + return v; } /* return 1 if bit n of x is set, 0 otherwise */ diff --git a/src/headers/paridecl.h b/src/headers/paridecl.h index d4a4743..514745b 100644 --- a/src/headers/paridecl.h +++ b/src/headers/paridecl.h @@ -2100,6 +2100,8 @@ GEN binary_zv(GEN x); GEN binary_2k(GEN x, long k); GEN binary_2k_zv(GEN x, long k); long bittest(GEN x, long n); +GEN fromdigits_2k(GEN x, long k); +GEN nv_fromdigits_2k(GEN x, long k); GEN gbitand(GEN x, GEN y); GEN gbitneg(GEN x, long n); GEN gbitnegimply(GEN x, GEN y);
diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c index 2fb5d56..d5eb2be 100644 --- a/src/basemath/Flx.c +++ b/src/basemath/Flx.c @@ -4229,6 +4229,18 @@ kron_pack_Flx_spec_3(GEN x, long l) { } static GEN +kron_pack_Flx_spec_bits(GEN x, long b, long l) { + GEN y; + long i; + if (l == 0) + return gen_0; + y = cgetg(l + 1, t_VECSMALL); + for(i = 1; i <= l; i++) + y[i] = x[l - i]; + return nv_fromdigits_2k(y, b); +} + +static GEN kron_unpack_Flx(GEN z, ulong p) { long i, l = lgefint(z); @@ -4250,6 +4262,37 @@ kron_unpack_Flx_3(GEN x, ulong p) { return Z_mod2BIL_Flx_3(x, d, p); } +/* assume b < BITS_IN_LONG */ +static GEN +kron_unpack_Flx_bits_narrow(GEN z, long b, ulong p) { + GEN v = binary_2k_zv(z, b), x; + long i, l = lg(v) + 1; + x = cgetg(l, t_VECSMALL); + for (i = 2; i < l; i++) + x[i] = v[l - i] % p; + return Flx_renormalize(x, l); +} + +static GEN +kron_unpack_Flx_bits_wide(GEN z, long b, ulong p, ulong pi) { + GEN v = binary_2k(z, b), x, y; + long i, l = lg(v) + 1, ly; + x = cgetg(l, t_VECSMALL); + for (i = 2; i < l; i++) { + y = gel(v, l - i); + ly = lgefint(y); + switch (ly) { + case 2: x[i] = 0; break; + case 3: x[i] = *int_W_lg(y, 0, ly) % p; break; + case 4: x[i] = remll_pre(*int_W_lg(y, 1, ly), *int_W_lg(y, 0, ly), p, pi); break; + case 5: x[i] = remlll_pre(*int_W_lg(y, 2, ly), *int_W_lg(y, 1, ly), + *int_W_lg(y, 0, ly), p, pi); break; + default: x[i] = umodiu(gel(v, l - i), p); + } + } + return Flx_renormalize(x, l); +} + static GEN FlxM_pack_ZM(GEN M, GEN (*pack)(GEN, long)) { long i, j, l, lc; @@ -4268,6 +4311,24 @@ FlxM_pack_ZM(GEN M, GEN (*pack)(GEN, long)) { } static GEN +FlxM_pack_ZM_bits(GEN M, long b) +{ + long i, j, l, lc; + GEN N = cgetg_copy(M, &l), x; + if (l == 1) + return N; + lc = lgcols(M); + for (j = 1; j < l; j++) { + gel(N, j) = cgetg(lc, t_COL); + for (i = 1; i < lc; i++) { + x = gcoeff(M, i, j); + gcoeff(N, i, j) = kron_pack_Flx_spec_bits(x + 2, b, lgpol(x)); + } + } + return N; +} + +static GEN ZM_unpack_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong)) { long i, j, l, lc, sv = get_Flx_var(T); @@ -4286,44 +4347,82 @@ ZM_unpack_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong)) return N; } +static GEN +ZM_unpack_FlxqM_bits(GEN M, long b, GEN T, ulong p) +{ + long i, j, l, lc, sv = get_Flx_var(T); + GEN N = cgetg_copy(M, &l), x; + if (l == 1) + return N; + lc = lgcols(M); + if (b < BITS_IN_LONG) { + for (j = 1; j < l; j++) { + gel(N, j) = cgetg(lc, t_COL); + for (i = 1; i < lc; i++) { + x = kron_unpack_Flx_bits_narrow(gcoeff(M, i, j), b, p); + x[1] = sv; + gcoeff(N, i, j) = Flx_rem(x, T, p); + } + } + } else { + ulong pi = get_Fl_red(p); + for (j = 1; j < l; j++) { + gel(N, j) = cgetg(lc, t_COL); + for (i = 1; i < lc; i++) { + x = kron_unpack_Flx_bits_wide(gcoeff(M, i, j), b, p, pi); + x[1] = sv; + gcoeff(N, i, j) = Flx_rem(x, T, p); + } + } + } + return N; +} + GEN FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p) { pari_sp av = avma; - long l, n = lg(A) - 1; + long b, d = degpol(T), n = lg(A) - 1; GEN C, D, z; GEN (*pack)(GEN, long), (*unpack)(GEN, ulong); int is_sqr = A==B; - /* - cf. Flx_mulspec(), maxlengthcoeffpol() - z can be 1, 2, 3 or (theoretically) 4 words long - */ - z = muliu(muliu(sqru(p - 1), degpol(T)), n); - l = lgefint(z); + z = muliu(muliu(sqru(p - 1), d), n); + b = expi(z) + 1; + /* only do expensive bit-packing if it saves at least 1 limb */ + if (b <= BITS_IN_HALFULONG) { + if (nbits2lg(d*b) - 2 == (d + 1)/2) + b = BITS_IN_HALFULONG; + } else { + long l = lgefint(z) - 2; + if (nbits2lg(d*b) - 2 == d*l) + b = l*BITS_IN_LONG; + } avma = av; - switch (l - 2) { - case 1: - if (HIGHWORD(z[2]) == 0) { - pack = kron_pack_Flx_spec_half; - unpack = int_to_Flx_half; - } else { - pack = kron_pack_Flx_spec; - unpack = kron_unpack_Flx; - } + switch (b) { + case BITS_IN_HALFULONG: + pack = kron_pack_Flx_spec_half; + unpack = int_to_Flx_half; break; - case 2: + case BITS_IN_LONG: + pack = kron_pack_Flx_spec; + unpack = kron_unpack_Flx; + break; + case 2*BITS_IN_LONG: pack = kron_pack_Flx_spec_2; unpack = kron_unpack_Flx_2; break; - case 3: + case 3*BITS_IN_LONG: pack = kron_pack_Flx_spec_3; unpack = kron_unpack_Flx_3; break; default: - /* not implemented, probably won't occur in practice */ - return NULL; + A = FlxM_pack_ZM_bits(A, b); + B = is_sqr? A: FlxM_pack_ZM_bits(B, b); + C = ZM_mul(A, B); + D = ZM_unpack_FlxqM_bits(C, b, T, p); + return gerepilecopy(av, D); } A = FlxM_pack_ZM(A, pack); B = is_sqr? A: FlxM_pack_ZM(B, pack); diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c index 383f2eb..e3bab48 100644 --- a/src/basemath/alglin1.c +++ b/src/basemath/alglin1.c @@ -981,11 +981,8 @@ FlxqM_mul(GEN A, GEN B, GEN T, ulong p) { if (n == 0) return cgetg(1, t_MAT); - if (n > 1) { - GEN C = FlxqM_mul_Kronecker(A, B, T, p); - if (C != NULL) - return C; - } + if (n > 1) + return FlxqM_mul_Kronecker(A, B, T, p); ff = get_Flxq_field(&E, T, p); return gen_matmul(A, B, E, ff); }