Peter Bruin on Tue, 10 Feb 2015 00:00:07 +0100


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Re: FlxqM_mul_Kronecker


Hello Bill,

> On Sat, Feb 07, 2015 at 01:22:47PM +0100, Peter Bruin wrote:
>> Bonjour,
>> 
>> Here is a patch to add a new (private) function FlxqM_mul_Kronecker that
>> I have been working on since the Atelier PARI/GP in January.  It uses
>> Kronecker substitution to multiply matrices over non-prime finite fields
>> of small characteristics.  Over the range of matrices I tested it on, it
>> is on average 60%-80% faster than the existing FlxqM_mul; for some
>> examples, the speedup is up to 4-5 times.
>
> I read your patch.

Thanks!  More changes are in the attached patch.  (I guess it is not
possible for me to directly update the peter-FlxqM_mul_Kronecker
branch?)

> I think the function FlxM_to_ZM and ZM_to_FlxqM should be renamed
> to something more awkward, maybe FlxM_pack_ZM/ZM_pack_FlxqM

They are now called kron_pack_FlxM and kron_unpack_FlxM; how about that?

> The function zx_to_int* should also be renamed. zx denotes polynomials
> with small signed coefficient. Maybe we should add pack/unpack in the
> name since the functions are only wrapper for use as (*pack) or
> (*unpack).

These have been renamed to kron_pack_Flx_spec* and kron_unpack_Flx*.

> In FlxqM_mul_Kronecker, maybe a switch(l-2) would be better.

OK, done.

Although the new int_to_Flx was renamed to kron_unpack_Flx, I left the
renaming of the old int_to_Flx to int_LSWfirst_to_Flx in place because
it is both more awkward and more descriptive.

Finally, I discovered that kron_unpack_FlxqM had an extraneous
evalvarn(); this is also fixed by the attached patch.

Thanks,

Peter


diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c
index c12d467..5dca5e7 100644
--- a/src/basemath/Flx.c
+++ b/src/basemath/Flx.c
@@ -644,28 +644,6 @@ Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny)
   z -= 2; return Flx_renormalize(z, lz);
 }
 
-static GEN
-Flx_to_int_spec(GEN x, long l) {
-  long i;
-  GEN w, y;
-  if (l == 0)
-    return gen_0;
-  y = cgetipos(l + 2);
-  for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w))
-    *w = x[i];
-  return y;
-}
-
-static GEN
-int_to_Flx(GEN z, ulong p)
-{
-  long i, l = lgefint(z);
-  GEN x = cgetg(l, t_VECSMALL), w;
-  for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++)
-    x[i] = ((ulong) *w) % p;
-  return Flx_renormalize(x, l);
-}
-
 /* z has least significant word first */
 static GEN
 int_LSWfirst_to_Flx(GEN z, ulong p)
@@ -3994,36 +3972,58 @@ FlxqV_roots_to_pol(GEN V, GEN T, ulong p, long v)
 /*** FlxqM ***/
 
 static GEN
-zx_to_int_halfspec(GEN x, long l) {
+kron_pack_Flx_spec_half(GEN x, long l) {
   if (l == 0)
     return gen_0;
   return Flx_to_int_halfspec(x, l);
 }
 
 static GEN
-zx_to_int_spec_2(GEN x, long l) {
+kron_pack_Flx_spec(GEN x, long l) {
+  long i;
+  GEN w, y;
+  if (l == 0)
+    return gen_0;
+  y = cgetipos(l + 2);
+  for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w))
+    *w = x[i];
+  return y;
+}
+
+static GEN
+kron_pack_Flx_spec_2(GEN x, long l) {
   return Flx_eval2BILspec(x, 2, l);
 }
 
 static GEN
-zx_to_int_spec_3(GEN x, long l) {
+kron_pack_Flx_spec_3(GEN x, long l) {
   return Flx_eval2BILspec(x, 3, l);
 }
 
 static GEN
-int_to_Flx_2(GEN x, ulong p) {
+kron_unpack_Flx(GEN z, ulong p)
+{
+  long i, l = lgefint(z);
+  GEN x = cgetg(l, t_VECSMALL), w;
+  for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++)
+    x[i] = ((ulong) *w) % p;
+  return Flx_renormalize(x, l);
+}
+
+static GEN
+kron_unpack_Flx_2(GEN x, ulong p) {
   long d = (lgefint(x)-1)/2 - 1;
   return Z_mod2BIL_Flx_2(x, d, p);
 }
 
 static GEN
-int_to_Flx_3(GEN x, ulong p) {
+kron_unpack_Flx_3(GEN x, ulong p) {
   long d = lgefint(x)/3 - 1;
   return Z_mod2BIL_Flx_3(x, d, p);
 }
 
 static GEN
-FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) {
+kron_pack_FlxM(GEN M, GEN (*pack)(GEN, long)) {
   long i, j, l, lc;
   GEN N = cgetg_copy(M, &l), x;
   if (l == 1)
@@ -4040,9 +4040,9 @@ FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) {
 }
 
 static GEN
-ZM_to_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong))
+kron_unpack_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong))
 {
-  long i, j, l, lc, sv = evalvarn(get_Flx_var(T));
+  long i, j, l, lc, sv = get_Flx_var(T);
   GEN N = cgetg_copy(M, &l), x;
   if (l == 1)
     return N;
@@ -4073,28 +4073,32 @@ FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p) {
   l = lgefint(z);
   avma = av;
 
-  if (l == 3) {
+  switch (l - 2) {
+  case 1:
     if (HIGHWORD(z[2]) == 0) {
-      pack = zx_to_int_halfspec;
+      pack = kron_pack_Flx_spec_half;
       unpack = int_to_Flx_half;
     } else {
-      pack = Flx_to_int_spec;
-      unpack = int_to_Flx;
+      pack = kron_pack_Flx_spec;
+      unpack = kron_unpack_Flx;
     }
-  } else if (l == 4) {
-    pack = zx_to_int_spec_2;
-    unpack = int_to_Flx_2;
-  } else if (l == 5) {
-    pack = zx_to_int_spec_3;
-    unpack = int_to_Flx_3;
-  } else {
+    break;
+  case 2:
+    pack = kron_pack_Flx_spec_2;
+    unpack = kron_unpack_Flx_2;
+    break;
+  case 3:
+    pack = kron_pack_Flx_spec_3;
+    unpack = kron_unpack_Flx_3;
+    break;
+  default:
     /* not implemented, probably won't occur in practice */
     return NULL;
   }
-  A = FlxM_to_ZM(A, pack);
-  B = FlxM_to_ZM(B, pack);
+  A = kron_pack_FlxM(A, pack);
+  B = kron_pack_FlxM(B, pack);
   C = ZM_mul(A, B);
-  D = ZM_to_FlxqM(C, T, p, unpack);
+  D = kron_unpack_FlxqM(C, T, p, unpack);
   return gerepilecopy(av, D);
 }