Lorenz Minder on Wed, 20 May 2009 12:15:33 +0200


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Re: Another problem with matrix inversion


Hi,

BA:
> On Fri, May 15, 2009 at 05:34:09AM +0200, Lorenz Minder wrote:
> > Hi,
> > 
> > I take it, the API for FpM_gauss() must not change, right?
> 
> Yes, it should stay as documented, though you can change FpM_gauss_sp
> as long as it does not affect FpM_gauss().

Ok, so the trouble is there is no such thing such as FpM_gauss_sp().
Everyting is directly in FpM_gauss() for the large modulus case.
Anyways, I wrote two patches, ZnM_funcs.patch (first) and
remap_Fp_funcs.patch (on top of that) that I hope do things in an
acceptable way.

Here is the description.  ZnM_funcs.patch implements ZnM_gauss(),
ZnM_inv(), Zlm_gauss() and Zlm_inv().  Those functions call ZnM_gauss_fl()
respectively Zlm_gauss_fl() (both static).

Zlm_gauss_fl() and ZnM_gauss_fl() are very similar to the FpM_gauss() and Flm_gauss_sp() functions, essentially a cut and paste of them.  They
differ in two ways.

First, they take an additional argument "mod_is_prime",
which, if set to true, makes them behave exactly like FpM_gauss() and
Flm_gauss_sp(), i.e., they will err out if they trip over any
non-invertible pivot they may encounter.

Second, every pivot is checked for invertibility, and if it happens
to find a non-invertible one, and mod_is_prime is false, then it will
call functions to do the splitting and recomposing business, which is
factored out to other (static) functions.

(The patch as is does a lot of copying that could be avoided, and I
have an older version that is better in this respect, but it's more
invasive, and I didn't measure serious problems with this one, so I
decided to take the smarter logic out, for simplicity.  But I can
dig it up if it is deemed more appropriate.)

As for performance, if the moduli are not too large, then this thing is
comparable in speed to doing the chinese remaindering by hand; on the
other hand, if the modulus is for example a product of two primes
that each just barely fit into a machine int, then it's a factor 2 slower.
To me, that's entirely acceptable.

The remap_Fp_funcs.patch removes the FpM_gauss() and Flm_gauss() code
and replaces it with calls to the corresponding Z??_*_fl(), with
mod_is_prime set to true, since the former are now duplicate code.

Best,
--Lorenz
-- 
Neu: GMX FreeDSL Komplettanschluss mit DSL 6.000 Flatrate + Telefonanschluss für nur 17,95 Euro/mtl.!* http://dslspecial.gmx.de/freedsl-aktionspreis/?ac=OM.AD.PD003K11308T4569a
diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -19,6 +19,9 @@
 /**                          (first part)                          **/
 /**                                                                **/
 /********************************************************************/
+
+#include <assert.h>
+
 #include "pari.h"
 #include "paripriv.h"
 
@@ -581,13 +584,16 @@
   return u;
 }
 static GEN
-Fp_get_col(GEN a, GEN b, long li, GEN p)
+Fp_get_col(GEN a, GEN b, long li, long last_row, GEN p)
 {
   GEN u = cgetg(li+1,t_COL);
   long i, j;
 
-  gel(u,li) = Fp_mul(gel(b,li), gcoeff(a,li,li), p);
-  for (i=li-1; i>0; i--)
+  for (i=li; i>last_row; i--) {
+    pari_sp av = avma;
+    gel(u, i) = gerepileuptoint(av, modii(gel(b, i), p));
+  }
+  for (; i>0; i--)
   {
     pari_sp av = avma;
     GEN m = gel(b,i);
@@ -637,15 +643,18 @@
   }
   return u;
 }
+
 static uGEN
-Fl_get_col(GEN a, uGEN b, long li, ulong p)
+Fl_get_col(GEN a, uGEN b, long li, long last_row, ulong p)
 {
   uGEN u = (uGEN)cgetg(li+1,t_VECSMALL);
-  ulong m = b[li] % p;
+  ulong m;
   long i,j;
 
-  u[li] = Fl_mul(m, ucoeff(a,li,li), p);
-  for (i=li-1; i>0; i--)
+  for (i=li; i>last_row; i--) {
+    u[i] = b[i] % p;
+  }
+  for (; i>0; i--)
   {
     m = b[i]%p;
     for (j = i+1; j <= li; j++)
@@ -749,7 +758,7 @@
   if (!is_FpM(&a, &p)) return 0;
   if (!b)
   {
-    a = FpM_inv(a,p);
+    a = ZnM_inv(a,p);
     if (a) a = FpM_to_mod(a, p);
   }
   else
@@ -758,12 +767,12 @@
     {
       case t_COL:
         if (!is_FpC(&b, &p)) return 0;
-        a = FpM_gauss(a,b,p);
+        a = ZnM_gauss(a,b,p);
         if (a) a = FpC_to_mod(a, p);
         break;
       case t_MAT:
         if (!is_FpM(&b, &p)) return 0;
-        a = FpM_gauss(a,b,p);
+        a = ZnM_gauss(a,b,p);
         if (a) a = FpM_to_mod(a, p);
         break;
       default: return 0;
@@ -771,6 +780,7 @@
   }
   *u = a; return 1;
 }
+
 /* Gaussan Elimination. Compute a^(-1)*b
  * b is a matrix or column vector, NULL meaning: take the identity matrix
  * If a and b are empty, the result is the empty matrix.
@@ -976,10 +986,309 @@
   if (OK_ulong)
     for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col_OK(a,(uGEN)b[j], aco,p);
   else
-    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco,p);
+    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco, aco, p);
   return u;
 }
 
+/* Helper functions for Zlm_gauss_fl */
+
+static int
+ulong_cmp(const void *v1, const void *v2)
+{
+  const ulong l1 = *(const ulong *)v1;
+  const ulong l2 = *(const ulong *)v2;
+
+  if(l1 < l2) return -1;
+  if(l1 > l2) return 1;
+  return 0;
+}
+
+/*
+	find_coprime_factors(a, b)
+
+	Given a, b; find m such that m | ab and that
+
+	  m * (ab/m)
+
+	is a coprime factorization of a * b.  This fails when a and b are
+	powers of the same integer.  Otherwise it succeeds.
+
+	RETURN	m	coprime on success
+		a * b	otherwise.
+ */
+
+static ulong
+find_coprime_factors(ulong a, ulong b)
+{
+   const int N = sizeof(ulong) * 8;
+   ulong r[N];
+
+   int n = 2;
+   r[0] = a;
+   r[1] = b;
+
+   /* Build a list of factors that are either coprime or identical */
+   int i;
+   for(i = 1; i < n; ++i) {
+     int j;
+     for(j = 0; j < i; ++j) {
+       if(r[i] == r[j])
+         continue;
+       long s;
+       ulong v  = (r[i] > r[j] ? r[i] : r[j]),
+             v1 = (r[i] > r[j] ? r[j] : r[i]);
+       ulong g = xgcduu(v, v1, 1, &v, &v1, &s);
+       if(g == 1)
+         continue;
+       if(g == r[j]) {
+         r[i] /= g;
+	 r[n++] = g;
+	 --j;
+	 continue;
+       }
+       if(g == r[i]) {
+         r[j] /= g;
+	 r[n++] = g;
+	 --j;
+	 continue;
+       }
+
+       r[i] /= g;
+       r[j] /= g;
+       r[n++] = g;
+       r[n++] = g;
+     }
+   }
+
+   /* Sort */
+   qsort(r, n, sizeof(ulong), ulong_cmp);
+
+#if 0
+   /* Debugging: Recheck */
+   for(i = 1; i < n; ++i) {
+     assert(r[i] != 1);
+     int j;
+     for(j = 0; j < i; ++j) {
+       if(r[i] == r[j])
+         continue;
+       long s;
+       ulong v, v1;
+       ulong g = xgcduu(r[i], r[j], 1, &v, &v1, &s);
+       assert(g == 1);
+     }
+   }
+#endif
+
+   /* Build return value */
+   ulong ret = r[0];
+   for(i = 1; i < n && r[i] == r[0]; ++i)
+     ret *= r[0];
+
+   return ret;
+}
+
+static GEN
+Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime);
+
+static int
+Zlm_gauss_splitinst(GEN a, GEN b, ulong m, int i, int k, ulong g)
+{
+  const int aco = lg(a) - 1;
+  const int bco = lg(b) - 1;
+  const int li = lg(a[1]) - 1;
+
+  /* Find two coprime factors for g */
+  g = find_coprime_factors(g, m/g);
+  if(g == m)
+    return 0;	/* This row can't be used for pivot. */
+  ulong mg = m/g;
+
+  /* Save sp, so we can later reclaim scratch storage. */
+  pari_sp av = avma;
+
+  /* Swap rows i and k */
+  if (k != i)
+  {
+    int j;
+    for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
+    for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
+  }
+
+  /* Build and solve Z/gZ instance */
+  GEN aa = cgetg(1 + (aco - i + 1), t_MAT);
+  int j;
+  for (j = 1; j <= aco - i + 1; ++j) {
+    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+    long int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      v[e] = gel(a, j + i - 1)[e + i - 1] % g;
+    }
+  }
+
+  GEN bb = cgetg(bco + 1, t_MAT);
+  for (j = 1; j <= bco; ++j) {
+    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+    long int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      v[e] = gel(b, j)[e + i - 1] % g;
+    }
+  }
+
+  GEN sg = Zlm_gauss_fl(aa, bb, g, 0);
+  if(sg == 0) { avma = av; return 1; }
+
+  /* Build and solve Z/(m/g)Z instance */
+  aa = cgetg(1 + (aco - i + 1), t_MAT);
+  for (j = 1; j <= aco - i + 1; ++j) {
+    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+    long int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      v[e] = gel(a, j + i - 1)[e + i - 1] % mg;
+    }
+  }
+
+  bb = cgetg(bco + 1, t_MAT);
+  for (j = 1; j <= bco; ++j) {
+    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+    long int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      v[e] = gel(b, j)[e + i - 1] % mg;
+    }
+  }
+
+  GEN spg = Zlm_gauss_fl(aa, bb, mg, 0);
+  if(spg == 0) { avma = av; return 1; }
+
+  /* Combine.
+   * Here,	ipg is 1 mod m/g and 0 mod g,
+   *		ig  is 0 mod m/g and 1 mod g.
+   */
+  const ulong ipg = Fl_mul(g, Fl_inv(g % mg, mg), m);
+  const ulong ig = Fl_mul(mg, Fl_inv(mg % g, g), m);
+  for (j = 1; j <= bco; ++j) {
+    long int e;
+    GEN v = gel(b, j);
+    GEN vsg = gel(sg, j);
+    GEN vspg = gel(spg, j);
+    for (e = 1; e <= li - i + 1; ++e) {
+      v[e + i - 1] = Fl_add(Fl_mul(ig, vsg[e], m),
+        Fl_mul(ipg, vspg[e], m), m);
+    }
+  }
+
+  /* Done */
+  avma = av;
+  return 2;
+}
+
+/* Zlm_gauss_fl().  A version of Zlm_gauss() that does not copy a, b and
+   thus garbles them in the process.  It also takes an additional
+   mod_is_prime flag.
+   mod_is_prime: Flag to abort with error if a nonzero noninvertible
+   pivot is found. Useful to imitate Flm_gauss_sp(). */
+static GEN
+Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime)
+{
+  /* Return Empty matrix if a is empty. */
+  const long int aco = lg(a)-1;		/* aco: # of columns of a */
+  if (!aco) return cgetg(1,t_MAT);
+
+  const long int li = lg(a[1])-1;	/* li: # of rows of a */
+
+  /* If b is a column vector,
+     temporarily wrap it into a matrix. */
+  const int iscol = (typ(b)!=t_MAT);
+  if (iscol) b = mkmat(b);
+  const long int bco = lg(b)-1;		/* bco: # of cols of b */
+
+  /* First step: Triangularize the matrix. */
+  long int i, j, k;
+  for (i=1; i<=aco; i++)
+  {
+    /* Find a pivot row in the ith column */
+    int done = 0;	/* Flag to cancel the rest of the triangularization
+			   procedure */
+    for (k = i; k <= li; k++)
+    {
+      const ulong piv = ( ucoeff(a,k,i) %= p );
+      if (piv != 0) {
+
+        /* Compute the inverse of the pivot */
+	ulong xv, xv1;
+	long s;
+	ulong g = xgcduu(p, piv, 1, &xv, &xv1, &s);
+
+	if (g != 1ul) {
+	  if(mod_is_prime) {
+	    /* Trigger error */
+	    Fl_inv(piv, p);
+	    return NULL;
+	  }
+
+	  /* Pivot not invertible ? -> attempt to split to 2 instances */
+	  const int r = Zlm_gauss_splitinst(a, b, p, i, k, g);
+	  if (r == 0)
+	    continue; /* could not use pivot at row k */
+	  else if (r == 1)
+	    return NULL; /* system has no valid solution */
+
+	  /* Done, readjust counters and go to backsubstitution */
+	  --i;
+          done = 1;
+	  break;
+	}
+
+        /* We store the inverse pivot value in the (k, i) position for
+	   later use. */
+	xv = xv1 % p;
+	if (s < 0) xv = p - xv;
+        ucoeff(a,k,i) = xv;
+	break;
+      }
+    }
+
+    /* no pivot? matrix not invertible, abort */
+    if (k > li) return NULL;
+    if (done) break;
+
+    /* Exchange the rows k and i */
+    if (k != i)
+    {
+      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
+      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
+    }
+
+    /* Last column? Done. */
+    if (i == aco) break;
+
+    /* Use the pivot row to eliminate column i in rows > i */
+    const ulong invpiv = ucoeff(a,i,i); /* 1/piv mod p */
+    for (k=i+1; k<=li; k++)
+    {
+      ulong m = ( ucoeff(a,k,i) %= p );
+      if (!m) continue;
+
+      m = Fl_mul(m, invpiv, p);
+      if (m == 1) {
+	for (j=i+1; j<=aco; j++) _Fl_sub((uGEN)a[j],k,i, p);
+	for (j=1;   j<=bco; j++) _Fl_sub((uGEN)b[j],k,i, p);
+      } else {
+	for (j=i+1; j<=aco; j++) _Fl_submul((uGEN)a[j],k,i,m, p);
+	for (j=1;   j<=bco; j++) _Fl_submul((uGEN)b[j],k,i,m, p);
+      }
+    }
+  }
+
+  /* Second Step: Backsubstitute. */
+  GEN u = cgetg(bco+1,t_MAT);
+  for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco, i, p);
+
+  /* Done, unwrap result if necessary */
+  return iscol? gel(u,1): u;
+}
+
+
+
 GEN
 matid_Flm(long n)
 {
@@ -1002,6 +1311,10 @@
 Flm_inv(GEN a, ulong p) {
   return Flm_inv_sp(RgM_shallowcopy(a), p);
 }
+GEN
+Zlm_inv(GEN a, ulong m) {
+  return Zlm_gauss_fl(RgM_shallowcopy(a), matid_Flm(lg(a)-1), m, 0);
+}
 
 GEN
 FpM_gauss(GEN a, GEN b, GEN p)
@@ -1062,7 +1375,7 @@
 
   if(DEBUGLEVEL>4) fprintferr("Solving the triangular system\n");
   u = cgetg(bco+1,t_MAT);
-  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, p);
+  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, aco, p);
   return gerepilecopy(av, iscol? gel(u,1): u);
 }
 GEN
@@ -1120,9 +1433,318 @@
   return gerepilecopy(av, iscol? gel(u,1): u);
 }
 
+static int
+cmp_GENii(const void *a, const void *b)
+{
+	GEN aa = *(GEN *)a;
+	GEN bb = *(GEN *)b;
+	return cmpii(aa, bb);
+}
+
+/*
+	Z_find_coprime_factors(a, b)
+
+	A version of find_coprime_factors() that works with t_INT GENs.
+ */
+
+static GEN
+Z_find_coprime_factors(GEN a, GEN b)
+{
+   const int N = (lg(a) - 1) + (lg(b) - 1) * sizeof(ulong) * 8;
+   GEN r[N];
+
+   pari_sp av = avma;
+   int n = 2;
+   r[0] = a;
+   r[1] = b;
+  
+   /* Build a list of factors that are either coprime or identical. */
+   int i;
+   for(i = 1; i < n; ++i) {
+     int j;
+     for(j = 0; j < i; ++j) {
+       if(equalii(r[i], r[j]))
+         continue;
+       GEN g = gcdii(r[i], r[j]);
+       if(equali1(g))
+         continue;
+       if(equalii(g, r[j])) {
+         r[i] = divii(r[i], g);
+	 r[n++] = g;
+	 --j;
+	 continue;
+       }
+       if(equalii(g, r[i])) {
+         r[j] = divii(r[j], g);
+	 r[n++] = g;
+	 --j;
+	 continue;
+       }
+       
+       r[i] = divii(r[i], g);
+       r[j] = divii(r[j], g);
+       r[n++] = g;
+       r[n++] = gcopy(g);
+     }
+   }
+
+   /* Sort */
+   qsort(r, n, sizeof(GEN), cmp_GENii);
+
+#if 0
+   /* Debugging: Recheck */
+   for(i = 1; i < n; ++i) {
+     assert(!gequal1(r[i]));
+     int j;
+     for(j = 0; j < i; ++j) {
+       if(equalii(r[i], r[j]))
+         continue;
+       GEN g = gcdii(r[i], r[j]);
+       assert(gequal1(g));
+     }
+   }
+#endif
+
+   /* Build return value */
+   for(i = 1; i < n && equalii(r[i], r[0]); ++i)
+     ;
+   GEN ret = powiu(r[0], i);
+
+   /* Done */
+   ret = gerepileupto(av, ret);
+   return ret;
+}
+
+static int
+ZnM_gauss_splitinst(GEN a, GEN b, GEN m, int i, int k, GEN g)
+{
+  const int aco = lg(a) - 1;
+  const int bco = lg(b) - 1;
+  const int li = lg(a[1]) - 1;
+
+  /* Find two coprime factors for g */
+  g = Z_find_coprime_factors(g, divii(m, g));
+  if(equalii(g, m))
+    return 0;	/* This row can't be used for pivot. */
+  GEN mg = divii(m, g);
+
+  /* Save sp, so we can later reclaim scratch storage. */
+  pari_sp av = avma;
+
+  /* Swap rows i and k */
+  if (k != i)
+  {
+    int j;
+    for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
+    for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
+  }
+
+  /* Build and solve Z/gZ instance */
+  GEN aa = cgetg(1 + (aco - i + 1), t_MAT);
+  int j;
+  for (j = 1; j <= aco - i + 1; ++j) {
+    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_COL));
+    long int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      gel(v, e) = modii(gcoeff(a, e + i - 1, j + i - 1), g);
+    }
+  }
+
+  GEN bb = cgetg(bco + 1, t_MAT);
+  for (j = 1; j <= bco; ++j) {
+    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_COL));
+    int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      gel(v, e) = modii(gcoeff(b, e + i - 1, j), g);
+    }
+  }
+
+  GEN sg = ZnM_gauss(aa, bb, g);
+  if(sg == 0) { avma = av; return 1; }
+
+  /* Build and solve Z/(m/g)Z instance */
+  aa = cgetg(1 + (aco - i + 1), t_MAT);
+  for (j = 1; j <= aco - i + 1; ++j) {
+    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_COL));
+    int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      gel(v, e) = modii(gcoeff(a, e + i - 1, j + i - 1), mg);
+    }
+  }
+
+  bb = cgetg(bco + 1, t_MAT);
+  for (j = 1; j <= bco; ++j) {
+    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_COL));
+    long int e;
+    for (e = 1; e <= li - i + 1; ++e) {
+      gel(v, e) = modii(gcoeff(b, e + i - 1, j), mg);
+    }
+  }
+
+  GEN spg = ZnM_gauss(aa, bb, mg);
+  if(spg == 0) { avma = av; return 1; }
+
+  /* Combine.
+   * Here,	ipg is 1 mod m/g and 0 mod g,
+   *		ig  is 0 mod m/g and 1 mod g.
+   */
+  GEN ipg = Fp_mul(g, Fp_inv(modii(g, mg), mg), m);
+  GEN ig = Fp_mul(mg, Fp_inv(modii(mg, g), g), m);
+  for (j = 1; j <= bco; ++j) {
+    long int e;
+    GEN v = gel(b, j);
+    GEN vsg = gel(sg, j);
+    GEN vspg = gel(spg, j);
+    for (e = 1; e <= li - i + 1; ++e) {
+      gel(v, e + i - 1) = Fp_add(Fp_mul(ig, gel(vsg, e), m),
+        Fp_mul(ipg, gel(vspg, e), m), m);
+    }
+  }
+
+  /* Done */
+  avma = av;
+  return 2;
+}
+
+/* ZnM_gauss_fl(): A version of ZnM_gauss() that takes an additional
+   flag, mod_is_prime.  If this flag, computation is aborted with an
+   error, if a nonzero non-invertible pivot is encountered.  This flag
+   is used to imitate the behaviour of FpM_gauss(). */
+static
+GEN ZnM_gauss_fl(GEN a, GEN b, GEN p, int mod_is_prime)
+{
+  pari_sp av = avma, lim;
+  long i, j, k, li, bco, aco;
+  int iscol;
+  GEN u;
+
+  if (!init_gauss(a, &b, &aco, &li, &iscol)) return cgetg(1, t_MAT);
+
+  /* Special case small moduli */
+  if (lgefint(p) == 3)
+  {
+    ulong pp = (ulong)p[2];
+    a = ZM_to_Flm(a, pp);
+    b = ZM_to_Flm(b, pp);
+    u = Zlm_gauss_fl(a,b, pp, mod_is_prime);
+    if (!u) {avma = av; return u;}
+    u = iscol? Flc_to_ZC(gel(u,1)): Flm_to_ZM(u);
+    return gerepileupto(av, u);
+  }
+
+  lim = stack_lim(av,1);
+  a = RgM_shallowcopy(a);
+  bco = lg(b)-1;
+  for (i=1; i<=aco; i++)
+  {
+    int done = 0;  /* Flag to cancel rest of triangularization step */
+
+    /* Look for a pivot in the ith column */
+    GEN invpiv;
+    for (k = i; k <= li; k++)
+    {
+      GEN piv = remii(gcoeff(a,k,i), p);
+      if (signe(piv)) {
+        GEN res;
+	if(!invmod(piv, p, &res)) {
+	  if(mod_is_prime) {
+	    /* Trigger error */
+	    Fp_inv(piv, p);
+	  }
+
+	  /* Not invertible ?
+	   *
+	   * Now, res contains a factor of p.  Split into two instances.
+	   */
+	  const int r = ZnM_gauss_splitinst(a, b, p, i, k, res);
+	  if (r == 0)
+	    continue; /* could not use pivot at row k */
+	  else if (r == 1)
+	    return NULL; /* system has no valid solution */
+
+	  /* Done, readjust counters and go to backsubstitution */
+	  --i;
+          done = 1;
+	  break;
+	} else {
+          gcoeff(a,k,i) = res;
+	}
+	break;
+      } else {
+	gcoeff(a,k,i) = gen_0;
+      }
+    }
+
+    /* No pivot? Not invertible, exit */
+    if (k > li) return NULL;
+    if (done) break;
+
+    /* Swap lines so the pivot is in line i */
+    if (k != i)
+    { /* swap lines so that k = i */
+      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
+      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
+    }
+    if (i == aco) break;
+
+    invpiv = gcoeff(a,i,i); /* 1/piv mod p */
+    for (k=i+1; k<=li; k++)
+    {
+      GEN m = remii(gcoeff(a,k,i), p); gcoeff(a,k,i) = gen_0;
+      if (!signe(m)) continue;
+
+      m = Fp_mul(m, invpiv, p);
+      for (j=i+1; j<=aco; j++) _Fp_submul(gel(a,j),k,i,m, p);
+      for (j=1  ; j<=bco; j++) _Fp_submul(gel(b,j),k,i,m, p);
+    }
+    if (low_stack(lim, stack_lim(av,1)))
+    {
+      if(DEBUGMEM>1) pari_warn(warnmem,"FpM_gauss. i=%ld",i);
+      gerepileall(av,2, &a,&b);
+    }
+  }
+
+  if(DEBUGLEVEL>4) fprintferr("Solving the triangular system\n");
+  u = cgetg(bco+1,t_MAT);
+  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, i, p);
+  return gerepilecopy(av, iscol? gel(u,1): u);
+}
+
+/*
+	ZnM_gauss(a, b, m)
+
+	Solve ax = b (mod m), return NULL on error, otherwise a solution
+	vector.
+
+ */
+
+GEN ZnM_gauss(GEN a, GEN b, GEN m)
+{
+  return ZnM_gauss_fl(a, b, m, 0);
+}
+
+/*
+	GEN Zlm_gauss(a, b, ulong p)
+
+	Perform Gaussian elimination modulo a small integer p. a is the
+	matrix, b the right hand side, which can be either a matrix or a
+	column vector.
+
+	The matrices are t_MAT with t_VECSMALL as column vectors.
+ */
+
+
+GEN
+Zlm_gauss(GEN a, GEN b, ulong m) {
+  return Zlm_gauss_fl(RgM_shallowcopy(a), shallowcopy(b), m, 0);
+}
+
 GEN
 FpM_inv(GEN x, GEN p) { return FpM_gauss(x, NULL, p); }
 
+GEN
+ZnM_inv(GEN x, GEN m) { return ZnM_gauss(x, NULL, m); }
+
 /* M integral, dM such that M' = dM M^-1 is integral [e.g det(M)]. Return M' */
 GEN
 ZM_inv(GEN M, GEN dM)
diff --git a/src/headers/paridecl.h b/src/headers/paridecl.h
--- a/src/headers/paridecl.h
+++ b/src/headers/paridecl.h
@@ -518,6 +518,10 @@
 GEN     RgM_solve_realimag(GEN x, GEN y);
 GEN     ZM_detmult(GEN A);
 GEN     ZM_inv(GEN M, GEN dM);
+GEN     ZnM_gauss(GEN a, GEN b, GEN m);
+GEN     ZnM_inv(GEN x, GEN m);
+GEN     Zlm_gauss(GEN a, GEN b, ulong m);
+GEN     Zlm_inv(GEN a, ulong m);
 GEN     apply0(GEN A, GEN f);
 GEN     deplin(GEN x);
 GEN     det(GEN a);
diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -621,28 +621,6 @@
   }
   return u;
 }
-/* assume 0 <= a[i,j] < p */
-static uGEN
-Fl_get_col_OK(GEN a, uGEN b, long li, ulong p)
-{
-  uGEN u = (uGEN)cgetg(li+1,t_VECSMALL);
-  ulong m = b[li] % p;
-  long i,j;
-
-  u[li] = (m * ucoeff(a,li,li)) % p;
-  for (i = li-1; i > 0; i--)
-  {
-    m = p - b[i]%p;
-    for (j = i+1; j <= li; j++) {
-      if (m & HIGHBIT) m %= p;
-      m += ucoeff(a,i,j) * u[j]; /* 0 <= u[j] < p */
-    }
-    m %= p;
-    if (m) m = ((p-m) * ucoeff(a,i,i)) % p;
-    u[i] = m;
-  }
-  return u;
-}
 
 static uGEN
 Fl_get_col(GEN a, uGEN b, long li, long last_row, ulong p)
@@ -683,12 +661,6 @@
   gel(b,i) = Fq_red(gel(b,i), T,p);
   gel(b,k) = gsub(gel(b,k), gmul(m, gel(b,i)));
 }
-static void /* assume m < p && SMALL_ULONG(p) && (! (b[i] & b[k] & HIGHMASK)) */
-_Fl_submul_OK(uGEN b, long k, long i, ulong m, ulong p)
-{
-  b[k] -= m * b[i];
-  if (b[k] & HIGHMASK) b[k] %= p;
-}
 static void /* assume m < p */
 _Fl_submul(uGEN b, long k, long i, ulong m, ulong p)
 {
@@ -933,61 +905,14 @@
   return z;
 }
 
+static GEN
+Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime);
+
 /* destroy a, b */
 static GEN
 Flm_gauss_sp(GEN a, GEN b, ulong p)
 {
-  long i, j, k, li, bco, aco = lg(a)-1;
-  const int OK_ulong = 0;
-  GEN u;
-
-  if (!aco) return cgetg(1,t_MAT);
-  li = lg(a[1])-1;
-  bco = lg(b)-1;
-  for (i=1; i<=aco; i++)
-  {
-    ulong invpiv;
-    /* Fl_get_col wants 0 <= a[i,j] < p for all i,j */
-    if (OK_ulong) for (k = 1; k < i; k++) ucoeff(a,k,i) %= p;
-    for (k = i; k <= li; k++)
-    {
-      ulong piv = ( ucoeff(a,k,i) %= p );
-      if (piv) { ucoeff(a,k,i) = Fl_inv(piv, p); break; }
-    }
-    /* found a pivot on line k */
-    if (k > li) return NULL;
-    if (k != i)
-    { /* swap lines so that k = i */
-      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
-      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
-    }
-    if (i == aco) break;
-
-    invpiv = ucoeff(a,i,i); /* 1/piv mod p */
-    for (k=i+1; k<=li; k++)
-    {
-      ulong m = ( ucoeff(a,k,i) %= p );
-      if (!m) continue;
-
-      m = Fl_mul(m, invpiv, p);
-      if (m == 1) {
-	for (j=i+1; j<=aco; j++) _Fl_sub((uGEN)a[j],k,i, p);
-	for (j=1;   j<=bco; j++) _Fl_sub((uGEN)b[j],k,i, p);
-      } else if (OK_ulong) {
-	for (j=i+1; j<=aco; j++) _Fl_submul_OK((uGEN)a[j],k,i,m, p);
-	for (j=1;   j<=bco; j++) _Fl_submul_OK((uGEN)b[j],k,i,m, p);
-      } else {
-	for (j=i+1; j<=aco; j++) _Fl_submul((uGEN)a[j],k,i,m, p);
-	for (j=1;   j<=bco; j++) _Fl_submul((uGEN)b[j],k,i,m, p);
-      }
-    }
-  }
-  u = cgetg(bco+1,t_MAT);
-  if (OK_ulong)
-    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col_OK(a,(uGEN)b[j], aco,p);
-  else
-    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco, aco, p);
-  return u;
+  return Zlm_gauss_fl(a, b, p, 1);
 }
 
 /* Helper functions for Zlm_gauss_fl */
@@ -1087,9 +1012,6 @@
    return ret;
 }
 
-static GEN
-Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime);
-
 static int
 Zlm_gauss_splitinst(GEN a, GEN b, ulong m, int i, int k, ulong g)
 {
@@ -1316,67 +1238,13 @@
   return Zlm_gauss_fl(RgM_shallowcopy(a), matid_Flm(lg(a)-1), m, 0);
 }
 
+static
+GEN ZnM_gauss_fl(GEN a, GEN b, GEN p, int mod_is_prime);
+
 GEN
 FpM_gauss(GEN a, GEN b, GEN p)
 {
-  pari_sp av = avma, lim;
-  long i, j, k, li, bco, aco;
-  int iscol;
-  GEN u;
-
-  if (!init_gauss(a, &b, &aco, &li, &iscol)) return cgetg(1, t_MAT);
-  if (lgefint(p) == 3)
-  {
-    ulong pp = (ulong)p[2];
-    a = ZM_to_Flm(a, pp);
-    b = ZM_to_Flm(b, pp);
-    u = Flm_gauss_sp(a,b, pp);
-    if (!u) {avma = av; return u;}
-    u = iscol? Flc_to_ZC(gel(u,1)): Flm_to_ZM(u);
-    return gerepileupto(av, u);
-  }
-  lim = stack_lim(av,1);
-  a = RgM_shallowcopy(a);
-  bco = lg(b)-1;
-  for (i=1; i<=aco; i++)
-  {
-    GEN invpiv;
-    for (k = i; k <= li; k++)
-    {
-      GEN piv = remii(gcoeff(a,k,i), p);
-      if (signe(piv)) { gcoeff(a,k,i) = Fp_inv(piv, p); break; }
-      gcoeff(a,k,i) = gen_0;
-    }
-    /* found a pivot on line k */
-    if (k > li) return NULL;
-    if (k != i)
-    { /* swap lines so that k = i */
-      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
-      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
-    }
-    if (i == aco) break;
-
-    invpiv = gcoeff(a,i,i); /* 1/piv mod p */
-    for (k=i+1; k<=li; k++)
-    {
-      GEN m = remii(gcoeff(a,k,i), p); gcoeff(a,k,i) = gen_0;
-      if (!signe(m)) continue;
-
-      m = Fp_mul(m, invpiv, p);
-      for (j=i+1; j<=aco; j++) _Fp_submul(gel(a,j),k,i,m, p);
-      for (j=1  ; j<=bco; j++) _Fp_submul(gel(b,j),k,i,m, p);
-    }
-    if (low_stack(lim, stack_lim(av,1)))
-    {
-      if(DEBUGMEM>1) pari_warn(warnmem,"FpM_gauss. i=%ld",i);
-      gerepileall(av,2, &a,&b);
-    }
-  }
-
-  if(DEBUGLEVEL>4) fprintferr("Solving the triangular system\n");
-  u = cgetg(bco+1,t_MAT);
-  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, aco, p);
-  return gerepilecopy(av, iscol? gel(u,1): u);
+  return ZnM_gauss_fl(a, b, p, 1);
 }
 GEN
 FqM_gauss(GEN a, GEN b, GEN T, GEN p)