Line data Source code
1 : #line 2 "../src/kernel/none/halfgcd.c"
2 : /* Copyright (C) 2019 The PARI group.
3 :
4 : This file is part of the PARI/GP package.
5 :
6 : PARI/GP is free software; you can redistribute it and/or modify it under the
7 : terms of the GNU General Public License as published by the Free Software
8 : Foundation; either version 2 of the License, or (at your option) any later
9 : version. It is distributed in the hope that it will be useful, but WITHOUT
10 : ANY WARRANTY WHATSOEVER.
11 :
12 : Check the License for details. You should have received a copy of it, along
13 : with the package; see the file 'COPYING'. If not, write to the Free Software
14 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
15 :
16 : GEN
17 2338 : ZM2_sqr(GEN A)
18 : {
19 2338 : GEN a = gcoeff(A,1,1), b = gcoeff(A,1,2), a2 = sqri(a);
20 2338 : GEN c = gcoeff(A,2,1), d = gcoeff(A,2,2), d2 = sqri(d), t = addii(a,d);
21 2338 : if (equalii(b, c)) /* symetric, 3S + 1M */
22 : {
23 322 : GEN b2 = sqri(b), M = cgetg(3, t_MAT), tb = mulii(b, t);
24 322 : gel(M,1) = mkcol2(addii(a2, b2), tb);
25 322 : gel(M,2) = mkcol2(tb, addii(b2, d2)); return M;
26 : }
27 : else
28 : { /* general, 2S + 3M */
29 2016 : GEN bc = mulii(b, c);
30 2016 : retmkmat2(mkcol2(addii(bc, a2), mulii(c, t)),
31 : mkcol2(mulii(b, t), addii(bc, d2)));
32 : }
33 : }
34 :
35 : GEN
36 7276481 : ZM2_mul(GEN A, GEN B)
37 : {
38 7276481 : const long t = ZM2_MUL_LIMIT+2;
39 7276481 : GEN A11=gcoeff(A,1,1),A12=gcoeff(A,1,2), B11=gcoeff(B,1,1),B12=gcoeff(B,1,2);
40 7276481 : GEN A21=gcoeff(A,2,1),A22=gcoeff(A,2,2), B21=gcoeff(B,2,1),B22=gcoeff(B,2,2);
41 7276481 : if (lgefint(A11) < t || lgefint(B11) < t || lgefint(A22) < t || lgefint(B22) < t
42 120959 : || lgefint(A12) < t || lgefint(B12) < t || lgefint(A21) < t || lgefint(B21) < t)
43 : { /* 8M */
44 7158186 : GEN a = mulii(A11, B11), b = mulii(A12, B21);
45 7157470 : GEN c = mulii(A11, B12), d = mulii(A12, B22);
46 7157381 : GEN e = mulii(A21, B11), f = mulii(A22, B21);
47 7157445 : GEN g = mulii(A21, B12), h = mulii(A22, B22);
48 7157381 : retmkmat2(mkcol2(addii(a,b), addii(e,f)), mkcol2(addii(c,d), addii(g,h)));
49 : } else
50 : { /* Strassen: 7M */
51 118295 : GEN M1 = mulii(addii(A11,A22), addii(B11,B22));
52 118295 : GEN M2 = mulii(addii(A21,A22), B11);
53 118295 : GEN M3 = mulii(A11, subii(B12,B22));
54 118295 : GEN M4 = mulii(A22, subii(B21,B11));
55 118295 : GEN M5 = mulii(addii(A11,A12), B22);
56 118295 : GEN M6 = mulii(subii(A21,A11), addii(B11,B12));
57 118295 : GEN M7 = mulii(subii(A12,A22), addii(B21,B22));
58 118295 : GEN T1 = addii(M1,M4), T2 = subii(M7,M5);
59 118295 : GEN T3 = subii(M1,M2), T4 = addii(M3,M6);
60 118295 : retmkmat2(mkcol2(addii(T1,T2), addii(M2,M4)),
61 : mkcol2(addii(M3,M5), addii(T3,T4)));
62 : }
63 : }
64 :
65 : static GEN
66 666 : matid2(void)
67 : {
68 666 : retmkmat2(mkcol2(gen_1,gen_0),
69 : mkcol2(gen_0,gen_1));
70 : }
71 :
72 : /* Return M*[q,1;1,0] */
73 : static GEN
74 2995431 : mulq(GEN M, GEN q)
75 : {
76 2995431 : GEN u, v, res = cgetg(3, t_MAT);
77 2995411 : u = addii(mulii(gcoeff(M,1,1), q), gcoeff(M,1,2));
78 2995421 : v = addii(mulii(gcoeff(M,2,1), q), gcoeff(M,2,2));
79 2995414 : gel(res,1) = mkcol2(u, v);
80 2995415 : gel(res,2) = gel(M,1);
81 2995415 : return res;
82 : }
83 : static GEN
84 59 : mulqab(GEN M, GEN q, GEN *ap, GEN *bp)
85 : {
86 59 : GEN b = subii(*ap, mulii(*bp, q));
87 59 : *ap = *bp; *bp = b;
88 59 : return mulq(M,q);
89 : }
90 :
91 : /* Return M*[q,1;1,0]^-1 */
92 :
93 : static GEN
94 1168 : mulqi(GEN M, GEN q, GEN *ap, GEN *bp)
95 : {
96 : GEN u, v, res, a;
97 1168 : a = addii(mulii(*ap, q), *bp);
98 1168 : *bp = *ap; *ap = a;
99 1168 : res = cgetg(3, t_MAT);
100 1168 : u = subii(gcoeff(M,1,1),mulii(gcoeff(M,1,2), q));
101 1168 : v = subii(gcoeff(M,2,1),mulii(gcoeff(M,2,2), q));
102 1168 : gel(res,1) = gel(M,2);
103 1168 : gel(res,2) = mkcol2(u,v);
104 1168 : return res;
105 : }
106 :
107 : /* test whether n is a power of 2 */
108 : static long
109 22984034 : isint2n(GEN n)
110 : {
111 : GEN x;
112 22984034 : long lx = lgefint(n), i;
113 22984034 : if (lx == 2) return 0;
114 22984034 : x = int_MSW(n);
115 22984034 : if (*(ulong*)x != 1UL<<expu(*(ulong*)x) ) return 0;
116 522011 : for (i = 3; i < lx; i++)
117 : {
118 514591 : x = int_precW(x); if (*x) return 0;
119 : }
120 7420 : return 1;
121 : }
122 :
123 : static long
124 22984256 : uexpi(GEN a)
125 22984256 : { return expi(a)+!isint2n(a); }
126 :
127 : static GEN
128 107808 : FIXUP0(GEN M, GEN *a, GEN *b, long m)
129 : {
130 107808 : long cnt=0;
131 137224 : while (expi(*b) >= m)
132 : {
133 29416 : GEN r, q = dvmdii(*a, *b, &r);
134 29416 : *a = *b; *b = r;
135 29416 : M = mulq(M, q);
136 29416 : cnt++;
137 : };
138 107807 : if (cnt>6) pari_err_BUG("FIXUP0");
139 107807 : return M;
140 : }
141 :
142 : static long
143 6454531 : signdet(GEN Q)
144 : {
145 6454531 : long a = Mod4(gcoeff(Q,1,1)), b = Mod4(gcoeff(Q,1,2));
146 6454469 : long c = Mod4(gcoeff(Q,2,1)), d = Mod4(gcoeff(Q,2,2));
147 6455649 : return ((a*d-b*c)&3)==1 ? 1 : -1;
148 : }
149 :
150 : static GEN
151 6289018 : ZM_inv2(GEN M)
152 : {
153 6289018 : long e = signdet(M);
154 9952668 : if (e==1) return mkmat22(gcoeff(M,2,2),negi(gcoeff(M,1,2)),
155 3662904 : negi(gcoeff(M,2,1)),gcoeff(M,1,1));
156 2627007 : else return mkmat22(negi(gcoeff(M,2,2)),gcoeff(M,1,2),
157 2626991 : gcoeff(M,2,1),negi(gcoeff(M,1,1)));
158 : }
159 :
160 : static GEN
161 1168 : lastq(GEN Q)
162 : {
163 1168 : GEN p = gcoeff(Q,1,1), q = gcoeff(Q,1,2), s = gcoeff(Q,2,2);
164 1168 : if (signe(q)==0) pari_err_BUG("halfgcd");
165 1168 : if (signe(s)==0) return p;
166 1168 : if (equali1(q)) return subiu(p,1);
167 1168 : return divii(p, q);
168 : }
169 :
170 : static GEN
171 6853 : mulT(GEN Q, GEN *ap, GEN *bp)
172 : {
173 6853 : *ap = addii(*ap, *bp);
174 6853 : *bp = negi(*bp);
175 6853 : return mkmat2(gel(Q,1),
176 6853 : mkcol2(subii(gcoeff(Q,1,1), gcoeff(Q,1,2))
177 6853 : , subii(gcoeff(Q,2,1), gcoeff(Q,2,2))));
178 : }
179 :
180 : static GEN
181 165651 : FIXUP1(GEN M, GEN a, GEN b, long m, long t, GEN *ap, GEN *bp)
182 : {
183 165651 : GEN Q = gel(M,1), a0 = gel(M,2), b0 = gel(M,3);
184 165651 : GEN q, am = remi2n(a, m), bm = remi2n(b, m);
185 165712 : if (signdet(Q)==-1)
186 : {
187 111064 : *ap = subii(mulii(bm, gcoeff(Q,1,2)),mulii(am, gcoeff(Q,2,2)));
188 110981 : *bp = subii(mulii(am, gcoeff(Q,2,1)),mulii(bm, gcoeff(Q,1,1)));
189 111197 : *ap = addii(*ap, shifti(addii(a0, gcoeff(Q,2,2)), m));
190 111123 : *bp = addii(*bp, shifti(subii(b0, gcoeff(Q,2,1)), m));
191 110735 : if (signe(*bp) >= 0)
192 103811 : return Q;
193 6924 : if (expi(addii(*ap,*bp)) >= m+t)
194 6853 : return mulT(Q, ap ,bp);
195 71 : q = lastq(Q);
196 71 : Q = mulqi(Q, q, ap, bp);
197 71 : if (cmpiu(q, 2)>=0)
198 59 : return mulqab(Q, subiu(q,1), ap, bp);
199 : else
200 12 : return mulqi(Q, lastq(Q), ap, bp);
201 : }
202 : else
203 : {
204 54526 : *ap = subii(mulii(am, gcoeff(Q,2,2)),mulii(bm, gcoeff(Q,1,2)));
205 54526 : *bp = subii(mulii(bm, gcoeff(Q,1,1)),mulii(am, gcoeff(Q,2,1)));
206 54526 : *ap = addii(*ap, shifti(subii(a0, gcoeff(Q,2,2)), m));
207 54526 : *bp = addii(*bp, shifti(addii(b0, gcoeff(Q,2,1)), m));
208 54526 : if (expi(*ap) >= m+t)
209 53441 : return FIXUP0(Q, ap, bp, m+t);
210 : else
211 1085 : return signe(gcoeff(Q,1,2))==0? Q: mulqi(Q, lastq(Q), ap, bp);
212 : }
213 : }
214 :
215 : static long
216 22929971 : magic_threshold(GEN a)
217 22929971 : { return (3+uexpi(a))>>1; }
218 :
219 : static GEN
220 14953170 : HGCD_basecase(GEN y, GEN x)
221 : {
222 14953170 : pari_sp av = avma;
223 : GEN d, d1, q, r;
224 : GEN u, u1, v, v1;
225 : ulong xu, xu1, xv, xv1; /* Lehmer stage recurrence matrix */
226 : int lhmres; /* Lehmer stage return value */
227 :
228 14953170 : long m = magic_threshold(y);
229 :
230 : /* There is no special case for single-word numbers since this is
231 : * mainly meant to be used with large moduli. */
232 14953169 : if (cmpii(y,x) <= 0)
233 : {
234 94251 : d = x; d1 = y;
235 94251 : u = gen_1; u1 = gen_0;
236 94251 : v = gen_0; v1 = gen_1;
237 : } else
238 : {
239 14858910 : d = y; d1 = x;
240 14858910 : u = gen_0; u1 = gen_1;
241 14858910 : v = gen_1; v1 = gen_0;
242 : }
243 33374373 : while (lgefint(d) > 3 && expi(d1) >= m + BITS_IN_LONG + 1)
244 : {
245 : /* do a Lehmer-Jebelean round */
246 18757261 : lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, 0);
247 :
248 18765049 : if (lhmres)
249 : {
250 17758971 : if (lhmres == 1 || lhmres == -1)
251 : {
252 460462 : if (xv1 == 1)
253 : {
254 394670 : r = subii(d,d1); d = d1; d1 = r;
255 394653 : r = addii(u,u1); u = u1; u1 = r;
256 394580 : r = addii(v,v1); v = v1; v1 = r;
257 : }
258 : else
259 : {
260 65792 : r = subii(d, mului(xv1,d1)); d = d1; d1 = r;
261 65791 : r = addii(u, mului(xv1,u1)); u = u1; u1 = r;
262 65791 : r = addii(v, mului(xv1,v1)); v = v1; v1 = r;
263 : }
264 : }
265 : else
266 : {
267 17298509 : r = subii(muliu(d,xu), muliu(d1,xv));
268 17293584 : d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
269 17293644 : r = addii(muliu(u,xu), muliu(u1,xv));
270 17292025 : u1 = addii(muliu(u,xu1), muliu(u1,xv1)); u = r;
271 17291900 : r = addii(muliu(v,xu), muliu(v1,xv));
272 17291883 : v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
273 17294890 : if (lhmres&1) togglesign(d); else togglesign(d1);
274 : }
275 : } /* lhmres != 0 */
276 18761115 : if (expi(d1) < m) break;
277 :
278 18421598 : if (lhmres <= 0 && signe(d1))
279 : {
280 1059461 : q = dvmdii(d,d1,&r);
281 1059475 : d = d1; d1 = r;
282 1059475 : r = addii(u, mulii(q,u1)); u = u1; u1 = r;
283 1059047 : r = addii(v, mulii(q,v1)); v = v1; v1 = r;
284 : }
285 18421177 : if (gc_needed(av,1))
286 : {
287 0 : if(DEBUGMEM>1) pari_warn(warnmem,"ratlift");
288 0 : gerepileall(av, 6, &d, &d1, &u, &u1, &v, &v1);
289 : }
290 : }
291 82859782 : while (expi(d1) >= m)
292 : {
293 67907171 : GEN r, q = dvmdii(d,d1, &r);
294 67922639 : d = d1; d1 = r; swap(u,u1); swap(v,v1);
295 67922639 : u1 = addii(mulii(u, q), u1);
296 67907183 : v1 = addii(mulii(v, q), v1);
297 : }
298 14943729 : return gerepilecopy(av, mkvec3(mkmat22(u1,u,v1,v), d, d1));
299 : }
300 :
301 : static GEN HGCD(GEN x, GEN y);
302 :
303 : /*
304 : Based on
305 : Klaus Thull and Chee K. Yap,
306 : A unified approach to HGCD algorithms for polynomials andintegers,
307 : 1990, Manuscript.
308 : URL: http://cs.nyu.edu/cs/faculty/yap/papers.
309 : */
310 :
311 : static GEN
312 111593 : HGCD_split(GEN a, GEN b)
313 : {
314 111593 : pari_sp av = avma;
315 111593 : long m = magic_threshold(a), t, l, k, tp;
316 : GEN a0, b0, ap, bp, c, d, c0, d0, cp, dp, R, S, T, q, r;
317 111484 : if (signe(b) < 0 || cmpii(a,b)<0) pari_err_BUG("HGCD_split");
318 111524 : if (expi(b) < m)
319 444 : return gerepilecopy(av, mkvec3(matid2(), a, b));
320 111024 : a0 = addiu(shifti(a, -m), 1);
321 111107 : if (cmpiu(a0,7) <= 0)
322 : {
323 0 : R = FIXUP0(matid2(), &a, &b, m);
324 0 : return gerepilecopy(av, mkvec3(R, a, b));
325 : }
326 111141 : b0 = shifti(b,-m);
327 111339 : t = magic_threshold(a0);
328 111060 : R = FIXUP1(HGCD(a0,b0),a, b, m, t, &ap, &bp);
329 110895 : if (expi(bp) < m)
330 56431 : return gerepilecopy(av, mkvec3(R, ap, bp));
331 54367 : q = dvmdii(ap, bp, &r);
332 54367 : c = bp; d = r;
333 54367 : if (cmpiu(shifti(c,-m),6) <= 0)
334 : {
335 21 : R = FIXUP0(mulq(R, q), &c, &d, m);
336 21 : return gerepilecopy(av, mkvec3(R, c, d));
337 : }
338 54345 : l = uexpi(c);
339 54346 : k = 2*m-l-1; if (k<0) pari_err_BUG("halfgcd");
340 54346 : c0 = addiu(shifti(c, -k), 1); if (cmpiu(c0,8)<0) pari_err_BUG("halfgcd");
341 54346 : d0 = shifti(d, -k);
342 54346 : tp = magic_threshold(c0);
343 54346 : S = FIXUP1(HGCD(c0,d0), c, d, k, tp, &cp, &dp);
344 54346 : if (!(expi(cp)>=m+1 && m+1 > expi(dp))) pari_err_BUG("halfgcd");
345 54346 : T = FIXUP0(ZM2_mul(mulq(R, q), S), &cp, &dp, m);
346 54345 : return gerepilecopy(av, mkvec3(T, cp, dp));
347 : }
348 :
349 : static GEN
350 15064523 : HGCD(GEN x, GEN y)
351 : {
352 15064523 : if (lgefint(y)-2 < HALFGCD_LIMIT)
353 14953127 : return HGCD_basecase(x, y);
354 : else
355 111396 : return HGCD_split(x, y);
356 : }
357 :
358 : static GEN
359 29270598 : HGCD0(GEN x, GEN y)
360 : {
361 29270598 : if (signe(y) >= 0 && cmpii(x, y) >= 0)
362 14895374 : return HGCD(x, y);
363 14375202 : if (cmpii(x, y) < 0)
364 : {
365 11985845 : GEN M = HGCD0(y, x), Q = gel(M,1);
366 11984584 : return mkvec3(mkmat22(gcoeff(Q,2,1),gcoeff(Q,2,2),gcoeff(Q,1,1),gcoeff(Q,1,2)),
367 11986175 : gel(M,2),gel(M,3));
368 : } /* Now y <= x*/
369 2389517 : if (signe(x) <= 0)
370 : { /* y <= x <=0 */
371 3856 : GEN M = HGCD(negi(y), negi(x)), Q = gel(M,1);
372 3856 : return mkvec3(mkmat22(negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2)),
373 3856 : negi(gcoeff(Q,1,1)),negi(gcoeff(Q,1,2))),
374 3856 : gel(M,2),gel(M,3));
375 : }
376 : else /* y <= 0 <=x */
377 : {
378 2385661 : GEN M = HGCD0(x, negi(y)), Q = gel(M,1);
379 2385661 : return mkvec3(mkmat22(gcoeff(Q,1,1),gcoeff(Q,1,2),negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2))),
380 2385661 : gel(M,2),gel(M,3));
381 : }
382 : }
383 :
384 : GEN
385 6289484 : halfgcdii(GEN A, GEN B)
386 : {
387 6289484 : pari_sp av = avma;
388 6289484 : GEN M, Q, a, b, m = abscmpii(A, B)>0 ? A: B;
389 6289519 : M = HGCD0(A,B); Q = gel(M,1); a = gel(M,2); b = gel(M,3);
390 9199759 : while (signe(b) && abscmpii(sqri(b), m) >= 0)
391 : {
392 2911460 : GEN r, q = dvmdii(a, b, &r);
393 2911437 : a = b; b = r;
394 2911437 : Q = mulq(Q, q);
395 : }
396 6288500 : return gerepilecopy(av, mkvec2(ZM_inv2(Q),mkcol2(a,b)));
397 : }
|