Line data Source code
1 : #line 2 "../src/kernel/none/halfgcd.c"
2 : /* Copyright (C) 2019 The PARI group.
3 :
4 : This file is part of the PARI/GP package.
5 :
6 : PARI/GP is free software; you can redistribute it and/or modify it under the
7 : terms of the GNU General Public License as published by the Free Software
8 : Foundation; either version 2 of the License, or (at your option) any later
9 : version. It is distributed in the hope that it will be useful, but WITHOUT
10 : ANY WARRANTY WHATSOEVER.
11 :
12 : Check the License for details. You should have received a copy of it, along
13 : with the package; see the file 'COPYING'. If not, write to the Free Software
14 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
15 :
16 : GEN
17 2331 : ZM2_sqr(GEN A)
18 : {
19 2331 : GEN a = gcoeff(A,1,1), b = gcoeff(A,1,2), c = gcoeff(A,2,1), d = gcoeff(A,2,2);
20 2331 : if (equalii(b, c)) /* symetric, 3S + 1M */
21 : {
22 315 : GEN b2 = sqri(b), t = mulii(b, addii(a, d));
23 315 : retmkmat2(mkcol2(addii(b2, sqri(a)), t), mkcol2(t, addii(b2, sqri(d))));
24 : }
25 : else
26 : { /* general, 2S + 3M */
27 2016 : GEN bc = mulii(b, c), t = addii(a, d);
28 2016 : retmkmat2(mkcol2(addii(bc, sqri(a)), mulii(c, t)),
29 : mkcol2(mulii(b, t), addii(bc, sqri(d))));
30 : }
31 : }
32 :
33 : GEN
34 7142502 : ZM2_mul(GEN A, GEN B)
35 : {
36 7142502 : const long t = ZM2_MUL_LIMIT+2;
37 7142502 : GEN A11=gcoeff(A,1,1),A12=gcoeff(A,1,2), B11=gcoeff(B,1,1),B12=gcoeff(B,1,2);
38 7142502 : GEN A21=gcoeff(A,2,1),A22=gcoeff(A,2,2), B21=gcoeff(B,2,1),B22=gcoeff(B,2,2);
39 7142502 : if (lgefint(A11) < t || lgefint(B11) < t || lgefint(A22) < t || lgefint(B22) < t
40 120934 : || lgefint(A12) < t || lgefint(B12) < t || lgefint(A21) < t || lgefint(B21) < t)
41 : { /* 8M */
42 7024232 : GEN a = mulii(A11, B11), b = mulii(A12, B21);
43 7023733 : GEN c = mulii(A11, B12), d = mulii(A12, B22);
44 7023720 : GEN e = mulii(A21, B11), f = mulii(A22, B21);
45 7023720 : GEN g = mulii(A21, B12), h = mulii(A22, B22);
46 7023730 : retmkmat2(mkcol2(addii(a,b), addii(e,f)), mkcol2(addii(c,d), addii(g,h)));
47 : } else
48 : { /* Strassen: 7M */
49 118270 : GEN M1 = mulii(addii(A11,A22), addii(B11,B22));
50 118270 : GEN M2 = mulii(addii(A21,A22), B11);
51 118270 : GEN M3 = mulii(A11, subii(B12,B22));
52 118270 : GEN M4 = mulii(A22, subii(B21,B11));
53 118270 : GEN M5 = mulii(addii(A11,A12), B22);
54 118270 : GEN M6 = mulii(subii(A21,A11), addii(B11,B12));
55 118270 : GEN M7 = mulii(subii(A12,A22), addii(B21,B22));
56 118270 : GEN T1 = addii(M1,M4), T2 = subii(M7,M5);
57 118270 : GEN T3 = subii(M1,M2), T4 = addii(M3,M6);
58 118270 : retmkmat2(mkcol2(addii(T1,T2), addii(M2,M4)),
59 : mkcol2(addii(M3,M5), addii(T3,T4)));
60 : }
61 : }
62 :
63 : static GEN
64 666 : matid2(void)
65 : {
66 666 : retmkmat2(mkcol2(gen_1,gen_0),
67 : mkcol2(gen_0,gen_1));
68 : }
69 :
70 : /* Return M*[q,1;1,0] */
71 : static GEN
72 2983603 : mulq(GEN M, GEN q)
73 : {
74 2983603 : GEN u, v, res = cgetg(3, t_MAT);
75 2983581 : u = addii(mulii(gcoeff(M,1,1), q), gcoeff(M,1,2));
76 2983607 : v = addii(mulii(gcoeff(M,2,1), q), gcoeff(M,2,2));
77 2983604 : gel(res,1) = mkcol2(u, v);
78 2983588 : gel(res,2) = gel(M,1);
79 2983588 : return res;
80 : }
81 : static GEN
82 59 : mulqab(GEN M, GEN q, GEN *ap, GEN *bp)
83 : {
84 59 : GEN b = subii(*ap, mulii(*bp, q));
85 59 : *ap = *bp; *bp = b;
86 59 : return mulq(M,q);
87 : }
88 :
89 : /* Return M*[q,1;1,0]^-1 */
90 :
91 : static GEN
92 1169 : mulqi(GEN M, GEN q, GEN *ap, GEN *bp)
93 : {
94 : GEN u, v, res, a;
95 1169 : a = addii(mulii(*ap, q), *bp);
96 1169 : *bp = *ap; *ap = a;
97 1169 : res = cgetg(3, t_MAT);
98 1169 : u = subii(gcoeff(M,1,1),mulii(gcoeff(M,1,2), q));
99 1169 : v = subii(gcoeff(M,2,1),mulii(gcoeff(M,2,2), q));
100 1169 : gel(res,1) = gel(M,2);
101 1169 : gel(res,2) = mkcol2(u,v);
102 1169 : return res;
103 : }
104 :
105 : /* test whether n is a power of 2 */
106 : static long
107 23201463 : isint2n(GEN n)
108 : {
109 : GEN x;
110 23201463 : long lx = lgefint(n), i;
111 23201463 : if (lx == 2) return 0;
112 23201463 : x = int_MSW(n);
113 23201463 : if (*(ulong*)x != 1UL<<expu(*(ulong*)x) ) return 0;
114 519999 : for (i = 3; i < lx; i++)
115 : {
116 512630 : x = int_precW(x); if (*x) return 0;
117 : }
118 7369 : return 1;
119 : }
120 :
121 : static long
122 23202269 : uexpi(GEN a)
123 23202269 : { return expi(a)+!isint2n(a); }
124 :
125 : static GEN
126 107743 : FIXUP0(GEN M, GEN *a, GEN *b, long m)
127 : {
128 107743 : long cnt=0;
129 137155 : while (expi(*b) >= m)
130 : {
131 29412 : GEN r, q = dvmdii(*a, *b, &r);
132 29412 : *a = *b; *b = r;
133 29412 : M = mulq(M, q);
134 29412 : cnt++;
135 : };
136 107743 : if (cnt>6) pari_err_BUG("FIXUP0");
137 107743 : return M;
138 : }
139 :
140 : static long
141 6398449 : signdet(GEN Q)
142 : {
143 6398449 : long a = Mod4(gcoeff(Q,1,1)), b = Mod4(gcoeff(Q,1,2));
144 6398164 : long c = Mod4(gcoeff(Q,2,1)), d = Mod4(gcoeff(Q,2,2));
145 6399350 : return ((a*d-b*c)&3)==1 ? 1 : -1;
146 : }
147 :
148 : static GEN
149 6233104 : ZM_inv2(GEN M)
150 : {
151 6233104 : long e = signdet(M);
152 9869359 : if (e==1) return mkmat22(gcoeff(M,2,2),negi(gcoeff(M,1,2)),
153 3635603 : negi(gcoeff(M,2,1)),gcoeff(M,1,1));
154 2598318 : else return mkmat22(negi(gcoeff(M,2,2)),gcoeff(M,1,2),
155 2598261 : gcoeff(M,2,1),negi(gcoeff(M,1,1)));
156 : }
157 :
158 : static GEN
159 1169 : lastq(GEN Q)
160 : {
161 1169 : GEN p = gcoeff(Q,1,1), q = gcoeff(Q,1,2), s = gcoeff(Q,2,2);
162 1169 : if (signe(q)==0) pari_err_BUG("halfgcd");
163 1169 : if (signe(s)==0) return p;
164 1169 : if (equali1(q)) return subiu(p,1);
165 1169 : return divii(p, q);
166 : }
167 :
168 : static GEN
169 6828 : mulT(GEN Q, GEN *ap, GEN *bp)
170 : {
171 6828 : *ap = addii(*ap, *bp);
172 6828 : *bp = negi(*bp);
173 6828 : return mkmat2(gel(Q,1),
174 6828 : mkcol2(subii(gcoeff(Q,1,1), gcoeff(Q,1,2))
175 6828 : , subii(gcoeff(Q,2,1), gcoeff(Q,2,2))));
176 : }
177 :
178 : static GEN
179 165487 : FIXUP1(GEN M, GEN a, GEN b, long m, long t, GEN *ap, GEN *bp)
180 : {
181 165487 : GEN Q = gel(M,1), a0 = gel(M,2), b0 = gel(M,3);
182 165487 : GEN q, am = remi2n(a, m), bm = remi2n(b, m);
183 165543 : if (signdet(Q)==-1)
184 : {
185 110826 : *ap = subii(mulii(bm, gcoeff(Q,1,2)),mulii(am, gcoeff(Q,2,2)));
186 110934 : *bp = subii(mulii(am, gcoeff(Q,2,1)),mulii(bm, gcoeff(Q,1,1)));
187 111134 : *ap = addii(*ap, shifti(addii(a0, gcoeff(Q,2,2)), m));
188 111067 : *bp = addii(*bp, shifti(subii(b0, gcoeff(Q,2,1)), m));
189 110570 : if (signe(*bp) >= 0)
190 103671 : return Q;
191 6899 : if (expi(addii(*ap,*bp)) >= m+t)
192 6828 : return mulT(Q, ap ,bp);
193 71 : q = lastq(Q);
194 71 : Q = mulqi(Q, q, ap, bp);
195 71 : if (cmpiu(q, 2)>=0)
196 59 : return mulqab(Q, subiu(q,1), ap, bp);
197 : else
198 12 : return mulqi(Q, lastq(Q), ap, bp);
199 : }
200 : else
201 : {
202 54508 : *ap = subii(mulii(am, gcoeff(Q,2,2)),mulii(bm, gcoeff(Q,1,2)));
203 54508 : *bp = subii(mulii(bm, gcoeff(Q,1,1)),mulii(am, gcoeff(Q,2,1)));
204 54508 : *ap = addii(*ap, shifti(subii(a0, gcoeff(Q,2,2)), m));
205 54508 : *bp = addii(*bp, shifti(addii(b0, gcoeff(Q,2,1)), m));
206 54508 : if (expi(*ap) >= m+t)
207 53422 : return FIXUP0(Q, ap, bp, m+t);
208 : else
209 1086 : return signe(gcoeff(Q,1,2))==0? Q: mulqi(Q, lastq(Q), ap, bp);
210 : }
211 : }
212 :
213 : static long
214 23148024 : magic_threshold(GEN a)
215 23148024 : { return (3+uexpi(a))>>1; }
216 :
217 : static GEN
218 15016119 : HGCD_basecase(GEN y, GEN x)
219 : {
220 15016119 : pari_sp av = avma;
221 : GEN d, d1, q, r;
222 : GEN u, u1, v, v1;
223 : ulong xu, xu1, xv, xv1; /* Lehmer stage recurrence matrix */
224 : int lhmres; /* Lehmer stage return value */
225 :
226 15016119 : long m = magic_threshold(y);
227 :
228 : /* There is no special case for single-word numbers since this is
229 : * mainly meant to be used with large moduli. */
230 15016083 : if (cmpii(y,x) <= 0)
231 : {
232 91623 : d = x; d1 = y;
233 91623 : u = gen_1; u1 = gen_0;
234 91623 : v = gen_0; v1 = gen_1;
235 : } else
236 : {
237 14924358 : d = y; d1 = x;
238 14924358 : u = gen_0; u1 = gen_1;
239 14924358 : v = gen_1; v1 = gen_0;
240 : }
241 33341641 : while (lgefint(d) > 3 && expi(d1) >= m + BITS_IN_LONG + 1)
242 : {
243 : /* do a Lehmer-Jebelean round */
244 18658447 : lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, 0);
245 :
246 18663842 : if (lhmres)
247 : {
248 17696265 : if (lhmres == 1 || lhmres == -1)
249 : {
250 447635 : if (xv1 == 1)
251 : {
252 385031 : r = subii(d,d1); d = d1; d1 = r;
253 385049 : r = addii(u,u1); u = u1; u1 = r;
254 384940 : r = addii(v,v1); v = v1; v1 = r;
255 : }
256 : else
257 : {
258 62604 : r = subii(d, mului(xv1,d1)); d = d1; d1 = r;
259 62603 : r = addii(u, mului(xv1,u1)); u = u1; u1 = r;
260 62603 : r = addii(v, mului(xv1,v1)); v = v1; v1 = r;
261 : }
262 : }
263 : else
264 : {
265 17248630 : r = subii(muliu(d,xu), muliu(d1,xv));
266 17244717 : d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
267 17244651 : r = addii(muliu(u,xu), muliu(u1,xv));
268 17243518 : u1 = addii(muliu(u,xu1), muliu(u1,xv1)); u = r;
269 17243440 : r = addii(muliu(v,xu), muliu(v1,xv));
270 17243471 : v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
271 17245842 : if (lhmres&1) togglesign(d); else togglesign(d1);
272 : }
273 : } /* lhmres != 0 */
274 18660809 : if (expi(d1) < m) break;
275 :
276 18326089 : if (lhmres <= 0 && signe(d1))
277 : {
278 1013405 : q = dvmdii(d,d1,&r);
279 1013353 : d = d1; d1 = r;
280 1013353 : r = addii(u, mulii(q,u1)); u = u1; u1 = r;
281 1013110 : r = addii(v, mulii(q,v1)); v = v1; v1 = r;
282 : }
283 18325819 : if (gc_needed(av,1))
284 : {
285 0 : if(DEBUGMEM>1) pari_warn(warnmem,"ratlift");
286 0 : gerepileall(av, 6, &d, &d1, &u, &u1, &v, &v1);
287 : }
288 : }
289 82397937 : while (expi(d1) >= m)
290 : {
291 67381354 : GEN r, q = dvmdii(d,d1, &r);
292 67385625 : d = d1; d1 = r; swap(u,u1); swap(v,v1);
293 67385625 : u1 = addii(mulii(u, q), u1);
294 67382110 : v1 = addii(mulii(v, q), v1);
295 : }
296 15009683 : return gerepilecopy(av, mkvec3(mkmat22(u1,u,v1,v), d, d1));
297 : }
298 :
299 : static GEN HGCD(GEN x, GEN y);
300 :
301 : /*
302 : Based on
303 : Klaus Thull and Chee K. Yap,
304 : A unified approach to HGCD algorithms for polynomials andintegers,
305 : 1990, Manuscript.
306 : URL: http://cs.nyu.edu/cs/faculty/yap/papers.
307 : */
308 :
309 : static GEN
310 111602 : HGCD_split(GEN a, GEN b)
311 : {
312 111602 : pari_sp av = avma;
313 111602 : long m = magic_threshold(a), t, l, k, tp;
314 : GEN a0, b0, ap, bp, c, d, c0, d0, cp, dp, R, S, T, q, r;
315 111481 : if (signe(b) < 0 || cmpii(a,b)<0) pari_err_BUG("HGCD_split");
316 111522 : if (expi(b) < m)
317 444 : return gerepilecopy(av, mkvec3(matid2(), a, b));
318 111034 : a0 = addiu(shifti(a, -m), 1);
319 111070 : if (cmpiu(a0,7) <= 0)
320 : {
321 0 : R = FIXUP0(matid2(), &a, &b, m);
322 0 : return gerepilecopy(av, mkvec3(R, a, b));
323 : }
324 111036 : b0 = shifti(b,-m);
325 111334 : t = magic_threshold(a0);
326 111084 : R = FIXUP1(HGCD(a0,b0),a, b, m, t, &ap, &bp);
327 110743 : if (expi(bp) < m)
328 56308 : return gerepilecopy(av, mkvec3(R, ap, bp));
329 54321 : q = dvmdii(ap, bp, &r);
330 54321 : c = bp; d = r;
331 54321 : if (cmpiu(shifti(c,-m),6) <= 0)
332 : {
333 21 : R = FIXUP0(mulq(R, q), &c, &d, m);
334 21 : return gerepilecopy(av, mkvec3(R, c, d));
335 : }
336 54300 : l = uexpi(c);
337 54300 : k = 2*m-l-1; if (k<0) pari_err_BUG("halfgcd");
338 54300 : c0 = addiu(shifti(c, -k), 1); if (cmpiu(c0,8)<0) pari_err_BUG("halfgcd");
339 54300 : d0 = shifti(d, -k);
340 54300 : tp = magic_threshold(c0);
341 54300 : S = FIXUP1(HGCD(c0,d0), c, d, k, tp, &cp, &dp);
342 54300 : if (!(expi(cp)>=m+1 && m+1 > expi(dp))) pari_err_BUG("halfgcd");
343 54300 : T = FIXUP0(ZM2_mul(mulq(R, q), S), &cp, &dp, m);
344 54300 : return gerepilecopy(av, mkvec3(T, cp, dp));
345 : }
346 :
347 : static GEN
348 15127533 : HGCD(GEN x, GEN y)
349 : {
350 15127533 : if (lgefint(y)-2 < HALFGCD_LIMIT)
351 15016077 : return HGCD_basecase(x, y);
352 : else
353 111456 : return HGCD_split(x, y);
354 : }
355 :
356 : static GEN
357 29554497 : HGCD0(GEN x, GEN y)
358 : {
359 29554497 : if (signe(y) >= 0 && cmpii(x, y) >= 0)
360 14958399 : return HGCD(x, y);
361 14596076 : if (cmpii(x, y) < 0)
362 : {
363 12060648 : GEN M = HGCD0(y, x), Q = gel(M,1);
364 12059347 : return mkvec3(mkmat22(gcoeff(Q,2,1),gcoeff(Q,2,2),gcoeff(Q,1,1),gcoeff(Q,1,2)),
365 12060820 : gel(M,2),gel(M,3));
366 : } /* Now y <= x*/
367 2535572 : if (signe(x) <= 0)
368 : { /* y <= x <=0 */
369 3847 : GEN M = HGCD(negi(y), negi(x)), Q = gel(M,1);
370 3847 : return mkvec3(mkmat22(negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2)),
371 3847 : negi(gcoeff(Q,1,1)),negi(gcoeff(Q,1,2))),
372 3847 : gel(M,2),gel(M,3));
373 : }
374 : else /* y <= 0 <=x */
375 : {
376 2531725 : GEN M = HGCD0(x, negi(y)), Q = gel(M,1);
377 2531725 : return mkvec3(mkmat22(gcoeff(Q,1,1),gcoeff(Q,1,2),negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2))),
378 2531725 : gel(M,2),gel(M,3));
379 : }
380 : }
381 :
382 : GEN
383 6233694 : halfgcdii(GEN A, GEN B)
384 : {
385 6233694 : pari_sp av = avma;
386 6233694 : GEN M, Q, a, b, m = abscmpii(A, B)>0 ? A: B;
387 6233716 : M = HGCD0(A,B); Q = gel(M,1); a = gel(M,2); b = gel(M,3);
388 9131951 : while (signe(b) && abscmpii(sqri(b), m) >= 0)
389 : {
390 2899687 : GEN r, q = dvmdii(a, b, &r);
391 2899661 : a = b; b = r;
392 2899661 : Q = mulq(Q, q);
393 : }
394 6232895 : return gerepilecopy(av, mkvec2(ZM_inv2(Q),mkcol2(a,b)));
395 : }
|