Line data Source code
1 : /* Copyright (C) 2000 The PARI group.
2 :
3 : This file is part of the PARI/GP package.
4 :
5 : PARI/GP is free software; you can redistribute it and/or modify it under the
6 : terms of the GNU General Public License as published by the Free Software
7 : Foundation; either version 2 of the License, or (at your option) any later
8 : version. It is distributed in the hope that it will be useful, but WITHOUT
9 : ANY WARRANTY WHATSOEVER.
10 :
11 : Check the License for details. You should have received a copy of it, along
12 : with the package; see the file 'COPYING'. If not, write to the Free Software
13 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14 : #include "pari.h"
15 : #include "paripriv.h"
16 :
17 : #define DEBUGLEVEL DEBUGLEVEL_factorint
18 :
19 : /***********************************************************************/
20 : /** PRIMES IN SUCCESSION **/
21 : /***********************************************************************/
22 :
23 : /* map from prime residue classes mod 210 to their numbers in {0...47}.
24 : * Subscripts into this array take the form ((k-1)%210)/2, ranging from
25 : * 0 to 104. Unused entries are */
26 : #define NPRC 128 /* nonprime residue class */
27 :
28 : static unsigned char prc210_no[] = {
29 : 0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
30 : 5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
31 : 10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
32 : NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
33 : NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
34 : 24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
35 : 28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
36 : 33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
37 : 38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
38 : 43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
39 : };
40 :
41 : /* first differences of the preceding */
42 : static unsigned char prc210_d1[] = {
43 : 10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
44 : 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
45 : 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
46 : };
47 :
48 : static int
49 900549 : unextprime_overflow(ulong n)
50 : {
51 : #ifdef LONG_IS_64BIT
52 899963 : return (n > (ulong)-59);
53 : #else
54 586 : return (n > (ulong)-5);
55 : #endif
56 : }
57 :
58 : /* return 0 for overflow */
59 : ulong
60 1037649 : unextprime(ulong n)
61 : {
62 : long rc, rc0, rcn;
63 :
64 1037649 : switch(n) {
65 6858 : case 0: case 1: case 2: return 2;
66 2434 : case 3: return 3;
67 1668 : case 4: case 5: return 5;
68 1162 : case 6: case 7: return 7;
69 : }
70 1025527 : if (n <= maxprime())
71 : {
72 124964 : long i = PRIMES_search(n);
73 124964 : return i > 0? n: pari_PRIMES[-i];
74 : }
75 900554 : if (unextprime_overflow(n)) return 0;
76 : /* here n > 7 */
77 900507 : n |= 1; /* make it odd */
78 900507 : rc = rc0 = n % 210;
79 : /* find next prime residue class mod 210 */
80 : for(;;)
81 : {
82 1983719 : rcn = (long)(prc210_no[rc>>1]);
83 1983719 : if (rcn != NPRC) break;
84 1083212 : rc += 2; /* cannot wrap since 209 is coprime and rc odd */
85 : }
86 900507 : if (rc > rc0) n += rc - rc0;
87 : /* now find an actual (pseudo)prime */
88 : for(;;)
89 : {
90 10615277 : if (uisprime(n)) break;
91 9714770 : n += prc210_d1[rcn];
92 9714770 : if (++rcn > 47) rcn = 0;
93 : }
94 900538 : return n;
95 : }
96 :
97 : GEN
98 126847 : nextprime(GEN n)
99 : {
100 : long rc, rc0, rcn;
101 126847 : pari_sp av = avma;
102 :
103 126847 : if (typ(n) != t_INT)
104 : {
105 14 : n = gceil(n);
106 14 : if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
107 : }
108 126840 : if (signe(n) <= 0) { set_avma(av); return gen_2; }
109 126840 : if (lgefint(n) == 3)
110 : {
111 118710 : ulong k = unextprime(uel(n,2));
112 118710 : set_avma(av);
113 118710 : if (k) return utoipos(k);
114 : #ifdef LONG_IS_64BIT
115 6 : return uutoi(1,13);
116 : #else
117 1 : return uutoi(1,15);
118 : #endif
119 : }
120 : /* here n > 7 */
121 8130 : if (!mod2(n)) n = addui(1,n);
122 8130 : rc = rc0 = umodiu(n, 210);
123 : /* find next prime residue class mod 210 */
124 : for(;;)
125 : {
126 17701 : rcn = (long)(prc210_no[rc>>1]);
127 17701 : if (rcn != NPRC) break;
128 9571 : rc += 2; /* cannot wrap since 209 is coprime and rc odd */
129 : }
130 8130 : if (rc > rc0) n = addui(rc - rc0, n);
131 : /* now find an actual (pseudo)prime */
132 : for(;;)
133 : {
134 84466 : if (BPSW_psp(n)) break;
135 76336 : n = addui(prc210_d1[rcn], n);
136 76336 : if (++rcn > 47) rcn = 0;
137 : }
138 8130 : if (avma == av) return icopy(n);
139 8130 : return gerepileuptoint(av, n);
140 : }
141 :
142 : ulong
143 32 : uprecprime(ulong n)
144 : {
145 : long rc, rc0, rcn;
146 : { /* check if n <= 10 */
147 32 : if (n <= 1) return 0;
148 25 : if (n == 2) return 2;
149 18 : if (n <= 4) return 3;
150 18 : if (n <= 6) return 5;
151 18 : if (n <= 10) return 7;
152 : }
153 18 : if (n <= maxprimelim())
154 : {
155 0 : long i = PRIMES_search(n);
156 0 : return i > 0? n: pari_PRIMES[-i-1];
157 : }
158 : /* here n >= 11 */
159 18 : if (!(n % 2)) n--;
160 18 : rc = rc0 = n % 210;
161 : /* find previous prime residue class mod 210 */
162 : for(;;)
163 : {
164 36 : rcn = (long)(prc210_no[rc>>1]);
165 36 : if (rcn != NPRC) break;
166 18 : rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
167 : }
168 18 : if (rc < rc0) n += rc - rc0;
169 : /* now find an actual (pseudo)prime */
170 : for(;;)
171 : {
172 36 : if (uisprime(n)) break;
173 18 : if (--rcn < 0) rcn = 47;
174 18 : n -= prc210_d1[rcn];
175 : }
176 18 : return n;
177 : }
178 :
179 : GEN
180 49 : precprime(GEN n)
181 : {
182 : long rc, rc0, rcn;
183 49 : pari_sp av = avma;
184 :
185 49 : if (typ(n) != t_INT)
186 : {
187 14 : n = gfloor(n);
188 14 : if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
189 : }
190 42 : if (signe(n) <= 0) { set_avma(av); return gen_0; }
191 42 : if (lgefint(n) <= 3)
192 : {
193 32 : ulong k = uel(n,2);
194 32 : return gc_utoi(av, uprecprime(k));
195 : }
196 10 : if (!mod2(n)) n = subiu(n,1);
197 10 : rc = rc0 = umodiu(n, 210);
198 : /* find previous prime residue class mod 210 */
199 : for(;;)
200 : {
201 20 : rcn = (long)(prc210_no[rc>>1]);
202 20 : if (rcn != NPRC) break;
203 10 : rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
204 : }
205 10 : if (rc0 > rc) n = subiu(n, rc0 - rc);
206 : /* now find an actual (pseudo)prime */
207 : for(;;)
208 : {
209 48 : if (BPSW_psp(n)) break;
210 38 : if (--rcn < 0) rcn = 47;
211 38 : n = subiu(n, prc210_d1[rcn]);
212 : }
213 10 : if (avma == av) return icopy(n);
214 10 : return gerepileuptoint(av, n);
215 : }
216 :
217 : /* Find next single-word prime strictly larger than p.
218 : * If *n < pari_PRIMES[0], p is *n-th prime, otherwise imitate nextprime().
219 : * *rcn = NPRC or the correct residue class for the current p; we'll use this
220 : * to track the current prime residue class mod 210 once we're out of range of
221 : * the prime table, and we'll update it before that if it isn't NPRC.
222 : *
223 : * *q is incremented whenever q!=NULL and we wrap from 209 mod 210 to
224 : * 1 mod 210 */
225 : static ulong
226 4535017 : snextpr(ulong p, long *n, long *rcn, long *q, int (*ispsp)(ulong))
227 : {
228 4535017 : if (*n < pari_PRIMES[0])
229 : {
230 4535017 : ulong t, p1 = t = pari_PRIMES[++*n]; /* nextprime(p + 1) */
231 4535017 : if (*rcn != NPRC)
232 : {
233 15899542 : while (t > p)
234 : {
235 11381022 : t -= prc210_d1[*rcn];
236 11381022 : if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
237 : }
238 : /* assert(d1 == p) */
239 : }
240 4535017 : return p1;
241 : }
242 0 : if (unextprime_overflow(p)) pari_err_OVERFLOW("snextpr");
243 : /* we are beyond the prime table, initialize if needed */
244 0 : if (*rcn == NPRC) *rcn = prc210_no[(p % 210) >> 1]; /* != NPRC */
245 : /* look for the next one */
246 : do {
247 0 : p += prc210_d1[*rcn];
248 0 : if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
249 0 : } while (!ispsp(p));
250 0 : return p;
251 : }
252 :
253 : /********************************************************************/
254 : /** **/
255 : /** INTEGER FACTORIZATION **/
256 : /** **/
257 : /********************************************************************/
258 : int factor_add_primes = 0, factor_proven = 0;
259 :
260 : /***********************************************************************/
261 : /** **/
262 : /** FACTORIZATION (ECM) -- GN Jul-Aug 1998 **/
263 : /** Integer factorization using the elliptic curves method (ECM). **/
264 : /** ellfacteur() returns a non trivial factor of N, assuming N>0, **/
265 : /** is composite, and has no prime divisor below tridiv_bound(N) **/
266 : /** Thanks to Paul Zimmermann for much helpful advice and to **/
267 : /** Guillaume Hanrot and Igor Schein for intensive testing **/
268 : /** **/
269 : /***********************************************************************/
270 : #define nbcmax 64 /* max number of simultaneous curves */
271 :
272 : static const ulong TB1[] = {
273 : 142,172,208,252,305,370,450,545,661,801,972,1180,1430,
274 : 1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
275 : 14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
276 : 81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
277 : 314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
278 : 1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
279 : 3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
280 : 12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
281 : 32047300UL,38856400UL, /* 110 times that still fits into 32bits */
282 : #ifdef LONG_IS_64BIT
283 : 47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
284 : 123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
285 : 323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
286 : 847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
287 : 2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL
288 : #endif
289 : };
290 : static const ulong TB1_for_stage[] = {
291 : /* Start below the optimal B1 for finding factors which would just have been
292 : * missed by pollardbrent(), and escalate, changing curves to give good
293 : * coverage of the small factor ranges. Entries grow faster than what would
294 : * be optimal but a table instead of a 2D array keeps the code simple */
295 : 500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
296 : 2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
297 : 7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
298 : 19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
299 : 48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
300 : 107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
301 : 241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
302 : 540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL
303 : };
304 :
305 : /* addition/doubling/multiplication of a point on an 'elliptic curve mod N'
306 : * may result in one of three things:
307 : * - a new bona fide point
308 : * - a point at infinity (denominator divisible by N)
309 : * - a point at infinity mod some p | N but finite mod q | N betraying itself
310 : * by a denominator which has nontrivial gcd with N.
311 : *
312 : * In the second case, addition/doubling aborts, copying one of the summands
313 : * to the destination array of points unless they coincide.
314 : * Multiplication will stop at some unpredictable intermediate stage: The
315 : * destination will contain _some_ multiple of the input point, but not
316 : * necessarily the desired one, which doesn't matter. As long as we're
317 : * multiplying (B1 phase) we simply carry on with the next multiplier.
318 : * During the B2 phase, the only additions are the giant steps, and the
319 : * worst that can happen here is that we lose one residue class mod 210
320 : * of prime multipliers on 4 of the curves, so again, we ignore the problem
321 : * and just carry on.)
322 : *
323 : * Idea: select nbc curves mod N and one point P on each of them. For each
324 : * such P, compute [M]P = Q where M is the product of all powers <= B2 of
325 : * primes <= nextprime(B1). Then check whether [p]Q for p < nextprime(B2)
326 : * betrays a factor. This second stage looks separately at the primes in
327 : * each residue class mod 210, four curves at a time, and steps additively
328 : * to ever larger multipliers, by comparing X coordinates of points which we
329 : * would need to add in order to reach another prime multiplier in the same
330 : * residue class. 'Comparing' means that we accumulate a product of
331 : * differences of X coordinates, and from time to time take a gcd of this
332 : * product with N. Montgomery's multi-inverse trick is used heavily. */
333 :
334 : /* *** auxiliary functions for ellfacteur: *** */
335 : /* (Rx,Ry) <- (Px,Py)+(Qx,Qy) over Z/NZ, z=1/(Px-Qx). If Ry = NULL, don't set */
336 : static void
337 8303292 : FpE_add_i(GEN N, GEN z, GEN Px, GEN Py, GEN Qx, GEN Qy, GEN *Rx, GEN *Ry)
338 : {
339 8303292 : GEN slope = modii(mulii(subii(Py, Qy), z), N);
340 8303292 : GEN t = subii(sqri(slope), addii(Qx, Px));
341 8303292 : affii(modii(t, N), *Rx);
342 8303292 : if (Ry) {
343 8230924 : t = subii(mulii(slope, subii(Px, *Rx)), Py);
344 8230924 : affii(modii(t, N), *Ry);
345 : }
346 8303292 : }
347 : /* X -> Z; cannot add on one of the curves: make sure Z contains
348 : * something useful before letting caller proceed */
349 : static void
350 26135 : ZV_aff(long n, GEN *X, GEN *Z)
351 : {
352 26135 : if (X != Z) {
353 : long k;
354 1576379 : for (k = n; k--; ) affii(X[k],Z[k]);
355 : }
356 26135 : }
357 :
358 : /* Parallel addition on nbc curves, assigning the result to locations at and
359 : * following *X3, *Y3. (If Y-coords of result not desired, set Y=NULL.)
360 : * Safe even if (X3,Y3) = (X2,Y2), _not_ if (X1,Y1). It is also safe to
361 : * overwrite Y2 with X3. If nbc1 < nbc, the first summand is
362 : * assumed to hold only nbc1 distinct points, repeated as often as we need
363 : * them (to add one point on each of a few curves to several other points on
364 : * the same curves): only used with nbc1 = nbc or nbc1 = 4 | nbc.
365 : *
366 : * Return 0 [SUCCESS], 1 [N | den], 2 [gcd(den, N) is a factor of N, preserved
367 : * in gl.
368 : * Stack space is bounded by a constant multiple of lgefint(N)*nbc:
369 : * - Phase 2 creates 12 items on the stack per iteration, of which 4 are twice
370 : * as long and 1 is thrice as long as N, i.e. 18 units per iteration.
371 : * - Phase 1 creates 4 units.
372 : * Total can be as large as 4*nbcmax + 18*8 units; ecm_elladd2() is
373 : * just as bad, and elldouble() comes to 3*nbcmax + 29*8 units. */
374 : static int
375 241870 : ecm_elladd0(GEN N, GEN *gl, long nbc, long nbc1,
376 : GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
377 : {
378 241870 : const ulong mask = (nbc1 == 4)? 3: ~0UL; /*nbc1 = 4 or nbc*/
379 241870 : GEN W[2*nbcmax], *A = W+nbc; /* W[0],A[0] unused */
380 : long i;
381 241870 : pari_sp av = avma;
382 :
383 241870 : W[1] = subii(X1[0], X2[0]);
384 7837896 : for (i=1; i<nbc; i++)
385 : { /*prepare for multi-inverse*/
386 7596026 : A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N */
387 7596026 : W[i+1] = modii(mulii(A[i], W[i]), N);
388 : }
389 241870 : if (!invmod(W[nbc], N, gl))
390 : {
391 75 : if (!equalii(N,*gl)) return 2;
392 49 : ZV_aff(nbc, X2,X3);
393 49 : if (Y3) ZV_aff(nbc, Y2,Y3);
394 49 : return gc_int(av,1);
395 : }
396 :
397 7836220 : while (i--) /* nbc times */
398 : {
399 7836220 : pari_sp av2 = avma;
400 7836220 : GEN Px = X1[i&mask], Py = Y1[i&mask], Qx = X2[i], Qy = Y2[i];
401 7836220 : GEN z = i? mulii(*gl,W[i]): *gl; /*1/(Px-Qx)*/
402 7836220 : FpE_add_i(N,z, Px,Py,Qx,Qy, X3+i, Y3? Y3+i: NULL);
403 7836220 : if (!i) break;
404 7594425 : set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
405 : }
406 241795 : return gc_int(av,0);
407 : }
408 :
409 : /* Shortcut, for use in cases where Y coordinates follow their corresponding
410 : * X coordinates, and first summand doesn't need to be repeated */
411 : static int
412 235914 : ecm_elladd(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2, GEN *X3) {
413 235914 : return ecm_elladd0(N, gl, nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
414 : }
415 :
416 : /* As ecm_elladd except it does twice as many additions (and hides even more
417 : * of the cost of the modular inverse); the net effect is the same as
418 : * ecm_elladd(nbc,X1,X2,X3) && ecm_elladd(nbc,X4,X5,X6). Safe to
419 : * have X2=X3, X5=X6, or X1,X2 coincide with X4,X5 in any order. */
420 : static int
421 7233 : ecm_elladd2(GEN N, GEN *gl, long nbc,
422 : GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
423 : {
424 7233 : GEN *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
425 7233 : GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
426 7233 : GEN W[4*nbcmax], *A = W+2*nbc; /* W[0],A[0] unused */
427 : long i, j;
428 7233 : pari_sp av = avma;
429 :
430 7233 : W[1] = subii(X1[0], X2[0]);
431 233544 : for (i=1; i<nbc; i++)
432 : {
433 226311 : A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
434 226311 : W[i+1] = modii(mulii(A[i], W[i]), N);
435 : }
436 240777 : for (j=0; j<nbc; i++,j++)
437 : {
438 233544 : A[i] = subii(X4[j], X5[j]);
439 233544 : W[i+1] = modii(mulii(A[i], W[i]), N);
440 : }
441 7233 : if (!invmod(W[2*nbc], N, gl))
442 : {
443 1 : if (!equalii(N,*gl)) return 2;
444 1 : ZV_aff(2*nbc, X2,X3); /* hack: 2*nbc => copy Y2->Y3 */
445 1 : ZV_aff(2*nbc, X5,X6); /* also copy Y5->Y6 */
446 1 : return gc_int(av,1);
447 : }
448 :
449 240768 : while (j--) /* nbc times */
450 : {
451 233536 : pari_sp av2 = avma;
452 233536 : GEN Px = X4[j], Py = Y4[j], Qx = X5[j], Qy = Y5[j];
453 233536 : GEN z = mulii(*gl,W[--i]); /*1/(Px-Qx)*/
454 233536 : FpE_add_i(N,z, Px,Py, Qx,Qy, X6+j,Y6+j);
455 233536 : set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
456 : }
457 233536 : while (i--) /* nbc times */
458 : {
459 233536 : pari_sp av2 = avma;
460 233536 : GEN Px = X1[i], Py = Y1[i], Qx = X2[i], Qy = Y2[i];
461 233536 : GEN z = i? mulii(*gl, W[i]): *gl; /*1/(Px-Qx)*/
462 233536 : FpE_add_i(N,z, Px,Py, Qx,Qy, X3+i,Y3+i);
463 233536 : if (!i) break;
464 226304 : set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
465 : }
466 7232 : return gc_int(av,0);
467 : }
468 :
469 : /* Parallel doubling on nbc curves, assigning the result to locations at
470 : * and following *X2. Safe to be called with X2 equal to X1. Return
471 : * value as for ecm_elladd. If we find a point at infinity mod N,
472 : * and if X1 != X2, we copy the points at X1 to X2. */
473 : static int
474 40628 : elldouble(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2)
475 : {
476 40628 : GEN *Y1 = X1+nbc, *Y2 = X2+nbc;
477 : GEN W[nbcmax+1]; /* W[0] unused */
478 : long i;
479 40628 : pari_sp av = avma;
480 40628 : /*W[0] = gen_1;*/ W[1] = Y1[0];
481 1237984 : for (i=1; i<nbc; i++) W[i+1] = modii(mulii(Y1[i], W[i]), N);
482 40628 : if (!invmod(W[nbc], N, gl))
483 : {
484 0 : if (!equalii(N,*gl)) return 2;
485 0 : ZV_aff(2*nbc,X1,X2); /* also copies Y1->Y2 */
486 0 : return gc_int(av,1);
487 : }
488 1278612 : while (i--) /* nbc times */
489 : {
490 : pari_sp av2;
491 1237984 : GEN v, w, L, z = i? mulii(*gl,W[i]): *gl;
492 1237984 : if (i) *gl = modii(mulii(*gl, Y1[i]), N);
493 1237984 : av2 = avma;
494 1237984 : L = modii(mulii(addui(1, mului(3, Fp_sqr(X1[i],N))), z), N);
495 1237984 : if (signe(L)) /* half of zero is still zero */
496 1237984 : L = shifti(mod2(L)? addii(L, N): L, -1);
497 1237984 : v = modii(subii(sqri(L), shifti(X1[i],1)), N);
498 1237984 : w = modii(subii(mulii(L, subii(X1[i], v)), Y1[i]), N);
499 1237984 : affii(v, X2[i]);
500 1237984 : affii(w, Y2[i]);
501 1237984 : set_avma(av2);
502 : }
503 40628 : return gc_int(av,0);
504 : }
505 :
506 : /* Parallel multiplication by an odd prime k on nbc curves, storing the
507 : * result to locations at and following *X2. Safe to be called with X2 = X1.
508 : * Return values as ecm_elladd. Uses (a simplified variant of) Montgomery's
509 : * PRAC algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
510 : * With thanks to Paul Zimmermann for the reference. --GN1998Aug13 */
511 : static int
512 209741 : get_rule(ulong d, ulong e)
513 : {
514 209741 : if (d <= e + (e>>2)) /* floor(1.25*e) */
515 : {
516 16710 : if ((d+e)%3 == 0) return 0; /* rule 1 */
517 9969 : if ((d-e)%6 == 0) return 1; /* rule 2 */
518 : }
519 : /* d <= 4*e but no ofl */
520 202946 : if ((d+3)>>2 <= e) return 2; /* rule 3, common case */
521 12187 : if ((d&1)==(e&1)) return 1; /* rule 4 = rule 2 */
522 6348 : if (!(d&1)) return 3; /* rule 5 */
523 1770 : if (d%3 == 0) return 4; /* rule 6 */
524 417 : if ((d+e)%3 == 0) return 5; /* rule 7 */
525 0 : if ((d-e)%3 == 0) return 6; /* rule 8 */
526 : /* when we get here, e is even, otherwise one of rules 4,5 would apply */
527 0 : return 7; /* rule 9 */
528 : }
529 :
530 : /* PRAC implementation notes - main changes against the paper version:
531 : * (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for m!=n)
532 : * to an ecm_elladd() which does not depend on the third argument; thus
533 : * references to the third variable (C in the paper) can be eliminated.
534 : * (2) Since our multipliers are prime, the outer loop of the paper
535 : * version executes only once, and thus is invisible above.
536 : * (3) The first step in the inner loop of the paper version will always be
537 : * rule 3, but the addition requested by this rule amounts to a doubling, and
538 : * will always be followed by a swap, so we have unrolled this first iteration.
539 : * (4) Simplifications in rules 6 and 7 are possible given the above, and we
540 : * save one addition in each of the two cases. NB none of the other
541 : * ecm_elladd()s in the loop can ever degenerate into an elldouble.
542 : * (5) I tried to optimize for rule 3, which is used more frequently than all
543 : * others together, but it didn't improve things, so I removed the nested
544 : * tight loop again. --GN */
545 : /* The main loop body of ellfacteur() runs _slower_ under PRAC than under a
546 : * straightforward left-shift binary multiplication when N has <30 digits and
547 : * B1 is small; PRAC wins when N and B1 get larger. Weird. --GN */
548 : /* k>2 assumed prime, XAUX = scratchpad */
549 : static int
550 26036 : ellmult(GEN N, GEN *gl, long nbc, ulong k, GEN *X1, GEN *X2, GEN *XAUX)
551 : {
552 : ulong r, d, e, e1;
553 : int res;
554 26036 : GEN *A = X2, *B = XAUX, *T = XAUX + 2*nbc;
555 :
556 26036 : ZV_aff(2*nbc,X1,XAUX);
557 : /* first doubling picks up X1; after this we'll be working in XAUX and
558 : * X2 only, mostly via A and B and T */
559 26036 : if ((res = elldouble(N, gl, nbc, X1, X2)) != 0) return res;
560 :
561 : /* split the work at the golden ratio */
562 26036 : r = (ulong)(k*0.61803398875 + .5);
563 26036 : d = k - r;
564 26036 : e = r - d; /* d+e == r, so no danger of ofl below */
565 235756 : while (d != e)
566 : { /* apply one of the nine transformations from PM's Table 4. */
567 209741 : switch(get_rule(d,e))
568 : {
569 6741 : case 0: /* rule 1 */
570 6741 : if ( (res = ecm_elladd(N, gl, nbc, A, B, T)) ) return res;
571 6740 : if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
572 6739 : e1 = d - e; d = (d + e1)/3; e = (e - e1)/3; break;
573 5893 : case 1: /* rules 2 and 4 */
574 5893 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
575 5891 : if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
576 5891 : d = (d-e)>>1; break;
577 4578 : case 3: /* rule 5 */
578 4578 : if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
579 4578 : d >>= 1; break;
580 1353 : case 4: /* rule 6 */
581 1353 : if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
582 1353 : if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
583 1353 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
584 1353 : d = d/3 - e; break;
585 190759 : case 2: /* rule 3 */
586 190759 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
587 190742 : d -= e; break;
588 417 : case 5: /* rule 7 */
589 417 : if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
590 417 : if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
591 417 : d = (d - 2*e)/3; break;
592 0 : case 6: /* rule 8 */
593 0 : if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
594 0 : if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
595 0 : if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
596 0 : d = (d - e)/3; break;
597 0 : case 7: /* rule 9 */
598 0 : if ( (res = elldouble(N, gl, nbc, B, B)) ) return res;
599 0 : e >>= 1; break;
600 : }
601 : /* swap d <-> e and A <-> B if necessary */
602 209720 : if (d < e) { lswap(d,e); pswap(A,B); }
603 : }
604 26015 : return ecm_elladd(N, gl, nbc, XAUX, X2, X2);
605 : }
606 :
607 : struct ECM {
608 : pari_timer T;
609 : long nbc, nbc2, seed;
610 : GEN *X, *XAUX, *XT, *XD, *XB, *XB2, *XH, *Xh, *Yh;
611 : };
612 :
613 : /* memory layout in ellfacteur(): a large array of GEN pointers, and one
614 : * huge chunk of memory containing all the actual GEN (t_INT) objects.
615 : * nbc is constant throughout the invocation:
616 : * - The B1 stage of each iteration through the main loop needs little
617 : * space: enough for the X and Y coordinates of the current points,
618 : * and twice as much again as scratchpad for ellmult().
619 : * - The B2 stage, starting from some current set of points Q, needs, in
620 : * succession:
621 : * + space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
622 : * + space for 48*nbc X and Y coordinates to hold the helix. This could
623 : * re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
624 : * know in advance which residue class mod 210 our p is going to be in.
625 : * It can and should re-use [p]Q, though;
626 : * + space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
627 : * further doublings until the giant step multiplier is reached. This
628 : * can re-use the remaining cells from above. The computation of [210]Q
629 : * will have been the last call to ellmult() within this iteration of the
630 : * main loop, so the scratchpad is now also free to be re-used. We also
631 : * compute [630]Q by a parallel addition; we'll need it later to get the
632 : * baby-step table bootstrapped a little faster.
633 : * + Finally, for no more than 4 curves at a time, room for up to 1024 X
634 : * coordinates only: the Y coordinates needed whilst setting up this baby
635 : * step table are temporarily stored in the upper half, and overwritten
636 : * during the last series of additions.
637 : *
638 : * Graphically: after end of B1 stage (X,Y are the coords of Q):
639 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
640 : * | X Y | scratch | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q| ... | ...
641 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
642 : * *X *XAUX *XT *XD *XB
643 : *
644 : * [30]Q is computed from [10]Q. [210]Q can go into XY, etc:
645 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
646 : * |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210] |bstp table...
647 : * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
648 : * *X *XAUX *XT *XD [*XG, somewhere here] *XB .... *XH
649 : *
650 : * So we need (13 + 48) * 2 * nbc slots here + 4096 slots for the baby step
651 : * table (not all of which will be used when we start with a small B1, but
652 : * better to allocate and initialize ahead of time all the slots that might
653 : * be needed later).
654 : *
655 : * Note on memory locality: During the B2 phase, accesses to the helix
656 : * (once it is set up) will be clustered by curves (4 out of nbc at a time).
657 : * Accesses to the baby steps table will wander from one end of the array to
658 : * the other and back, one such cycle per giant step, and during a full cycle
659 : * we would expect on the order of 2E4 accesses when using the largest giant
660 : * step size. Thus we shouldn't be doing too bad with respect to thrashing
661 : * a 512KBy L2 cache. However, we don't want the baby step table to grow
662 : * larger than this, even if it would reduce the number of EC operations by a
663 : * few more per cent for very large B2, lest cache thrashing slow down
664 : * everything disproportionally. --GN */
665 : /* Auxiliary routines need < (3*nbc+240)*lN words on the PARI stack, in
666 : * addition to the spc*(lN+1) words occupied by our main table. */
667 : static void
668 65 : ECM_alloc(struct ECM *E, long lN)
669 : {
670 65 : const long bstpmax = 1024; /* max number of baby step table entries */
671 65 : long spc = (13 + 48) * E->nbc2 + bstpmax * 4;
672 65 : long len = spc + 385 + spc*lN;
673 65 : long i, tw = _evallg(lN) | evaltyp(t_INT);
674 65 : GEN w, *X = (GEN*)new_chunk(len);
675 : /* hack for X[i] = cgeti(lN). X = current point in B1 phase */
676 65 : w = (GEN)(X + spc + 385);
677 417585 : for (i = spc-1; i >= 0; i--) { X[i] = w; *w = tw; w += lN; }
678 65 : E->X = X;
679 65 : E->XAUX = E->X + E->nbc2; /* scratchpad for ellmult() */
680 65 : E->XT = E->XAUX + E->nbc2; /* ditto, will later hold [3*210]Q */
681 65 : E->XD = E->XT + E->nbc2; /* room for various multiples */
682 65 : E->XB = E->XD + 10*E->nbc2; /* start of baby steps table */
683 65 : E->XB2 = E->XB + 2 * bstpmax; /* middle of baby steps table */
684 65 : E->XH = E->XB2 + 2 * bstpmax; /* end of bstps table, start of helix */
685 65 : E->Xh = E->XH + 48*E->nbc2; /* little helix, X coords */
686 65 : E->Yh = E->XH + 192; /* ditto, Y coords */
687 : /* XG,YG set inside the main loop, since they depend on B2 */
688 : /* E.Xh range of 384 pointers not set; these will later duplicate the pointers
689 : * in the E.XH range, 4 curves at a time. Some of the cells reserved here for
690 : * the E.XB range will never be used, instead, we'll warp the pointers to
691 : * connect to (read-only) GENs in the X/E.XD range */
692 65 : }
693 : /* N.B. E->seed is not initialized here */
694 : static void
695 65 : ECM_init(struct ECM *E, GEN N, long nbc)
696 : {
697 65 : if (nbc < 0)
698 : { /* choose a sensible default */
699 57 : const long size = expi(N) + 1;
700 57 : nbc = ((size >> 3) << 2) - 80;
701 57 : if (nbc < 8) nbc = 8;
702 : }
703 65 : if (nbc > nbcmax) nbc = nbcmax;
704 65 : E->nbc = nbc;
705 65 : E->nbc2 = nbc << 1;
706 65 : ECM_alloc(E, lgefint(N));
707 65 : }
708 :
709 : static GEN
710 102 : ECM_loop(struct ECM *E, GEN N, ulong B1)
711 : {
712 102 : const ulong B2 = 110 * B1, B2_rt = usqrt(B2);
713 102 : const ulong nbc = E->nbc, nbc2 = E->nbc2;
714 : pari_sp av1, avtmp;
715 : long i, np, np0, gse, gss, bstp, bstp0, rcn0, rcn;
716 : ulong B2_p, m, p, p0;
717 : GEN g, *XG, *YG;
718 102 : GEN *X = E->X, *XAUX = E->XAUX, *XT = E->XT, *XD = E->XD;
719 102 : GEN *XB = E->XB, *XB2 = E->XB2, *XH = E->XH, *Xh = E->Xh, *Yh = E->Yh;
720 : /* pick curves */
721 4614 : for (i = nbc2; i--; ) affui(E->seed++, X[i]);
722 : /* pick giant step exponent and size */
723 102 : gse = B1 < 656
724 : ? (B1 < 200? 5: 6)
725 102 : : (B1 < 10500
726 : ? (B1 < 2625? 7: 8)
727 : : (B1 < 42000? 9: 10));
728 102 : gss = 1UL << gse;
729 : /* With 32 baby steps, a giant step corresponds to 32*420 = 13440,
730 : * appropriate for the smallest B2s. With 1024, a giant step will be 430080;
731 : * appropriate for B1 >~ 42000, where 512 baby steps would imply roughly
732 : * the same number of curve additions. */
733 102 : XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
734 102 : YG = XG + nbc;
735 :
736 102 : if (DEBUGLEVEL >= 4) {
737 0 : err_printf("ECM: time = %6ld ms\nECM: B1 = %4lu,", timer_delay(&E->T), B1);
738 0 : err_printf("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
739 : }
740 102 : p = 2; np = 1; /* p is np-th prime */
741 :
742 : /* ---B1 PHASE--- */
743 : /* treat p=2 separately */
744 102 : B2_p = B2 >> 1;
745 1736 : for (m=1; m<=B2_p; m<<=1)
746 : {
747 1634 : int fl = elldouble(N, &g, nbc, X, X);
748 1634 : if (fl > 1) return g; else if (fl) break;
749 : }
750 102 : rcn = NPRC; /* multipliers begin at the beginning */
751 : /* p=3,...,nextprime(B1) */
752 6662 : while (p < B1 && p <= B2_rt)
753 : {
754 6568 : pari_sp av2 = avma;
755 6568 : p = snextpr(p, &np, &rcn, NULL, uisprime);
756 6568 : B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
757 22497 : for (m=1; m<=B2_p; m*=p)
758 : {
759 15955 : int fl = ellmult(N, &g, nbc, p, X, X, XAUX);
760 15955 : if (fl > 1) return g; else if (fl) break;
761 15929 : set_avma(av2);
762 : }
763 6560 : set_avma(av2);
764 : }
765 : /* primes p larger than sqrt(B2) appear only to the 1st power */
766 9929 : while (p < B1)
767 : {
768 9853 : pari_sp av2 = avma;
769 9853 : p = snextpr(p, &np, &rcn, NULL, uisprime);
770 9853 : if (ellmult(N, &g, nbc, p, X, X, XAUX) > 1) return g;
771 9835 : set_avma(av2);
772 : }
773 76 : if (DEBUGLEVEL >= 4) {
774 0 : err_printf("ECM: time = %6ld ms, B1 phase done, ", timer_delay(&E->T));
775 0 : err_printf("p = %lu, setting up for B2\n", p);
776 : }
777 :
778 : /* ---B2 PHASE--- */
779 : /* compute [2]Q,...,[10]Q, needed to build the helix */
780 76 : if (elldouble(N, &g, nbc, X, XD) > 1) return g; /*[2]Q*/
781 76 : if (elldouble(N, &g, nbc, XD, XD + nbc2) > 1) return g; /*[4]Q*/
782 76 : if (ecm_elladd(N, &g, nbc,
783 76 : XD, XD + nbc2, XD + (nbc<<2)) > 1) return g; /* [6]Q */
784 76 : if (ecm_elladd2(N, &g, nbc,
785 : XD, XD + (nbc<<2), XT + (nbc<<3),
786 76 : XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
787 0 : return g; /* [8]Q and [10]Q */
788 76 : if (DEBUGLEVEL >= 7) err_printf("\t(got [2]Q...[10]Q)\n");
789 :
790 : /* get next prime (still using the foolproof test) */
791 76 : p = snextpr(p, &np, &rcn, NULL, uisprime);
792 : /* make sure we have the residue class number (mod 210) */
793 76 : if (rcn == NPRC)
794 : {
795 76 : rcn = prc210_no[(p % 210) >> 1];
796 76 : if (rcn == NPRC)
797 : {
798 0 : err_printf("ECM: %lu should have been prime but isn\'t\n", p);
799 0 : pari_err_BUG("ellfacteur");
800 : }
801 : }
802 :
803 : /* compute [p]Q and put it into its place in the helix */
804 76 : if (ellmult(N, &g, nbc, p, X, XH + rcn*nbc2, XAUX) > 1)
805 0 : return g;
806 76 : if (DEBUGLEVEL >= 7)
807 0 : err_printf("\t(got [p]Q, p = %lu = prc210_rp[%ld] mod 210)\n", p, rcn);
808 :
809 : /* save current p, np, and rcn; we'll need them more than once below */
810 76 : p0 = p; np0 = np; rcn0 = rcn;
811 76 : bstp0 = 0; /* p is at baby-step offset 0 from itself */
812 :
813 : /* fill up the helix, stepping forward through the prime residue classes
814 : * mod 210 until we're back at the r'class of p0. Keep updating p so
815 : * that we can print meaningful diagnostics if a factor shows up; don't
816 : * bother checking which of these p's are in fact prime */
817 3648 : for (i = 47; i; i--) /* 47 iterations */
818 : {
819 3572 : ulong dp = (ulong)prc210_d1[rcn];
820 3572 : p += dp;
821 3572 : if (rcn == 47)
822 : { /* wrap mod 210 */
823 76 : if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH) > 1) return g;
824 76 : rcn = 0; continue;
825 : }
826 3496 : if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH+rcn*nbc2+nbc2) > 1)
827 0 : return g;
828 3496 : rcn++;
829 : }
830 76 : if (DEBUGLEVEL >= 7) err_printf("\t(got initial helix)\n");
831 : /* compute [210]Q etc, needed for the baby step table */
832 76 : if (ellmult(N, &g, nbc, 3, XD + (nbc<<3), X, XAUX) > 1) return g;
833 76 : if (ellmult(N, &g, nbc, 7, X, X, XAUX) > 1) return g; /* [210]Q */
834 : /* this was the last call to ellmult() in the main loop body; may now
835 : * overwrite XAUX and slots XD and following */
836 76 : if (elldouble(N, &g, nbc, X, XAUX) > 1) return g; /* [420]Q */
837 76 : if (ecm_elladd(N, &g, nbc, X, XAUX, XT) > 1) return g;/*[630]Q*/
838 76 : if (ecm_elladd(N, &g, nbc, X, XT, XD) > 1) return g; /*[840]Q*/
839 567 : for (i=1; i <= gse; i++)
840 491 : if (elldouble(N, &g, nbc, XT + i*nbc2, XD + i*nbc2) > 1) return g;
841 : /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
842 :
843 76 : if (DEBUGLEVEL >= 4)
844 0 : err_printf("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
845 : timer_delay(&E->T), p);
846 :
847 394 : for (i = nbc - 4; i >= 0; i -= 4)
848 : { /* loop over small sets of 4 curves at a time */
849 : GEN *Xb;
850 : long j, k;
851 325 : if (DEBUGLEVEL >= 6)
852 0 : err_printf("ECM: finishing curves %ld...%ld\n", i, i+3);
853 : /* Copy relevant pointers from XH to Xh. Memory layout in XH:
854 : * nbc X coordinates, nbc Y coordinates for residue class
855 : * 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory layout for
856 : * Xh is: four X coords for 1 mod 210, four for 11 mod 210, ..., four
857 : * for 209 mod 210, then the corresponding Y coordinates in the same
858 : * order. This allows a giant step on Xh using just three calls to
859 : * ecm_elladd0() each acting on 64 points in parallel */
860 15925 : for (j = 48; j--; )
861 : {
862 15600 : k = nbc2*j + i;
863 15600 : m = j << 2; /* X coordinates */
864 15600 : Xh[m] = XH[k]; Xh[m+1] = XH[k+1];
865 15600 : Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
866 15600 : k += nbc; /* Y coordinates */
867 15600 : Yh[m] = XH[k]; Yh[m+1] = XH[k+1];
868 15600 : Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
869 : }
870 : /* Build baby step table of X coords of multiples of [210]Q. XB[4*j]
871 : * will point at X coords on four curves from [(j+1)*210]Q. Until
872 : * we're done, we need some Y coords as well, which we keep in the
873 : * second half of the table, overwriting them at the end when gse=10.
874 : * Multiples which we already have (by 1,2,3,4,8,16,...,2^gse) are
875 : * entered simply by copying the pointers, ignoring the few slots in w
876 : * that were initially reserved for them. Here are the initial entries */
877 975 : for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* first X, then Y coords */
878 : {
879 650 : Xb[0] = X[j]; Xb[1] = X[j+1]; /* [210]Q */
880 650 : Xb[2] = X[j+2]; Xb[3] = X[j+3];
881 650 : Xb[4] = XAUX[j]; Xb[5] = XAUX[j+1]; /* [420]Q */
882 650 : Xb[6] = XAUX[j+2]; Xb[7] = XAUX[j+3];
883 650 : Xb[8] = XT[j]; Xb[9] = XT[j+1]; /* [630]Q */
884 650 : Xb[10] = XT[j+2]; Xb[11] = XT[j+3];
885 650 : Xb += 4; /* points at [420]Q */
886 : /* ... entries at powers of 2 times 210 .... */
887 4075 : for (m = 2; m < (ulong)gse+k; m++) /* omit Y coords of [2^gse*210]Q */
888 : {
889 3425 : long m2 = m*nbc2 + j;
890 3425 : Xb += (2UL<<m); /* points at [2^m*210]Q */
891 3425 : Xb[0] = XAUX[m2]; Xb[1] = XAUX[m2+1];
892 3425 : Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
893 : }
894 : }
895 325 : if (DEBUGLEVEL >= 7)
896 0 : err_printf("\t(extracted precomputed helix / baby step entries)\n");
897 : /* ... glue in between, up to 16*210 ... */
898 325 : if (ecm_elladd0(N, &g, 12, 4, /* 12 pts + (4 pts replicated thrice) */
899 : XB + 12, XB2 + 12,
900 : XB, XB2,
901 0 : XB + 16, XB2 + 16) > 1) return g; /*4+{1,2,3} = {5,6,7}*/
902 325 : if (ecm_elladd0(N, &g, 28, 4, /* 28 pts + (4 pts replicated 7fold) */
903 : XB + 28, XB2 + 28,
904 : XB, XB2,
905 0 : XB + 32, XB2 + 32) > 1) return g;/*8+{1...7} = {9...15}*/
906 : /* ... and the remainder of the lot */
907 1225 : for (m = 5; m <= (ulong)gse; m++)
908 : { /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
909 900 : ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
910 1979 : for (j = 0; (ulong)j < m2-64; j+=64) /* executed 0 times when m = 5 */
911 : {
912 1906 : if (ecm_elladd0(N, &g, 64, 4,
913 1079 : XB + m2-4, XB2 + m2-4,
914 1079 : XB + j, XB2 + j,
915 1906 : XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
916 0 : return g;
917 : } /* j = m2-64 here, 60 points left */
918 1225 : if (ecm_elladd0(N, &g, 60, 4,
919 900 : XB + m2-4, XB2 + m2-4,
920 900 : XB + j, XB2 + j,
921 1225 : XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
922 0 : return g;
923 : /* when m=gse, drop Y coords of result, and when both equal 1024,
924 : * overwrite Y coords of second argument with X coords of result */
925 : }
926 325 : if (DEBUGLEVEL >= 7) err_printf("\t(baby step table complete)\n");
927 : /* initialize a few other things */
928 325 : bstp = bstp0; p = p0; np = np0; rcn = rcn0;
929 325 : g = gen_1; av1 = avma;
930 : /* scratchspace for prod (x_i-x_j) */
931 325 : avtmp = (pari_sp)new_chunk(8 * lgefint(N));
932 : /* The correct entry in XB to use depends on bstp and on where we are
933 : * on the helix. As we skip from prime to prime, bstp is incremented
934 : * by snextpr each time we wrap around through residue class number 0
935 : * (1 mod 210), but the baby step should not be taken until rcn>=rcn0,
936 : * i.e. until we pass again the residue class of p0.
937 : *
938 : * The correct signed multiplier is thus k = bstp - (rcn < rcn0),
939 : * and the offset from XB is four times (|k| - 1). When k=0, we ignore
940 : * the current prime: if it had led to a factorization, this
941 : * would have been noted during the last giant step, or -- when we
942 : * first get here -- whilst initializing the helix. When k > gss,
943 : * we must do a giant step and bump bstp back by -2*gss.
944 : *
945 : * The gcd of the product of X coord differences against N is taken just
946 : * before we do a giant step. */
947 4518838 : while (p < B2)
948 : {/* loop over probable primes p0 < p <= nextprime(B2), inserting giant
949 : * steps as necessary */
950 4518520 : p = snextpr(p, &np, &rcn, &bstp, uis2psp); /* next probable prime */
951 : /* work out the corresponding baby-step multiplier */
952 4518520 : k = bstp - (rcn < rcn0 ? 1 : 0);
953 4518520 : if (k > gss)
954 : { /* giant-step time, take gcd */
955 1116 : g = gcdii(g, N);
956 1116 : if (!is_pm1(g) && !equalii(g, N)) return g;
957 1109 : g = gen_1; set_avma(av1);
958 2218 : while (k > gss)
959 : { /* giant step */
960 1109 : if (DEBUGLEVEL >= 7) err_printf("\t(giant step at p = %lu)\n", p);
961 1109 : if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
962 0 : Xh, Yh, Xh, Yh) > 1) return g;
963 1109 : if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
964 : Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1)
965 0 : return g;
966 1109 : if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
967 : Xh + 128, Yh + 128, Xh + 128, Yh + 128) > 1)
968 0 : return g;
969 1109 : bstp -= (gss << 1);
970 1109 : k = bstp - (rcn < rcn0? 1: 0); /* recompute multiplier */
971 : }
972 : }
973 4518513 : if (!k) continue; /* point of interest is already in Xh */
974 4493724 : if (k < 0) k = -k;
975 4493724 : m = ((ulong)k - 1) << 2;
976 : /* accumulate product of differences of X coordinates */
977 4493724 : j = rcn<<2;
978 4493724 : avma = avtmp; /* go to garbage zone; don't use set_avma */
979 4493724 : g = modii(mulii(g, subii(XB[m], Xh[j])), N);
980 4493724 : g = modii(mulii(g, subii(XB[m+1], Xh[j+1])), N);
981 4493724 : g = modii(mulii(g, subii(XB[m+2], Xh[j+2])), N);
982 4493724 : g = mulii(g, subii(XB[m+3], Xh[j+3]));
983 4493724 : set_avma(av1);
984 4493724 : g = modii(g, N);
985 : }
986 318 : set_avma(av1);
987 : }
988 69 : return NULL;
989 : }
990 :
991 : /* ellfacteur() tuned to be useful as a first stage before MPQS, especially for
992 : * large arguments, when 'insist' is false, and now also for the case when
993 : * 'insist' is true, vaguely following suggestions by Paul Zimmermann
994 : * (http://www.loria.fr/~zimmerma/records/ecmnet.html). --GN 1998Jul,Aug */
995 : static GEN
996 3281 : ellfacteur(GEN N, int insist)
997 : {
998 3281 : const long size = expi(N) + 1;
999 3281 : pari_sp av = avma;
1000 : struct ECM E;
1001 3281 : long nbc, dsn, dsnmax, rep = 0;
1002 3281 : if (insist)
1003 : {
1004 8 : const long DSNMAX = numberof(TB1)-1;
1005 8 : dsnmax = (size >> 2) - 10;
1006 8 : if (dsnmax < 0) dsnmax = 0;
1007 7 : else if (dsnmax > DSNMAX) dsnmax = DSNMAX;
1008 8 : E.seed = 1 + (nbcmax<<7)*(size&0xffff); /* seed for choice of curves */
1009 :
1010 8 : dsn = (size >> 3) - 5;
1011 8 : if (dsn < 0) dsn = 0; else if (dsn > 47) dsn = 47;
1012 : /* pick up the torch where noninsistent stage would have given up */
1013 8 : nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
1014 8 : nbc &= ~3; /* 4 | nbc */
1015 : }
1016 : else
1017 : {
1018 3273 : dsn = (size - 140) >> 3;
1019 3273 : if (dsn < 0)
1020 : {
1021 : #ifndef __EMX__ /* unless DOS/EMX: MPQS's disk access is abysmally slow */
1022 3216 : if (DEBUGLEVEL >= 4)
1023 0 : err_printf("ECM: number too small to justify this stage\n");
1024 3216 : return NULL; /* too small, decline the task */
1025 : #endif
1026 : dsn = 0;
1027 57 : } else if (dsn > 12) dsn = 12;
1028 57 : rep = (size <= 248 ?
1029 57 : (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
1030 18 : (size - 224) >> 1);
1031 : #ifdef __EMX__ /* DOS/EMX: extra rounds (shun MPQS) */
1032 : rep += 20;
1033 : #endif
1034 57 : dsnmax = 72;
1035 : /* Use disjoint sets of curves for non-insist and insist phases; moreover,
1036 : * repeated calls acting on factors of the same original number should try
1037 : * to use fresh curves. The following achieves this */
1038 57 : E.seed = 1 + (nbcmax<<3)*(size & 0xf);
1039 57 : nbc = -1;
1040 : }
1041 65 : ECM_init(&E, N, nbc);
1042 65 : if (DEBUGLEVEL >= 4)
1043 : {
1044 0 : timer_start(&E.T);
1045 0 : err_printf("ECM: working on %ld curves at a time; initializing", E.nbc);
1046 0 : if (!insist)
1047 : {
1048 0 : if (rep == 1) err_printf(" for one round");
1049 0 : else err_printf(" for up to %ld rounds", rep);
1050 : }
1051 0 : err_printf("...\n");
1052 : }
1053 65 : if (dsn > dsnmax) dsn = dsnmax;
1054 : for(;;)
1055 37 : {
1056 102 : ulong B1 = insist? TB1[dsn]: TB1_for_stage[dsn];
1057 102 : GEN g = ECM_loop(&E, N, B1);
1058 102 : if (g)
1059 : {
1060 33 : if (DEBUGLEVEL >= 4)
1061 0 : err_printf("ECM: time = %6ld ms\n\tfound factor = %Ps\n",
1062 : timer_delay(&E.T), g);
1063 33 : return gerepilecopy(av, g);
1064 : }
1065 69 : if (dsn < dsnmax)
1066 : {
1067 68 : if (insist) dsn++;
1068 68 : else { dsn += 2; if (dsn > dsnmax) dsn = dsnmax; }
1069 : }
1070 69 : if (!insist && !--rep)
1071 : {
1072 32 : if (DEBUGLEVEL >= 4)
1073 0 : err_printf("ECM: time = %6ld ms,\tellfacteur giving up.\n",
1074 : timer_delay(&E.T));
1075 32 : return gc_NULL(av);
1076 : }
1077 : }
1078 : }
1079 : /* assume rounds >= 1, seed >= 1, B1 <= ULONG_MAX / 110 */
1080 : GEN
1081 0 : Z_ECM(GEN N, long rounds, long seed, ulong B1)
1082 : {
1083 0 : pari_sp av = avma;
1084 : struct ECM E;
1085 : long i;
1086 0 : E.seed = seed;
1087 0 : ECM_init(&E, N, -1);
1088 0 : if (DEBUGLEVEL >= 4) timer_start(&E.T);
1089 0 : for (i = rounds; i--; )
1090 : {
1091 0 : GEN g = ECM_loop(&E, N, B1);
1092 0 : if (g) return gerepilecopy(av, g);
1093 : }
1094 0 : return gc_NULL(av);
1095 : }
1096 :
1097 : /***********************************************************************/
1098 : /** **/
1099 : /** FACTORIZATION (Pollard-Brent rho) --GN1998Jun18-26 **/
1100 : /** pollardbrent() returns a nontrivial factor of n, assuming n is **/
1101 : /** composite and has no small prime divisor, or NULL if going on **/
1102 : /** would take more time than we want to spend. Sometimes it finds **/
1103 : /** more than one factor, and returns a structure suitable for **/
1104 : /** interpretation by ifac_crack. (Cf Algo 8.5.2 in ACiCNT) **/
1105 : /** **/
1106 : /***********************************************************************/
1107 : #define VALUE(x) gel(x,0)
1108 : #define EXPON(x) gel(x,1)
1109 : #define CLASS(x) gel(x,2)
1110 :
1111 : INLINE void
1112 53786 : INIT(GEN x, GEN v, GEN e, GEN c) {
1113 53786 : VALUE(x) = v;
1114 53786 : EXPON(x) = e;
1115 53786 : CLASS(x) = c;
1116 53786 : }
1117 : static void
1118 47566 : ifac_delete(GEN x) { INIT(x,NULL,NULL,NULL); }
1119 :
1120 : static void
1121 0 : rho_dbg(pari_timer *T, long c, long msg_mask)
1122 : {
1123 0 : if (c & msg_mask) return;
1124 0 : err_printf("Rho: time = %6ld ms,\t%3ld round%s\n",
1125 : timer_delay(T), c, (c==1?"":"s"));
1126 : }
1127 :
1128 : static void
1129 27715482 : one_iter(GEN *x, GEN *P, GEN x1, GEN n, long delta)
1130 : {
1131 27715482 : *x = addis(remii(sqri(*x), n), delta);
1132 27646334 : *P = modii(mulii(*P, subii(x1, *x)), n);
1133 27716411 : }
1134 : /* Return NULL when we run out of time, or a single t_INT containing a
1135 : * nontrivial factor of n, or a vector of t_INTs, each triple of successive
1136 : * entries containing a factor, an exponent (equal to one), and a factor
1137 : * class (NULL for unknown or zero for known composite), matching the
1138 : * internal representation used by the ifac_*() routines below. Repeated
1139 : * factors may arise; the caller will sort the factors anyway. Result
1140 : * is not gerepile-able (contains NULL) */
1141 : static GEN
1142 439 : pollardbrent_i(GEN n, long size, long c0, long retries)
1143 : {
1144 439 : long tf = lgefint(n), delta, msg_mask, c, k, k1, l;
1145 : pari_sp av;
1146 : GEN x, x1, y, P, g, g1, res;
1147 : pari_timer T;
1148 :
1149 439 : if (DEBUGLEVEL >= 4) timer_start(&T);
1150 439 : c = c0 << 5; /* 2^5 iterations per round */
1151 878 : msg_mask = (size >= 448? 0x1fff:
1152 439 : (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
1153 439 : y = cgeti(tf);
1154 439 : x1= cgeti(tf);
1155 439 : av = avma;
1156 :
1157 439 : PB_RETRY:
1158 : /* trick to make a 'random' choice determined by n. Don't use x^2+0 or
1159 : * x^2-2, ever. Don't use x^2-3 or x^2-7 with a starting value of 2.
1160 : * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
1161 : *
1162 : * (the point being that when we get called again on a composite cofactor
1163 : * of something we've already seen, we had better avoid the same delta) */
1164 439 : switch ((size + retries) & 7)
1165 : {
1166 7 : case 0: delta= 1; break;
1167 99 : case 1: delta= -1; break;
1168 109 : case 2: delta= 3; break;
1169 49 : case 3: delta= 5; break;
1170 98 : case 4: delta= -5; break;
1171 28 : case 5: delta= 7; break;
1172 21 : case 6: delta= 11; break;
1173 : /* case 7: */
1174 28 : default: delta=-11; break;
1175 : }
1176 439 : if (DEBUGLEVEL >= 4)
1177 : {
1178 0 : if (!retries)
1179 0 : err_printf("Rho: searching small factor of %ld-bit integer\n", size);
1180 : else
1181 0 : err_printf("Rho: restarting for remaining rounds...\n");
1182 0 : err_printf("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
1183 : delta, c >> 5);
1184 : }
1185 439 : x = gen_2; P = gen_1; g1 = NULL; k = 1; l = 1;
1186 439 : affui(2, y);
1187 439 : affui(2, x1);
1188 : for (;;) /* terminated under the control of c */
1189 : { /* use the polynomial x^2 + delta */
1190 12965633 : one_iter(&x, &P, x1, n, delta);
1191 :
1192 12967547 : if ((--c & 0x1f)==0)
1193 : { /* one round complete */
1194 405415 : g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1195 405165 : if (c <= 0)
1196 : { /* getting bored */
1197 88 : if (DEBUGLEVEL >= 4)
1198 0 : err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1199 : timer_delay(&T));
1200 88 : return NULL;
1201 : }
1202 405077 : P = gen_1;
1203 405077 : if (DEBUGLEVEL >= 4) rho_dbg(&T, c0-(c>>5), msg_mask);
1204 405077 : affii(x,y); x = y; set_avma(av);
1205 : }
1206 :
1207 12965387 : if (--k) continue; /* normal end of loop body */
1208 :
1209 4837 : if (c & 0x1f) /* otherwise, we already checked */
1210 : {
1211 2620 : g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1212 2557 : P = gen_1;
1213 : }
1214 :
1215 : /* Fast forward phase, doing l inner iterations without computing gcds.
1216 : * Check first whether it would take us beyond the alloted time.
1217 : * Fast forward rounds count only half (although they're taking
1218 : * more like 2/3 the time of normal rounds). This to counteract the
1219 : * nuisance that all c0 between 4096 and 6144 would act exactly as
1220 : * 4096; with the halving trick only the range 4096..5120 collapses
1221 : * (similarly for all other powers of two) */
1222 4774 : if ((c -= (l>>1)) <= 0)
1223 : { /* got bored */
1224 67 : if (DEBUGLEVEL >= 4)
1225 0 : err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1226 : timer_delay(&T));
1227 67 : return NULL;
1228 : }
1229 4707 : c &= ~0x1f; /* keep it on multiples of 32 */
1230 :
1231 : /* Fast forward loop */
1232 4707 : affii(x, x1); set_avma(av); x = x1;
1233 4707 : k = l; l <<= 1;
1234 : /* don't show this for the first several (short) fast forward phases. */
1235 4707 : if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1236 0 : err_printf("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
1237 14778378 : for (k1=k; k1; k1--)
1238 : {
1239 14774587 : one_iter(&x, &P, x1, n, delta);
1240 14773083 : if ((k1 & 0x1f) == 0) gerepileall(av, 2, &x, &P);
1241 : }
1242 3870 : if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1243 0 : err_printf("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
1244 0 : timer_delay(&T), c0-(c>>5));
1245 3870 : affii(x,y); P = gerepileuptoint(av, P); x = y;
1246 : } /* forever */
1247 :
1248 284 : fin:
1249 : /* An accumulated gcd was > 1 */
1250 284 : if (!equalii(g,n))
1251 : { /* if it isn't n, and looks prime, return it */
1252 256 : if (MR_Jaeschke(g))
1253 : {
1254 256 : if (DEBUGLEVEL >= 4)
1255 : {
1256 0 : rho_dbg(&T, c0-(c>>5), 0);
1257 0 : err_printf("\tfound factor = %Ps\n",g);
1258 : }
1259 256 : return g;
1260 : }
1261 0 : set_avma(av); g1 = icopy(g); /* known composite, keep it safe */
1262 0 : av = avma;
1263 : }
1264 28 : else g1 = n; /* and work modulo g1 for backtracking */
1265 :
1266 : /* Here g1 is known composite */
1267 28 : if (DEBUGLEVEL >= 4 && size > 192)
1268 0 : err_printf("Rho: hang on a second, we got something here...\n");
1269 28 : x = y;
1270 : for(;;)
1271 : { /* backtrack until period recovered. Must terminate */
1272 252 : x = addis(remii(sqri(x), g1), delta);
1273 252 : g = gcdii(subii(x1, x), g1); if (!is_pm1(g)) break;
1274 :
1275 224 : if (DEBUGLEVEL >= 4 && (--c & 0x1f) == 0) rho_dbg(&T, c0-(c>>5), msg_mask);
1276 : }
1277 :
1278 28 : if (g1 == n || equalii(g,g1))
1279 : {
1280 28 : if (g1 == n && equalii(g,g1))
1281 : { /* out of luck */
1282 0 : if (DEBUGLEVEL >= 4)
1283 : {
1284 0 : rho_dbg(&T, c0-(c>>5), 0);
1285 0 : err_printf("\tPollard-Brent failed.\n");
1286 : }
1287 0 : if (++retries >= 4) pari_err_BUG("");
1288 0 : goto PB_RETRY;
1289 : }
1290 : /* half lucky: we've split n, but g1 equals either g or n */
1291 28 : if (DEBUGLEVEL >= 4)
1292 : {
1293 0 : rho_dbg(&T, c0-(c>>5), 0);
1294 0 : err_printf("\tfound %sfactor = %Ps\n", (g1!=n ? "composite " : ""), g);
1295 : }
1296 28 : res = cgetg(7, t_VEC);
1297 : /* g^1: known composite when g1!=n */
1298 28 : INIT(res+1, g, gen_1, (g1!=n? gen_0: NULL));
1299 : /* cofactor^1: status unknown */
1300 28 : INIT(res+4, diviiexact(n,g), gen_1, NULL);
1301 28 : return res;
1302 : }
1303 : /* g < g1 < n : our lucky day -- we've split g1, too */
1304 0 : res = cgetg(10, t_VEC);
1305 : /* unknown status for all three factors */
1306 0 : INIT(res+1, g, gen_1, NULL);
1307 0 : INIT(res+4, diviiexact(g1,g), gen_1, NULL);
1308 0 : INIT(res+7, diviiexact(n,g1), gen_1, NULL);
1309 0 : if (DEBUGLEVEL >= 4)
1310 : {
1311 0 : rho_dbg(&T, c0-(c>>5), 0);
1312 0 : err_printf("\tfound factors = %Ps, %Ps,\n\tand %Ps\n",
1313 0 : gel(res,1), gel(res,4), gel(res,7));
1314 : }
1315 0 : return res;
1316 : }
1317 : /* Tuning parameter: for input up to 64 bits long, we must not spend more
1318 : * than a very short time, for fear of slowing things down on average.
1319 : * With the current tuning formula, increase our efforts somewhat at 49 bit
1320 : * input (an extra round for each bit at first), and go up more and more
1321 : * rapidly after we pass 80 bits.-- Changed this to adjust for the presence of
1322 : * squfof, which will finish input up to 59 bits quickly. */
1323 : static GEN
1324 5258 : pollardbrent(GEN n)
1325 : {
1326 5258 : const long tune_pb_min = 14; /* even 15 seems too much. */
1327 5258 : long c0, size = expi(n) + 1;
1328 5258 : if (size <= 28)
1329 154 : c0 = 32;/* amounts very nearly to 'insist'. Now that we have squfof(), we
1330 : * don't insist any more when input is 2^29 ... 2^32 */
1331 5104 : else if (size <= 96)
1332 4819 : return NULL;
1333 285 : else if (size <= 301)
1334 : /* nonlinear increase in effort, kicking in around 80 bits */
1335 : /* 301 gives 48121 + tune_pb_min */
1336 278 : c0 = tune_pb_min + size - 60 +
1337 278 : ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
1338 : else
1339 7 : c0 = 49152; /* ECM is faster when it'd take longer */
1340 439 : return pollardbrent_i(n, size, c0, 0);
1341 : }
1342 : GEN
1343 0 : Z_pollardbrent(GEN n, long rounds, long seed)
1344 : {
1345 0 : pari_sp av = avma;
1346 0 : GEN v = pollardbrent_i(n, expi(n)+1, rounds, seed);
1347 0 : if (!v) return NULL;
1348 0 : if (typ(v) == t_INT) v = mkvec2(v, diviiexact(n,v));
1349 0 : else if (lg(v) == 7) v = mkvec2(gel(v,1), gel(v,4));
1350 0 : else v = mkvec3(gel(v,1), gel(v,4), gel(v,7));
1351 0 : return gerepilecopy(av, v);
1352 : }
1353 :
1354 : /***********************************************************************/
1355 : /** FACTORIZATION (Shanks' SQUFOF) --GN2000Sep30-Oct01 **/
1356 : /** squfof() returns a nontrivial factor of n, assuming n is odd, **/
1357 : /** composite, not a pure square, and has no small prime divisor, **/
1358 : /** or NULL if it fails to find one. It works on two discriminants **/
1359 : /** simultaneously (n and 5n for n=1(4), 3n and 4n for n=3(4)). **/
1360 : /** Present implementation is limited to input <2^59, and works most **/
1361 : /** of the time in signed arithmetic on integers <2^31 in absolute **/
1362 : /** size. (Cf. Algo 8.7.2 in ACiCNT) **/
1363 : /***********************************************************************/
1364 :
1365 : /* The following is invoked to walk back along the ambiguous cycle* until we
1366 : * hit an ambiguous form and thus the desired factor, which it returns. If it
1367 : * fails for any reason, it returns 0. It doesn't interfere with timing and
1368 : * diagnostics, which it leaves to squfof().
1369 : *
1370 : * Before we invoke this, we've found a form (A, B, -C) with A = a^2, where a
1371 : * isn't blacklisted and where gcd(a, B) = 1. According to ACiCANT, we should
1372 : * now proceed reducing the form (a, -B, -aC), but it is easy to show that the
1373 : * first reduction step always sends this to (-aC, B, a), and the next one,
1374 : * with q computed as usual from B and a (occupying the c position), gives a
1375 : * reduced form, whose third member is easiest to recover by going back to D.
1376 : * From this point onwards, we're once again working with single-word numbers.
1377 : * No need to track signs, just work with the abs values of the coefficients. */
1378 : static long
1379 2158 : squfof_ambig(long a, long B, long dd, GEN D)
1380 : {
1381 : long b, c, q, qa, qc, qcb, a0, b0, b1, c0;
1382 2158 : long cnt = 0; /* count reduction steps on the cycle */
1383 :
1384 2158 : q = (dd + (B>>1)) / a;
1385 2158 : qa = q * a;
1386 2158 : b = (qa - B) + qa; /* avoid overflow */
1387 : {
1388 2158 : pari_sp av = avma;
1389 2158 : c = itos(divis(shifti(subii(D, sqrs(b)), -2), a));
1390 2158 : set_avma(av);
1391 : }
1392 : #ifdef DEBUG_SQUFOF
1393 : err_printf("SQUFOF: ambigous cycle of discriminant %Ps\n", D);
1394 : err_printf("SQUFOF: Form on ambigous cycle (%ld, %ld, %ld)\n", a, b, c);
1395 : #endif
1396 :
1397 2158 : a0 = a; b0 = b1 = b; /* end of loop detection and safeguard */
1398 :
1399 : for (;;) /* reduced cycles are finite */
1400 : { /* reduction step */
1401 1007688 : c0 = c;
1402 1007688 : if (c0 > dd)
1403 281271 : q = 1;
1404 : else
1405 726417 : q = (dd + (b>>1)) / c0;
1406 1007688 : if (q == 1)
1407 : {
1408 417320 : qcb = c0 - b; b = c0 + qcb; c = a - qcb;
1409 : }
1410 : else
1411 : {
1412 590368 : qc = q*c0; qcb = qc - b; b = qc + qcb; c = a - q*qcb;
1413 : }
1414 1007688 : a = c0;
1415 :
1416 1007688 : cnt++; if (b == b1) break;
1417 :
1418 : /* safeguard against infinite loop: recognize when we've walked the entire
1419 : * cycle in vain. (I don't think this can actually happen -- exercise.) */
1420 1005530 : if (b == b0 && a == a0) return 0;
1421 :
1422 1005530 : b1 = b;
1423 : }
1424 2158 : q = a&1 ? a : a>>1;
1425 2158 : if (DEBUGLEVEL >= 4)
1426 : {
1427 0 : if (q > 1)
1428 0 : err_printf("SQUFOF: found factor %ld from ambiguous form\n"
1429 : "\tafter %ld steps on the ambiguous cycle\n",
1430 0 : q / ugcd(q,15), cnt);
1431 : else
1432 0 : err_printf("SQUFOF: ...found nothing on the ambiguous cycle\n"
1433 : "\tafter %ld steps there\n", cnt);
1434 0 : if (DEBUGLEVEL >= 6) err_printf("SQUFOF: squfof_ambig returned %ld\n", q);
1435 : }
1436 2158 : return q;
1437 : }
1438 :
1439 : #define SQUFOF_BLACKLIST_SZ 64
1440 :
1441 : /* assume 2,3,5 do not divide n */
1442 : static GEN
1443 4974 : squfof(GEN n)
1444 : {
1445 : ulong d1, d2;
1446 4974 : long tf = lgefint(n), nm4, cnt = 0;
1447 : long a1, b1, c1, dd1, L1, a2, b2, c2, dd2, L2, a, q, c, qc, qcb;
1448 : GEN D1, D2;
1449 4974 : pari_sp av = avma;
1450 : long blacklist1[SQUFOF_BLACKLIST_SZ], blacklist2[SQUFOF_BLACKLIST_SZ];
1451 4974 : long blp1 = 0, blp2 = 0;
1452 4974 : int act1 = 1, act2 = 1;
1453 :
1454 : #ifdef LONG_IS_64BIT
1455 4206 : if (tf > 3 || (tf == 3 && uel(n,2) >= (1UL << 46)))
1456 : #else /* 32 bits */
1457 768 : if (tf > 4 || (tf == 4 && (ulong)(*int_MSW(n)) >= (1UL << 17))) /* 49 */
1458 : #endif
1459 3280 : return NULL; /* n too large */
1460 :
1461 : /* now we have 5 < n < 2^59 */
1462 1694 : nm4 = mod4(n);
1463 1694 : if (nm4 == 1)
1464 : { /* n = 1 (mod4): run one iteration on D1 = n, another on D2 = 5n */
1465 791 : D1 = n;
1466 791 : D2 = mului(5,n); d2 = itou(sqrti(D2)); dd2 = (long)((d2>>1) + (d2&1));
1467 791 : b2 = (long)((d2-1) | 1); /* b1, b2 will always stay odd */
1468 : }
1469 : else
1470 : { /* n = 3 (mod4): run one iteration on D1 = 3n, another on D2 = 4n */
1471 903 : D1 = mului(3,n);
1472 903 : D2 = shifti(n,2); dd2 = itou(sqrti(n)); d2 = dd2 << 1;
1473 903 : b2 = (long)(d2 & (~1UL)); /* largest even below d2, will stay even */
1474 : }
1475 1694 : d1 = itou(sqrti(D1));
1476 1694 : b1 = (long)((d1-1) | 1); /* largest odd number not exceeding d1 */
1477 1694 : c1 = itos(shifti(subii(D1, sqru((ulong)b1)), -2));
1478 1694 : if (!c1) pari_err_BUG("squfof [caller of] (n or 3n is a square)");
1479 1694 : c2 = itos(shifti(subii(D2, sqru((ulong)b2)), -2));
1480 1694 : if (!c2) pari_err_BUG("squfof [caller of] (5n is a square)");
1481 1694 : L1 = (long)usqrt(d1);
1482 1694 : L2 = (long)usqrt(d2);
1483 : /* dd1 used to compute floor((d1+b1)/2) as dd1+floor(b1/2), without
1484 : * overflowing the 31bit signed integer size limit. Same for dd2. */
1485 1694 : dd1 = (long) ((d1>>1) + (d1&1));
1486 1694 : a1 = a2 = 1;
1487 :
1488 : /* The two (identity) forms (a1,b1,-c1) and (a2,b2,-c2) are now set up.
1489 : *
1490 : * a1 and c1 represent the absolute values of the a,c coefficients; we keep
1491 : * track of the sign separately, via the iteration counter cnt: when cnt is
1492 : * even, c is understood to be negative, else c is positive and a < 0.
1493 : *
1494 : * L1, L2 are the limits for blacklisting small leading coefficients
1495 : * on the principal cycle, to guarantee that when we find a square form,
1496 : * its square root will belong to an ambiguous cycle (i.e. won't be an
1497 : * earlier form on the principal cycle).
1498 : *
1499 : * When n = 3(mod 4), D2 = 12(mod 16), and b^2 is always 0 or 4 mod 16.
1500 : * It follows that 4*a*c must be 4 or 8 mod 16, respectively, so at most
1501 : * one of a,c can be divisible by 2 at most to the first power. This fact
1502 : * is used a couple of times below.
1503 : *
1504 : * The flags act1, act2 remain true while the respective cycle is still
1505 : * active; we drop them to false when we return to the identity form with-
1506 : * out having found a square form (or when the blacklist overflows, which
1507 : * shouldn't happen). */
1508 1694 : if (DEBUGLEVEL >= 4)
1509 0 : err_printf("SQUFOF: entering main loop with forms\n"
1510 : "\t(1, %ld, %ld) and (1, %ld, %ld)\n\tof discriminants\n"
1511 : "\t%Ps and %Ps, respectively\n", b1, -c1, b2, -c2, D1, D2);
1512 :
1513 : /* MAIN LOOP: walk around the principal cycle looking for a square form.
1514 : * Blacklist small leading coefficients.
1515 : *
1516 : * The reduction operator can be computed entirely in 32-bit arithmetic:
1517 : * Let q = floor(floor((d1+b1)/2)/c1) (when c1>dd1, q=1, which happens
1518 : * often enough to special-case it). Then the new b1 = (q*c1-b1) + q*c1,
1519 : * which does not overflow, and the new c1 = a1 - q*(q*c1-b1), which is
1520 : * bounded by d1 in abs size since both the old and the new a1 are positive
1521 : * and bounded by d1. */
1522 1459669 : while (act1 || act2)
1523 : {
1524 1459668 : if (act1)
1525 : { /* send first form through reduction operator if active */
1526 1459656 : c = c1;
1527 1459656 : q = (c > dd1)? 1: (dd1 + (b1>>1)) / c;
1528 1459656 : if (q == 1)
1529 604307 : { qcb = c - b1; b1 = c + qcb; c1 = a1 - qcb; }
1530 : else
1531 855349 : { qc = q*c; qcb = qc - b1; b1 = qc + qcb; c1 = a1 - q*qcb; }
1532 1459656 : a1 = c;
1533 :
1534 1459656 : if (a1 <= L1)
1535 : { /* blacklist this */
1536 1196 : if (blp1 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1537 0 : act1 = 0; /* silently */
1538 : else
1539 : {
1540 1196 : if (DEBUGLEVEL >= 6)
1541 0 : err_printf("SQUFOF: blacklisting a = %ld on first cycle\n", a1);
1542 1196 : blacklist1[blp1++] = a1;
1543 : }
1544 : }
1545 : }
1546 1459668 : if (act2)
1547 : { /* send second form through reduction operator if active */
1548 1455178 : c = c2;
1549 1455178 : q = (c > dd2)? 1: (dd2 + (b2>>1)) / c;
1550 1455178 : if (q == 1)
1551 603667 : { qcb = c - b2; b2 = c + qcb; c2 = a2 - qcb; }
1552 : else
1553 851511 : { qc = q*c; qcb = qc - b2; b2 = qc + qcb; c2 = a2 - q*qcb; }
1554 1455178 : a2 = c;
1555 :
1556 1455178 : if (a2 <= L2)
1557 : { /* blacklist this */
1558 1096 : if (blp2 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1559 0 : act2 = 0; /* silently */
1560 : else
1561 : {
1562 1096 : if (DEBUGLEVEL >= 6)
1563 0 : err_printf("SQUFOF: blacklisting a = %ld on second cycle\n", a2);
1564 1096 : blacklist2[blp2++] = a2;
1565 : }
1566 : }
1567 : }
1568 :
1569 : /* bump counter, loop if this is an odd iteration (i.e. if the real
1570 : * leading coefficients are negative) */
1571 1459668 : if (++cnt & 1) continue;
1572 :
1573 : /* second half of main loop entered only when the leading coefficients
1574 : * are positive (i.e., during even-numbered iterations) */
1575 :
1576 : /* examine first form if active */
1577 729834 : if (act1 && a1 == 1) /* back to identity */
1578 : { /* drop this discriminant */
1579 1 : act1 = 0;
1580 1 : if (DEBUGLEVEL >= 4)
1581 0 : err_printf("SQUFOF: first cycle exhausted after %ld iterations,\n"
1582 : "\tdropping it\n", cnt);
1583 : }
1584 729834 : if (act1)
1585 : {
1586 729827 : if (uissquareall((ulong)a1, (ulong*)&a))
1587 : { /* square form */
1588 1404 : if (DEBUGLEVEL >= 4)
1589 0 : err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on first cycle\n"
1590 : "\tafter %ld iterations\n", a, b1, -c1, cnt);
1591 1404 : if (a <= L1)
1592 : { /* blacklisted? */
1593 : long j;
1594 2714 : for (j = 0; j < blp1; j++)
1595 1863 : if (a == blacklist1[j]) { a = 0; break; }
1596 : }
1597 1404 : if (a > 0)
1598 : { /* not blacklisted */
1599 851 : q = ugcd(a, b1); /* imprimitive form? */
1600 851 : if (q > 1)
1601 : { /* q^2 divides D1 hence n [ assuming n % 3 != 0 ] */
1602 0 : set_avma(av);
1603 0 : if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1604 0 : return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1605 : }
1606 : /* chase the inverse root form back along the ambiguous cycle */
1607 851 : q = squfof_ambig(a, b1, dd1, D1);
1608 851 : if (nm4 == 3 && q % 3 == 0) q /= 3;
1609 851 : if (q > 1) return gc_utoipos(av, q); /* SUCCESS! */
1610 : }
1611 553 : else if (DEBUGLEVEL >= 4) /* blacklisted */
1612 0 : err_printf("SQUFOF: ...but the root form seems to be on the "
1613 : "principal cycle\n");
1614 : }
1615 : }
1616 :
1617 : /* examine second form if active */
1618 729148 : if (act2 && a2 == 1) /* back to identity form */
1619 : { /* drop this discriminant */
1620 10 : act2 = 0;
1621 10 : if (DEBUGLEVEL >= 4)
1622 0 : err_printf("SQUFOF: second cycle exhausted after %ld iterations,\n"
1623 : "\tdropping it\n", cnt);
1624 : }
1625 729148 : if (act2)
1626 : {
1627 726902 : if (uissquareall((ulong)a2, (ulong*)&a))
1628 : { /* square form */
1629 1576 : if (DEBUGLEVEL >= 4)
1630 0 : err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on second cycle\n"
1631 : "\tafter %ld iterations\n", a, b2, -c2, cnt);
1632 1576 : if (a <= L2)
1633 : { /* blacklisted? */
1634 : long j;
1635 2728 : for (j = 0; j < blp2; j++)
1636 1421 : if (a == blacklist2[j]) { a = 0; break; }
1637 : }
1638 1576 : if (a > 0)
1639 : { /* not blacklisted */
1640 1307 : q = ugcd(a, b2); /* imprimitive form? */
1641 : /* NB if b2 is even, a is odd, so the gcd is always odd */
1642 1307 : if (q > 1)
1643 : { /* q^2 divides D2 hence n [ assuming n % 5 != 0 ] */
1644 0 : set_avma(av);
1645 0 : if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1646 0 : return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1647 : }
1648 : /* chase the inverse root form along the ambiguous cycle */
1649 1307 : q = squfof_ambig(a, b2, dd2, D2);
1650 1307 : if (nm4 == 1 && q % 5 == 0) q /= 5;
1651 1307 : if (q > 1) return gc_utoipos(av, q); /* SUCCESS! */
1652 : }
1653 269 : else if (DEBUGLEVEL >= 4) /* blacklisted */
1654 0 : err_printf("SQUFOF: ...but the root form seems to be on the "
1655 : "principal cycle\n");
1656 : }
1657 : }
1658 : } /* end main loop */
1659 :
1660 : /* both discriminants turned out to be useless. */
1661 1 : if (DEBUGLEVEL>=4) err_printf("SQUFOF: giving up\n");
1662 1 : return gc_NULL(av);
1663 : }
1664 :
1665 : /***********************************************************************/
1666 : /* DETECTING ODD POWERS --GN1998Jun28 */
1667 : /* Factoring engines like MPQS which ultimately rely on computing */
1668 : /* gcd(N, x^2-y^2) to find a nontrivial factor of N can't split */
1669 : /* N = p^k for an odd prime p, since (Z/p^k)^* is then cyclic. Here */
1670 : /* is an analogue of Z_issquareall() for 3rd, 5th and 7th powers. */
1671 : /* The general case is handled by is_kth_power */
1672 : /***********************************************************************/
1673 :
1674 : /* Multistage sieve. First stages work mod 211, 209, 61, 203 in this order
1675 : * (first reduce mod the product of these and then take the remainder apart).
1676 : * Second stages use 117, 31, 43, 71. Moduli which are no longer interesting
1677 : * are skipped. Everything is encoded in a table of 106 24-bit masks. We only
1678 : * need the first half of the residues. Three bits per modulus indicate which
1679 : * residues are 7th (bit 2), 5th (bit 1) or 3rd (bit 0) powers; the eight
1680 : * moduli above are assigned right-to-left. The table was generated using: */
1681 :
1682 : #if 0
1683 : L = [71, 43, 31, [O(3^2),O(13)], [O(7),O(29)], 61, [O(11),O(19)], 211];
1684 : ispow(x, N, k)=
1685 : {
1686 : if (type(N) == "t_INT", return (ispower(Mod(x,N), k)));
1687 : for (i = 1, #N, if (!ispower(x + N[i], k), return (0))); 1
1688 : }
1689 : check(r) =
1690 : {
1691 : print1(" 0");
1692 : for (i=1,#L,
1693 : N = 0;
1694 : if (ispow(r, L[i], 3), N += 1);
1695 : if (ispow(r, L[i], 5), N += 2);
1696 : if (ispow(r, L[i], 7), N += 4);
1697 : print1(N);
1698 : ); print("ul, /* ", r, " */")
1699 : }
1700 : for (r = 0, 105, check(r))
1701 : #endif
1702 : static ulong powersmod[106] = {
1703 : 077777777ul, /* 0 */
1704 : 077777777ul, /* 1 */
1705 : 013562440ul, /* 2 */
1706 : 012402540ul, /* 3 */
1707 : 013562440ul, /* 4 */
1708 : 052662441ul, /* 5 */
1709 : 016603440ul, /* 6 */
1710 : 016463450ul, /* 7 */
1711 : 013573551ul, /* 8 */
1712 : 012462540ul, /* 9 */
1713 : 012462464ul, /* 10 */
1714 : 013462771ul, /* 11 */
1715 : 012406473ul, /* 12 */
1716 : 012463641ul, /* 13 */
1717 : 052463646ul, /* 14 */
1718 : 012503446ul, /* 15 */
1719 : 013562440ul, /* 16 */
1720 : 052466440ul, /* 17 */
1721 : 012472451ul, /* 18 */
1722 : 012462454ul, /* 19 */
1723 : 032463550ul, /* 20 */
1724 : 013403664ul, /* 21 */
1725 : 013463460ul, /* 22 */
1726 : 032562565ul, /* 23 */
1727 : 012402540ul, /* 24 */
1728 : 052662441ul, /* 25 */
1729 : 032672452ul, /* 26 */
1730 : 013573551ul, /* 27 */
1731 : 012467541ul, /* 28 */
1732 : 012567640ul, /* 29 */
1733 : 032706450ul, /* 30 */
1734 : 012762452ul, /* 31 */
1735 : 033762662ul, /* 32 */
1736 : 012502562ul, /* 33 */
1737 : 032463562ul, /* 34 */
1738 : 013563440ul, /* 35 */
1739 : 016663440ul, /* 36 */
1740 : 036662550ul, /* 37 */
1741 : 012462552ul, /* 38 */
1742 : 033502450ul, /* 39 */
1743 : 012462643ul, /* 40 */
1744 : 033467540ul, /* 41 */
1745 : 017403441ul, /* 42 */
1746 : 017463462ul, /* 43 */
1747 : 017472460ul, /* 44 */
1748 : 033462470ul, /* 45 */
1749 : 052566450ul, /* 46 */
1750 : 013562640ul, /* 47 */
1751 : 032403640ul, /* 48 */
1752 : 016463450ul, /* 49 */
1753 : 016463752ul, /* 50 */
1754 : 033402440ul, /* 51 */
1755 : 012462540ul, /* 52 */
1756 : 012472540ul, /* 53 */
1757 : 053562462ul, /* 54 */
1758 : 012463465ul, /* 55 */
1759 : 012663470ul, /* 56 */
1760 : 052607450ul, /* 57 */
1761 : 012566553ul, /* 58 */
1762 : 013466440ul, /* 59 */
1763 : 012502741ul, /* 60 */
1764 : 012762744ul, /* 61 */
1765 : 012763740ul, /* 62 */
1766 : 012763443ul, /* 63 */
1767 : 013573551ul, /* 64 */
1768 : 013462471ul, /* 65 */
1769 : 052502460ul, /* 66 */
1770 : 012662463ul, /* 67 */
1771 : 012662451ul, /* 68 */
1772 : 012403550ul, /* 69 */
1773 : 073567540ul, /* 70 */
1774 : 072463445ul, /* 71 */
1775 : 072462740ul, /* 72 */
1776 : 012472442ul, /* 73 */
1777 : 012462644ul, /* 74 */
1778 : 013406650ul, /* 75 */
1779 : 052463471ul, /* 76 */
1780 : 012563474ul, /* 77 */
1781 : 013503460ul, /* 78 */
1782 : 016462441ul, /* 79 */
1783 : 016462440ul, /* 80 */
1784 : 012462540ul, /* 81 */
1785 : 013462641ul, /* 82 */
1786 : 012463454ul, /* 83 */
1787 : 013403550ul, /* 84 */
1788 : 057563540ul, /* 85 */
1789 : 017466441ul, /* 86 */
1790 : 017606471ul, /* 87 */
1791 : 053666573ul, /* 88 */
1792 : 012562561ul, /* 89 */
1793 : 013473641ul, /* 90 */
1794 : 032573440ul, /* 91 */
1795 : 016763440ul, /* 92 */
1796 : 016702640ul, /* 93 */
1797 : 033762552ul, /* 94 */
1798 : 012562550ul, /* 95 */
1799 : 052402451ul, /* 96 */
1800 : 033563441ul, /* 97 */
1801 : 012663561ul, /* 98 */
1802 : 012677560ul, /* 99 */
1803 : 012462464ul, /* 100 */
1804 : 032562642ul, /* 101 */
1805 : 013402551ul, /* 102 */
1806 : 032462450ul, /* 103 */
1807 : 012467445ul, /* 104 */
1808 : 032403440ul, /* 105 */
1809 : };
1810 :
1811 : static int
1812 3929138 : check_res(ulong x, ulong N, int shift, ulong *mask)
1813 : {
1814 3929138 : long r = x%N; if ((ulong)r> (N>>1)) r = N - r;
1815 3929138 : *mask &= (powersmod[r] >> shift);
1816 3929138 : return *mask;
1817 : }
1818 :
1819 : /* is x mod 211*209*61*203*117*31*43*71 a 3rd, 5th or 7th power ? */
1820 : int
1821 2422203 : uis_357_powermod(ulong x, ulong *mask)
1822 : {
1823 2422203 : if ( !check_res(x, 211UL, 0, mask)) return 0;
1824 978772 : if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1825 399730 : if (*mask & 3 && !check_res(x, 61UL, 6, mask)) return 0;
1826 222393 : if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1827 56894 : if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1828 32876 : if (*mask & 3 && !check_res(x, 31UL,15, mask)) return 0;
1829 24682 : if (*mask & 5 && !check_res(x, 43UL,18, mask)) return 0;
1830 7449 : if (*mask & 6 && !check_res(x, 71UL,21, mask)) return 0;
1831 3778 : return 1;
1832 : }
1833 : /* asume x > 0 and pt != NULL */
1834 : int
1835 2367393 : uis_357_power(ulong x, ulong *pt, ulong *mask)
1836 : {
1837 : double logx;
1838 2367393 : if (!odd(x))
1839 : {
1840 9081 : long v = vals(x);
1841 9081 : if (v % 7) *mask &= ~4;
1842 9081 : if (v % 5) *mask &= ~2;
1843 9081 : if (v % 3) *mask &= ~1;
1844 9081 : if (!*mask) return 0;
1845 : }
1846 2359800 : if (!uis_357_powermod(x, mask)) return 0;
1847 2974 : logx = log((double)x);
1848 3846 : while (*mask)
1849 : {
1850 : long e, b;
1851 : ulong y, ye;
1852 2974 : if (*mask & 1) { b = 1; e = 3; }
1853 873 : else if (*mask & 2) { b = 2; e = 5; }
1854 355 : else { b = 4; e = 7; }
1855 2974 : y = (ulong)(exp(logx / e) + 0.5);
1856 2974 : ye = upowuu(y,e);
1857 2974 : if (ye == x) { *pt = y; return e; }
1858 : #ifdef LONG_IS_64BIT
1859 750 : if (ye > x) y--; else y++;
1860 750 : ye = upowuu(y,e);
1861 750 : if (ye == x) { *pt = y; return e; }
1862 : #endif
1863 872 : *mask &= ~b; /* turn the bit off */
1864 : }
1865 872 : return 0;
1866 : }
1867 :
1868 : #ifndef LONG_IS_64BIT
1869 : /* as above, split in two functions */
1870 : /* is x mod 211*209*61*203 a 3rd, 5th or 7th power ? */
1871 : static int
1872 14411 : uis_357_powermod_32bit_1(ulong x, ulong *mask)
1873 : {
1874 14411 : if ( !check_res(x, 211UL, 0, mask)) return 0;
1875 7998 : if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1876 4120 : if (*mask & 3 && !check_res(x, 61UL, 6, mask)) return 0;
1877 2912 : if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1878 826 : return 1;
1879 : }
1880 : /* is x mod 117*31*43*71 a 3rd, 5th or 7th power ? */
1881 : static int
1882 826 : uis_357_powermod_32bit_2(ulong x, ulong *mask)
1883 : {
1884 826 : if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1885 657 : if (*mask & 3 && !check_res(x, 31UL,15, mask)) return 0;
1886 538 : if (*mask & 5 && !check_res(x, 43UL,18, mask)) return 0;
1887 267 : if (*mask & 6 && !check_res(x, 71UL,21, mask)) return 0;
1888 176 : return 1;
1889 : }
1890 : #endif
1891 :
1892 : /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power), a 5th
1893 : * power (but not a 7th), or a 7th power, and in this case creates the
1894 : * base on the stack and assigns its address to *pt. Otherwise returns 0.
1895 : * x must be of type t_INT and positive; this is not checked. The *mask
1896 : * argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
1897 : * bit 2: 7th pwr; set a bit to have the corresponding power examined --
1898 : * and is updated appropriately for a possible follow-up call */
1899 : int
1900 2801629 : is_357_power(GEN x, GEN *pt, ulong *mask)
1901 : {
1902 2801629 : long lx = lgefint(x);
1903 : ulong r;
1904 : pari_sp av;
1905 : GEN y;
1906 :
1907 2801629 : if (!*mask) return 0; /* useful when running in a loop */
1908 2430242 : if (DEBUGLEVEL>4)
1909 0 : err_printf("OddPwrs: examining %ld-bit integer\n", expi(x)+1);
1910 2430242 : if (lgefint(x) == 3) {
1911 : ulong t;
1912 2353428 : long e = uis_357_power(x[2], &t, mask);
1913 2353428 : if (e)
1914 : {
1915 2076 : if (pt) *pt = utoi(t);
1916 2076 : return e;
1917 : }
1918 2351352 : return 0;
1919 : }
1920 : #ifdef LONG_IS_64BIT
1921 62403 : r = (lx == 3)? uel(x,2): umodiu(x, 6046846918939827UL);
1922 62403 : if (!uis_357_powermod(r, mask)) return 0;
1923 : #else
1924 14411 : r = (lx == 3)? uel(x,2): umodiu(x, 211*209*61*203);
1925 14411 : if (!uis_357_powermod_32bit_1(r, mask)) return 0;
1926 826 : r = (lx == 3)? uel(x,2): umodiu(x, 117*31*43*71);
1927 826 : if (!uis_357_powermod_32bit_2(r, mask)) return 0;
1928 : #endif
1929 981 : av = avma;
1930 1602 : while (*mask)
1931 : {
1932 : long e, b;
1933 : /* priority to higher powers: if we have a 21st, it is easier to rediscover
1934 : * that its 7th root is a cube than that its cube root is a 7th power */
1935 1303 : if (*mask & 4) { b = 4; e = 7; }
1936 963 : else if (*mask & 2) { b = 2; e = 5; }
1937 366 : else { b = 1; e = 3; }
1938 1303 : y = mpround( sqrtnr(itor(x, nbits2prec(64 + bit_accuracy(lx) / e)), e) );
1939 1303 : if (equalii(powiu(y,e), x))
1940 : {
1941 682 : if (!pt) return gc_int(av,e);
1942 668 : set_avma((pari_sp)y); *pt = gerepileuptoint(av, y);
1943 668 : return e;
1944 : }
1945 621 : *mask &= ~b; /* turn the bit off */
1946 621 : set_avma(av);
1947 : }
1948 299 : return 0;
1949 : }
1950 :
1951 : /* Is x a n-th power ? If pt != NULL, it receives the n-th root */
1952 : ulong
1953 469712 : is_kth_power(GEN x, ulong n, GEN *pt)
1954 : {
1955 : forprime_t T;
1956 : long j;
1957 : ulong q, residue;
1958 : GEN y;
1959 469712 : pari_sp av = avma;
1960 :
1961 469712 : (void)u_forprime_arith_init(&T, odd(n)? 2*n+1: n+1, ULONG_MAX, 1,n);
1962 : /* we'll start at q, smallest prime >= n */
1963 :
1964 : /* Modular checks, use small primes q congruent 1 mod n */
1965 : /* A non n-th power nevertheless passes the test with proba n^(-#checks),
1966 : * We'd like this < 1e-6 but let j = floor(log(1e-6) / log(n)) which
1967 : * ensures much less. */
1968 469642 : if (n < 16)
1969 117786 : j = 5;
1970 351856 : else if (n < 32)
1971 154208 : j = 4;
1972 197648 : else if (n < 101)
1973 175850 : j = 3;
1974 21798 : else if (n < 1001)
1975 5242 : j = 2;
1976 16556 : else if (n < 17886697) /* smallest such that smallest suitable q is > 2^32 */
1977 16303 : j = 1;
1978 : else
1979 275 : j = 0;
1980 513057 : for (; j > 0; j--)
1981 : {
1982 510441 : if (!(q = u_forprime_next(&T))) break;
1983 : /* q a prime = 1 mod n */
1984 510500 : residue = umodiu(x, q);
1985 510525 : if (residue == 0)
1986 : {
1987 483 : if (Z_lval(x,q) % n) return gc_ulong(av,0);
1988 49 : continue;
1989 : }
1990 : /* n-th power mod q ? */
1991 510042 : if (Fl_powu(residue, (q-1)/n, q) != 1) return gc_ulong(av,0);
1992 : }
1993 2616 : set_avma(av);
1994 :
1995 2611 : if (DEBUGLEVEL>4) err_printf("\nOddPwrs: [%lu] passed modular checks\n",n);
1996 : /* go to the horse's mouth... */
1997 2611 : y = roundr( sqrtnr(itor(x, nbits2prec(expi(x)/n + 16)), n) );
1998 2611 : if (!equalii(powiu(y, n), x)) {
1999 1662 : if (DEBUGLEVEL>4) err_printf("\tBut it wasn't a pure power.\n");
2000 1662 : return gc_ulong(av,0);
2001 : }
2002 949 : if (!pt) set_avma(av); else { set_avma((pari_sp)y); *pt = gerepileuptoint(av,y); }
2003 949 : return 1;
2004 : }
2005 :
2006 : /* is x a p^i-th power, p >= 11 prime ? Similar to is_357_power(), but instead
2007 : * of the mask, we keep the current test exponent around. Cut off when
2008 : * log_2 x^(1/k) < cutoffbits since we would have found it by trial division.
2009 : * Everything needed here (primitive roots etc.) is computed from scratch on
2010 : * the fly; compared to the size of numbers under consideration, these
2011 : * word-sized computations take negligible time.
2012 : * Any cutoffbits > 0 is safe, but direct root extraction attempts are faster
2013 : * when trial division has been used to discover very small bases. We become
2014 : * competitive at cutoffbits ~ 10 */
2015 : int
2016 69319 : is_pth_power(GEN x, GEN *pt, forprime_t *T, ulong cutoffbits)
2017 : {
2018 69319 : long cnt=0, size = expi(x) /* not +1 */;
2019 : ulong p;
2020 69319 : pari_sp av = avma;
2021 484736 : while ((p = u_forprime_next(T)) && size/p >= cutoffbits) {
2022 415455 : long v = 1;
2023 415455 : if (DEBUGLEVEL>5 && cnt++==2000)
2024 0 : { cnt=0; err_printf("%lu%% ", 100*p*cutoffbits/size); }
2025 415509 : while (is_kth_power(x, p, pt)) {
2026 56 : v *= p; x = *pt;
2027 56 : size = expi(x);
2028 : }
2029 415459 : if (v > 1)
2030 : {
2031 42 : if (DEBUGLEVEL>5) err_printf("\nOddPwrs: is a %ld power\n",v);
2032 42 : return v;
2033 : }
2034 : }
2035 69267 : if (DEBUGLEVEL>5) err_printf("\nOddPwrs: not a power\n",p);
2036 69267 : return gc_int(av,0); /* give up */
2037 : }
2038 :
2039 : /***********************************************************************/
2040 : /** FACTORIZATION (master iteration) **/
2041 : /** Driver for the various methods of finding large factors **/
2042 : /** (after trial division has cast out the very small ones). **/
2043 : /** GN1998Jun24--30 **/
2044 : /***********************************************************************/
2045 :
2046 : /* Direct use:
2047 : * ifac_start_hint(n,moebius,hint) registers with the iterative factorizer
2048 : * - an integer n (without prime factors < tridiv_bound(n))
2049 : * - registers whether or not we should terminate early if we find a square
2050 : * factor,
2051 : * - a hint about which method(s) to use.
2052 : * This must always be called first. If input is not composite, oo loop.
2053 : * The routine decomposes n nontrivially into a product of two factors except
2054 : * in squarefreeness ('Moebius') mode.
2055 : *
2056 : * ifac_start(n,moebius) same using default hint.
2057 : *
2058 : * ifac_primary_factor() returns a prime divisor (not necessarily the
2059 : * smallest) and the corresponding exponent.
2060 : *
2061 : * Encapsulated user interface: Many arithmetic functions have a 'contributor'
2062 : * ifac_xxx, to be called on any large composite cofactor left over after trial
2063 : * division by small primes: xxx is one of moebius, issquarefree, totient, etc.
2064 : *
2065 : * We never test whether the input number is prime or composite, since
2066 : * presumably it will have come out of the small factors finder stage
2067 : * (which doesn't really exist yet but which will test the left-over
2068 : * cofactor for primality once it does). */
2069 :
2070 : /* The data structure in which we preserve whatever we know about our number N
2071 : * is kept on the PARI stack, and updated as needed.
2072 : * This makes the machinery re-entrant, and avoids memory leaks when a lengthy
2073 : * factorization is interrupted. We try to keep the whole affair connected,
2074 : * and the parent object is always older than its children. This may in
2075 : * rare cases lead to some extra copying around, and knowing what is garbage
2076 : * at any given time is not trivial. See below for examples how to do it right.
2077 : * (Connectedness is destroyed if callers of ifac_main() create stuff on the
2078 : * stack in between calls. This is harmless as long as ifac_realloc() is used
2079 : * to re-create a connected object at the head of the stack just before
2080 : * collecting garbage.)
2081 : * A t_INT may well have > 10^6 distinct prime factors larger than 2^16. Since
2082 : * we need not find factors in order of increasing size, we must be prepared to
2083 : * drag a very large amount of data around. We start with a small structure
2084 : * and extend it when necessary. */
2085 :
2086 : /* The idea of the algorithm is:
2087 : * Let N0 be whatever is currently left of N after dividing off all the
2088 : * prime powers we have already returned to the caller. Then we maintain
2089 : * N0 as a product
2090 : * (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
2091 : * where the P_i and Q_j are distinct primes, each C_k is known composite,
2092 : * none of the P_i divides any C_k, and we also know the total ordering
2093 : * of all the P_i, Q_j and C_k; in particular, we will never try to divide
2094 : * a C_k by a larger Q_j. Some of the C_k may have common factors.
2095 : *
2096 : * Caveat implementor: Taking gcds among C_k's is very likely to cost at
2097 : * least as much time as dividing off any primes as we find them, and book-
2098 : * keeping would be tough (since D=gcd(C_1,C_2) can still have common factors
2099 : * with both C_1/D and C_2/D, and so on...).
2100 : *
2101 : * At startup, we just initialize the structure to
2102 : * (2) N = C_1^1 (composite).
2103 : *
2104 : * Whenever ifac_primary_factor() or one of the arithmetic user interface
2105 : * routines needs a primary factor, and the smallest thing in our list is P_1,
2106 : * we return that and its exponent, and remove it from our list. (When nothing
2107 : * is left, we return a sentinel value -- gen_1. And in Moebius mode, when we
2108 : * see something with exponent > 1, whether prime or composite, we return gen_0
2109 : * or 0, depending on the function). In all other cases, ifac_main() iterates
2110 : * the following steps until we have a P_1 in the smallest position.
2111 : *
2112 : * When the smallest item is C_1, as it is initially:
2113 : * (3.1) Crack C_1 into a nontrivial product U_1 * U_2 by whatever method
2114 : * comes to mind for this size. (U for 'unknown'.) Cracking will detect
2115 : * perfect powers, so we may instead see a power of some U_1 here, or even
2116 : * something of the form U_1^k*U_2^k; of course the exponent already attached
2117 : * to C_1 is taken into account in the following.
2118 : * (3.2) If we have U_1*U_2, sort the two factors (distinct: squares are caught
2119 : * in stage 3.1). N.B. U_1 and U_2 are smaller than anything else in our list.
2120 : * (3.3) Check U_1 and U_2 for primality, and flag them accordingly.
2121 : * (3.4) Iterate.
2122 : *
2123 : * When the smallest item is Q_1:
2124 : * This is the unpleasant case. We go through the entire list and try to
2125 : * divide Q_1 off each of the current C_k's, which usually fails, but may
2126 : * succeed several times. When a division was successful, the corresponding
2127 : * C_k is removed from our list, and the cofactor becomes a U_l for the moment
2128 : * unless it is 1 (which happens when C_k was a power of Q_1). When we're
2129 : * through we upgrade Q_1 to P_1 status, then do a primality check on each U_l
2130 : * and sort it back into the list either as a Q_j or as a C_k. If during the
2131 : * insertion sort we discover that some U_l equals some P_i or Q_j or C_k we
2132 : * already have, we just add U_l's exponent to that of its twin. (The sorting
2133 : * therefore happens before the primality test). Since this may produce one or
2134 : * more elements smaller than the P_1 we just confirmed, we may have to repeat
2135 : * the iteration.
2136 : * A trick avoids some Q_1 instances: just after the sweep classifying
2137 : * all current unknowns as either composites or primes, we do another downward
2138 : * sweep beginning with the largest current factor and stopping just above the
2139 : * largest current composite. Every Q_j we pass is turned into a P_i.
2140 : * (Different primes are automatically coprime among each other, and primes do
2141 : * not divide smaller composites.)
2142 : * NB: We have no use for comparing the square of a prime to N0. Normally
2143 : * we will get called after casting out only the smallest primes, and
2144 : * since we cannot guarantee that we see the large prime factors in as-
2145 : * cending order, we cannot stop when we find one larger than sqrt(N0). */
2146 :
2147 : /* Data structure: We keep everything in a single t_VEC of t_INTs. The
2148 : * first 2 components are read-only:
2149 : * 1) the first records whether we're doing full (NULL) or Moebius (gen_1)
2150 : * factorization; in the latter case subroutines return a sentinel value as
2151 : * soon as they spot an exponent > 1.
2152 : * 2) the second records the hint from factorint()'s optional flag, for use by
2153 : * ifac_crack().
2154 : *
2155 : * The remaining components (initially 15) are used in groups of three:
2156 : * [ factor (t_INT), exponent (t_INT), factor class ], where factor class is
2157 : * NULL : unknown
2158 : * gen_0: known composite C_k
2159 : * gen_1: known prime Q_j awaiting trial division
2160 : * gen_2: finished prime P_i.
2161 : * When during the division stage we re-sort a C_k-turned-U_l to a lower
2162 : * position, we rotate any intervening material upward towards its old
2163 : * slot. When a C_k was divided down to 1, its slot is left empty at
2164 : * first; similarly when the re-sorting detects a repeated factor.
2165 : * After the sorting phase, we de-fragment the list and squeeze all the
2166 : * occupied slots together to the high end, so that ifac_crack() has room
2167 : * for new factors. When this doesn't suffice, we abandon the current vector
2168 : * and allocate a somewhat larger one, defragmenting again while copying.
2169 : *
2170 : * For internal use: note that all exponents will fit into C longs, given
2171 : * PARI's lgefint field size. When we work with them, we sometimes read
2172 : * out the GEN pointer, and sometimes do an itos, whatever is more con-
2173 : * venient for the task at hand. */
2174 :
2175 : /*** Overview ***/
2176 :
2177 : /* The '*where' argument in the following points into *partial at the first of
2178 : * the three fields of the first occupied slot. It's there because the caller
2179 : * would already know where 'here' is, so we don't want to search for it again.
2180 : * We do not preserve this from one user-interface call to the next. */
2181 :
2182 : /* In the most cases, control flows from the user interface to ifac_main() and
2183 : * then to a succession of ifac_crack()s and ifac_divide()s, with (typically)
2184 : * none of the latter finding anything. */
2185 :
2186 : #define LAST(x) x+lg(x)-3
2187 : #define FIRST(x) x+3
2188 :
2189 : #define MOEBIUS(x) gel(x,1)
2190 : #define HINT(x) gel(x,2)
2191 :
2192 : /* y <- x */
2193 : INLINE void
2194 0 : SHALLOWCOPY(GEN x, GEN y) {
2195 0 : VALUE(y) = VALUE(x);
2196 0 : EXPON(y) = EXPON(x);
2197 0 : CLASS(y) = CLASS(x);
2198 0 : }
2199 : /* y <- x */
2200 : INLINE void
2201 0 : COPY(GEN x, GEN y) {
2202 0 : icopyifstack(VALUE(x), VALUE(y));
2203 0 : icopyifstack(EXPON(x), EXPON(y));
2204 0 : CLASS(y) = CLASS(x);
2205 0 : }
2206 :
2207 : /* Diagnostics */
2208 : static void
2209 0 : ifac_factor_dbg(GEN x)
2210 : {
2211 0 : GEN c = CLASS(x), v = VALUE(x);
2212 0 : if (c == gen_2) err_printf("IFAC: factor %Ps\n\tis prime (finished)\n", v);
2213 0 : else if (c == gen_1) err_printf("IFAC: factor %Ps\n\tis prime\n", v);
2214 0 : else if (c == gen_0) err_printf("IFAC: factor %Ps\n\tis composite\n", v);
2215 0 : }
2216 : static void
2217 0 : ifac_check(GEN partial, GEN where)
2218 : {
2219 0 : if (!where || where < FIRST(partial) || where > LAST(partial))
2220 0 : pari_err_BUG("ifac_check ['where' out of bounds]");
2221 0 : }
2222 : static void
2223 0 : ifac_print(GEN part, GEN where)
2224 : {
2225 0 : long l = lg(part);
2226 : GEN p;
2227 :
2228 0 : err_printf("ifac partial factorization structure: %ld slots, ", (l-3)/3);
2229 0 : if (MOEBIUS(part)) err_printf("Moebius mode, ");
2230 0 : err_printf("hint = %ld\n", itos(HINT(part)));
2231 0 : ifac_check(part, where);
2232 0 : for (p = part+3; p < part + l; p += 3)
2233 : {
2234 0 : GEN v = VALUE(p), e = EXPON(p), c = CLASS(p);
2235 0 : const char *s = "";
2236 0 : if (!v) { err_printf("[empty slot]\n"); continue; }
2237 0 : if (c == NULL) s = "unknown";
2238 0 : else if (c == gen_0) s = "composite";
2239 0 : else if (c == gen_1) s = "unfinished prime";
2240 0 : else if (c == gen_2) s = "prime";
2241 0 : else pari_err_BUG("unknown factor class");
2242 0 : err_printf("[%Ps, %Ps, %s]\n", v, e, s);
2243 : }
2244 0 : err_printf("Done.\n");
2245 0 : }
2246 :
2247 : static const long decomp_default_hint = 0;
2248 : /* assume n > 0, which we can assign to */
2249 : /* return initial data structure, see ifac_crack() for the hint argument */
2250 : static GEN
2251 6157 : ifac_start_hint(GEN n, int moebius, long hint)
2252 : {
2253 6157 : const long ifac_initial_length = 3 + 7*3;
2254 : /* codeword, moebius, hint, 7 slots -- a 512-bit product of distinct 8-bit
2255 : * primes needs at most 7 slots at a time) */
2256 6157 : GEN here, part = cgetg(ifac_initial_length, t_VEC);
2257 :
2258 6157 : MOEBIUS(part) = moebius? gen_1 : NULL;
2259 6157 : HINT(part) = stoi(hint);
2260 : /* fill first slot at the top end */
2261 6157 : here = part + ifac_initial_length - 3; /* LAST(part) */
2262 6157 : INIT(here, n,gen_1,gen_0); /* n^1: composite */
2263 43099 : while ((here -= 3) > part) ifac_delete(here);
2264 6157 : return part;
2265 : }
2266 : GEN
2267 2535 : ifac_start(GEN n, int moebius)
2268 2535 : { return ifac_start_hint(n,moebius,decomp_default_hint); }
2269 :
2270 : /* Return next nonempty slot after 'here', NULL if none exist */
2271 : static GEN
2272 15837 : ifac_find(GEN partial)
2273 : {
2274 15837 : GEN scan, end = partial + lg(partial);
2275 :
2276 : #ifdef IFAC_DEBUG
2277 : ifac_check(partial, partial);
2278 : #endif
2279 115439 : for (scan = partial+3; scan < end; scan += 3)
2280 110522 : if (VALUE(scan)) return scan;
2281 4917 : return NULL;
2282 : }
2283 :
2284 : /* Defragment: squeeze out unoccupied slots above *where. Unoccupied slots
2285 : * arise when a composite factor dissolves completely whilst dividing off a
2286 : * prime, or when ifac_resort() spots a coincidence and merges two factors.
2287 : * Update *where */
2288 : static void
2289 14 : ifac_defrag(GEN *partial, GEN *where)
2290 : {
2291 14 : GEN scan_new = LAST(*partial), scan_old;
2292 :
2293 42 : for (scan_old = scan_new; scan_old >= *where; scan_old -= 3)
2294 : {
2295 28 : if (!VALUE(scan_old)) continue; /* empty slot */
2296 28 : if (scan_old < scan_new) SHALLOWCOPY(scan_old, scan_new);
2297 28 : scan_new -= 3; /* point at next slot to be written */
2298 : }
2299 14 : scan_new += 3; /* back up to last slot written */
2300 14 : *where = scan_new;
2301 84 : while ((scan_new -= 3) > *partial) ifac_delete(scan_new); /* erase junk */
2302 14 : }
2303 :
2304 : /* Move to a larger main vector, updating *where if it points into it, and
2305 : * *partial in any case. Can be used as a specialized gcopy before
2306 : * a gerepileupto() (pass 0 as the new length). Normally, one would pass
2307 : * new_lg=1 to let this function guess the new size. To be used sparingly.
2308 : * Complex version of ifac_defrag(), combined with reallocation. If new_lg
2309 : * is 0, use the old length, so this acts just like gcopy except that the
2310 : * 'where' pointer is carried along; if it is 1, we make an educated guess.
2311 : * Exception: If new_lg is 0, the vector is full to the brim, and the first
2312 : * entry is composite, we make it longer to avoid being called again a
2313 : * microsecond later. It is safe to call this with *where = NULL:
2314 : * if it doesn't point anywhere within the old structure, it is left alone */
2315 : static void
2316 0 : ifac_realloc(GEN *partial, GEN *where, long new_lg)
2317 : {
2318 0 : long old_lg = lg(*partial);
2319 : GEN newpart, scan_new, scan_old;
2320 :
2321 0 : if (new_lg == 1)
2322 0 : new_lg = 2*old_lg - 6; /* from 7 slots to 13 to 25... */
2323 0 : else if (new_lg <= old_lg) /* includes case new_lg == 0 */
2324 : {
2325 0 : GEN first = *partial + 3;
2326 0 : new_lg = old_lg;
2327 : /* structure full and first entry composite or unknown */
2328 0 : if (VALUE(first) && (CLASS(first) == gen_0 || CLASS(first)==NULL))
2329 0 : new_lg += 6; /* give it a little more breathing space */
2330 : }
2331 0 : newpart = cgetg(new_lg, t_VEC);
2332 0 : if (DEBUGMEM >= 3)
2333 0 : err_printf("IFAC: new partial factorization structure (%ld slots)\n",
2334 0 : (new_lg - 3)/3);
2335 0 : MOEBIUS(newpart) = MOEBIUS(*partial);
2336 0 : icopyifstack(HINT(*partial), HINT(newpart));
2337 : /* Downward sweep through the old *partial. Pick up 'where' and carry it
2338 : * over if we pass it. (Only useful if it pointed at a nonempty slot.)
2339 : * Factors are COPY'd so that we again have a nice object (parent older
2340 : * than children, connected), except the one factor that may still be living
2341 : * in a clone where n originally was; exponents are similarly copied if they
2342 : * aren't global constants; class-of-factor fields are global constants so we
2343 : * need only copy them as pointers. Caller may then do a gerepileupto() */
2344 0 : scan_new = newpart + new_lg - 3; /* LAST(newpart) */
2345 0 : scan_old = *partial + old_lg - 3; /* LAST(*partial) */
2346 0 : for (; scan_old > *partial + 2; scan_old -= 3)
2347 : {
2348 0 : if (*where == scan_old) *where = scan_new;
2349 0 : if (!VALUE(scan_old)) continue; /* skip empty slots */
2350 0 : COPY(scan_old, scan_new); scan_new -= 3;
2351 : }
2352 0 : scan_new += 3; /* back up to last slot written */
2353 0 : while ((scan_new -= 3) > newpart) ifac_delete(scan_new);
2354 0 : *partial = newpart;
2355 0 : }
2356 :
2357 : /* Re-sort one (typically unknown) entry from washere to a new position,
2358 : * rotating intervening entries upward to fill the vacant space. If the new
2359 : * position is the same as the old one, or the new value of the entry coincides
2360 : * with a value already occupying a lower slot, then we just add exponents (and
2361 : * use the 'more known' class, and return 1 immediately when in Moebius mode).
2362 : * Slots between *where and washere must be in sorted order, so a sweep using
2363 : * this to re-sort several unknowns must proceed upward, see ifac_resort().
2364 : * Bubble-sort-of-thing sort. Won't be exercised frequently, so this is ok */
2365 : static void
2366 7 : ifac_sort_one(GEN *where, GEN washere)
2367 : {
2368 7 : GEN old, scan = washere - 3;
2369 : GEN value, exponent, class0, class1;
2370 : long cmp_res;
2371 :
2372 7 : if (scan < *where) return; /* nothing to do, washere==*where */
2373 7 : value = VALUE(washere);
2374 7 : exponent = EXPON(washere);
2375 7 : class0 = CLASS(washere);
2376 7 : cmp_res = -1; /* sentinel */
2377 7 : while (scan >= *where) /* at least once */
2378 : {
2379 7 : if (VALUE(scan))
2380 : { /* current slot nonempty, check against where */
2381 7 : cmp_res = cmpii(value, VALUE(scan));
2382 7 : if (cmp_res >= 0) break; /* have found where to stop */
2383 : }
2384 : /* copy current slot upward by one position and move pointers down */
2385 0 : SHALLOWCOPY(scan, scan+3);
2386 0 : scan -= 3;
2387 : }
2388 7 : scan += 3;
2389 : /* At this point there are the following possibilities:
2390 : * 1) cmp_res == -1. Either value is less than that at *where, or *where was
2391 : * pointing at vacant slots and any factors we saw en route were larger than
2392 : * value. At any rate, scan == *where now, and scan is pointing at an empty
2393 : * slot, into which we'll stash our entry.
2394 : * 2) cmp_res == 0. The entry at scan-3 is the one, we compare class0
2395 : * fields and add exponents, and put it all into the vacated scan slot,
2396 : * NULLing the one at scan-3 (and possibly updating *where).
2397 : * 3) cmp_res == 1. The slot at scan is the one to store our entry into. */
2398 7 : if (cmp_res)
2399 : {
2400 7 : if (cmp_res < 0 && scan != *where)
2401 0 : pari_err_BUG("ifact_sort_one [misaligned partial]");
2402 7 : INIT(scan, value, exponent, class0); return;
2403 : }
2404 : /* case cmp_res == 0: repeated factor detected */
2405 0 : if (DEBUGLEVEL >= 4)
2406 0 : err_printf("IFAC: repeated factor %Ps\n\tin ifac_sort_one\n", value);
2407 0 : old = scan - 3;
2408 : /* if old class0 was composite and new is prime, or vice versa, complain
2409 : * (and if one class0 was unknown and the other wasn't, use the known one) */
2410 0 : class1 = CLASS(old);
2411 0 : if (class0) /* should never be used */
2412 : {
2413 0 : if (class1)
2414 : {
2415 0 : if (class0 == gen_0 && class1 != gen_0)
2416 0 : pari_err_BUG("ifac_sort_one (composite = prime)");
2417 0 : else if (class0 != gen_0 && class1 == gen_0)
2418 0 : pari_err_BUG("ifac_sort_one (prime = composite)");
2419 0 : else if (class0 == gen_2)
2420 0 : CLASS(scan) = class0;
2421 : }
2422 : else
2423 0 : CLASS(scan) = class0;
2424 : }
2425 : /* else stay with the existing known class0 */
2426 0 : CLASS(scan) = class1;
2427 : /* in any case, add exponents */
2428 0 : if (EXPON(old) == gen_1 && exponent == gen_1)
2429 0 : EXPON(scan) = gen_2;
2430 : else
2431 0 : EXPON(scan) = addii(EXPON(old), exponent);
2432 : /* move the value over and null out the vacated slot below */
2433 0 : old = scan - 3;
2434 0 : *scan = *old;
2435 0 : ifac_delete(old);
2436 : /* finally, see whether *where should be pulled in */
2437 0 : if (old == *where) *where += 3;
2438 : }
2439 :
2440 : /* Sort all current unknowns downward to where they belong. Sweeps in the
2441 : * upward direction. Not needed after ifac_crack(), only when ifac_divide()
2442 : * returned true. Update *where. */
2443 : static void
2444 7 : ifac_resort(GEN *partial, GEN *where)
2445 : {
2446 : GEN scan, end;
2447 7 : ifac_defrag(partial, where); end = LAST(*partial);
2448 21 : for (scan = *where; scan <= end; scan += 3)
2449 14 : if (VALUE(scan) && !CLASS(scan)) ifac_sort_one(where, scan); /*unknown*/
2450 7 : ifac_defrag(partial, where); /* remove newly created gaps */
2451 7 : }
2452 :
2453 : /* Let x be a t_INT known not to have small divisors (< 661, and < 2^14 for huge
2454 : * x > 2^512). Return 0 if x is a proven composite. Return 1 if we believe it
2455 : * to be prime (fully proven prime if factor_proven is set). */
2456 : int
2457 31806 : ifac_isprime(GEN x)
2458 : {
2459 31806 : if (!BPSW_psp_nosmalldiv(x)) return 0; /* composite */
2460 20210 : if (factor_proven && ! BPSW_isprime(x))
2461 : {
2462 0 : pari_warn(warner,
2463 : "IFAC: pseudo-prime %Ps\n\tis not prime. PLEASE REPORT!\n", x);
2464 0 : return 0;
2465 : }
2466 20210 : return 1;
2467 : }
2468 :
2469 : static int
2470 11678 : ifac_checkprime(GEN x)
2471 : {
2472 11678 : int res = ifac_isprime(VALUE(x));
2473 11678 : CLASS(x) = res? gen_1: gen_0;
2474 11678 : if (DEBUGLEVEL>2) ifac_factor_dbg(x);
2475 11678 : return res;
2476 : }
2477 :
2478 : /* Determine primality or compositeness of all current unknowns, and set
2479 : * class Q primes to finished (class P) if everything larger is already
2480 : * known to be prime. When after_crack >= 0, only look at the
2481 : * first after_crack things in the list (do nothing when it's 0) */
2482 : static void
2483 5958 : ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
2484 : {
2485 5958 : GEN scan, scan_end = LAST(*partial);
2486 :
2487 : #ifdef IFAC_DEBUG
2488 : ifac_check(*partial, *where);
2489 : #endif
2490 5958 : if (after_crack == 0) return;
2491 5307 : if (after_crack > 0) /* check at most after_crack entries */
2492 5300 : scan = *where + 3*(after_crack - 1); /* assert(scan <= scan_end) */
2493 : else
2494 7 : for (scan = scan_end; scan >= *where; scan -= 3)
2495 : {
2496 7 : if (CLASS(scan))
2497 : { /* known class of factor */
2498 0 : if (CLASS(scan) == gen_0) break;
2499 0 : if (CLASS(scan) == gen_1)
2500 : {
2501 0 : if (DEBUGLEVEL>=3)
2502 : {
2503 0 : err_printf("IFAC: factor %Ps\n\tis prime (no larger composite)\n",
2504 0 : VALUE(*where));
2505 0 : err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2506 0 : VALUE(*where), itos(EXPON(*where)));
2507 : }
2508 0 : CLASS(scan) = gen_2;
2509 : }
2510 0 : continue;
2511 : }
2512 7 : if (!ifac_checkprime(scan)) break; /* must disable Q-to-P */
2513 0 : CLASS(scan) = gen_2; /* P_i, finished prime */
2514 0 : if (DEBUGLEVEL>2) ifac_factor_dbg(scan);
2515 : }
2516 : /* go on, Q-to-P trick now disabled */
2517 16265 : for (; scan >= *where; scan -= 3)
2518 : {
2519 10958 : if (CLASS(scan)) continue;
2520 10923 : (void)ifac_checkprime(scan); /* Qj | Ck */
2521 : }
2522 : }
2523 :
2524 : /* Divide all current composites by first (prime, class Q) entry, updating its
2525 : * exponent, and turning it into a finished prime (class P). Return 1 if any
2526 : * such divisions succeeded (in Moebius mode, the update may then not have
2527 : * been completed), or 0 if none of them succeeded. Doesn't modify *where.
2528 : * Here we normally do not check that the first entry is a not-finished
2529 : * prime. Stack management: we may allocate a new exponent */
2530 : static long
2531 10246 : ifac_divide(GEN *partial, GEN *where, long moebius_mode)
2532 : {
2533 10246 : GEN scan, scan_end = LAST(*partial);
2534 10246 : long res = 0, exponent, newexp, otherexp;
2535 :
2536 : #ifdef IFAC_DEBUG
2537 : ifac_check(*partial, *where);
2538 : if (CLASS(*where) != gen_1)
2539 : pari_err_BUG("ifac_divide [division by composite or finished prime]");
2540 : if (!VALUE(*where)) pari_err_BUG("ifac_divide [division by nothing]");
2541 : #endif
2542 10246 : newexp = exponent = itos(EXPON(*where));
2543 10246 : if (exponent > 1 && moebius_mode) return 1;
2544 : /* should've been caught by caller */
2545 :
2546 16227 : for (scan = *where+3; scan <= scan_end; scan += 3)
2547 : {
2548 5981 : if (CLASS(scan) != gen_0) continue; /* the other thing ain't composite */
2549 144 : otherexp = 0;
2550 : /* divide in place to keep stack clutter minimal */
2551 158 : while (dvdiiz(VALUE(scan), VALUE(*where), VALUE(scan)))
2552 : {
2553 14 : if (moebius_mode) return 1; /* immediately */
2554 14 : if (!otherexp) otherexp = itos(EXPON(scan));
2555 14 : newexp += otherexp;
2556 : }
2557 144 : if (newexp > exponent) /* did anything happen? */
2558 : {
2559 7 : EXPON(*where) = (newexp == 2 ? gen_2 : utoipos(newexp));
2560 7 : exponent = newexp;
2561 7 : if (is_pm1((GEN)*scan)) /* factor dissolved completely */
2562 : {
2563 0 : ifac_delete(scan);
2564 0 : if (DEBUGLEVEL >= 4)
2565 0 : err_printf("IFAC: a factor was a power of another prime factor\n");
2566 : } else {
2567 7 : CLASS(scan) = NULL; /* at any rate it's Unknown now */
2568 7 : if (DEBUGLEVEL >= 4)
2569 0 : err_printf("IFAC: a factor was divisible by another prime factor,\n"
2570 : "\tleaving a cofactor = %Ps\n", VALUE(scan));
2571 : }
2572 7 : res = 1;
2573 7 : if (DEBUGLEVEL >= 5)
2574 0 : err_printf("IFAC: prime %Ps\n\tappears at least to the power %ld\n",
2575 0 : VALUE(*where), newexp);
2576 : }
2577 : } /* for */
2578 10246 : CLASS(*where) = gen_2; /* make it a finished prime */
2579 10246 : if (DEBUGLEVEL >= 3)
2580 0 : err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2581 0 : VALUE(*where), newexp);
2582 10246 : return res;
2583 : }
2584 :
2585 : /* found out our integer was factor^exp. Update */
2586 : static void
2587 944 : update_pow(GEN where, GEN factor, long exp, pari_sp *av)
2588 : {
2589 944 : GEN ex = EXPON(where);
2590 944 : if (DEBUGLEVEL>3)
2591 0 : err_printf("IFAC: found %Ps =\n\t%Ps ^%ld\n", *where, factor, exp);
2592 944 : affii(factor, VALUE(where)); set_avma(*av);
2593 944 : if (ex == gen_1)
2594 741 : { EXPON(where) = exp == 2? gen_2: utoipos(exp); *av = avma; }
2595 203 : else if (ex == gen_2)
2596 182 : { EXPON(where) = utoipos(exp<<1); *av = avma; }
2597 : else
2598 21 : affsi(exp * itos(ex), EXPON(where));
2599 944 : }
2600 : /* hint = 0 : Use a default strategy
2601 : * hint & 1 : avoid MPQS
2602 : * hint & 2 : avoid first-stage ECM (may fall back to ECM if MPQS gives up)
2603 : * hint & 4 : avoid Pollard and SQUFOF stages.
2604 : * hint & 8 : avoid final ECM; may flag a composite as prime. */
2605 : #define get_hint(partial) (itos(HINT(*partial)) & 15)
2606 :
2607 : /* Complete ifac_crack's job when a factoring engine splits the current factor
2608 : * into a product of three or more new factors. Makes room for them if
2609 : * necessary, sorts them, gives them the right exponents and class. Returns the
2610 : * number of factors actually written, which may be less than #facvec if there
2611 : * are duplicates. Vectors of factors (cf pollardbrent() or mpqs()) contain
2612 : * 'slots' of 3 GENs per factor, interpreted as in our partial factorization
2613 : * data structure. Thus engines can tell us what they already know about
2614 : * factors being prime/composite or appearing to a power larger than thefirst.
2615 : * Don't collect garbage. No diagnostics: engine has printed what it found.
2616 : * facvec contains slots of three components per factor; repeated factors are
2617 : * allowed (their classes shouldn't contradict each other whereas their
2618 : * exponents will be added up) */
2619 : static long
2620 3318 : ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec, long moebius_mode)
2621 : {
2622 3318 : long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
2623 : /* one of the factors will go into the *where slot, so room is now 3 times
2624 : * the number of slots we can use */
2625 3318 : long needroom = lfv - room;
2626 3318 : GEN newexp, cur, sorted, auxvec = cgetg(nf+1, t_VEC), factor;
2627 3318 : long E = itos(EXPON(*where)); /* the old exponent */
2628 :
2629 3318 : if (DEBUGLEVEL >= 5) /* squfof may return a single squared factor as a set */
2630 0 : err_printf("IFAC: incorporating set of %ld factor(s)\n", nf);
2631 3318 : if (needroom > 0) ifac_realloc(partial, where, lg(*partial) + needroom);
2632 :
2633 : /* create sort permutation from the values of the factors */
2634 10305 : for (j=nf; j; j--) gel(auxvec,j) = gel(facvec,3*j-2);
2635 3318 : sorted = ZV_indexsort(auxvec);
2636 : /* store factors, beginning at *where, and catching any duplicates */
2637 3318 : cur = facvec + 3*sorted[nf]-2; /* adjust for triple spacing */
2638 3318 : VALUE(*where) = VALUE(cur);
2639 3318 : newexp = EXPON(cur);
2640 : /* if new exponent is 1, the old exponent already in place will do */
2641 3318 : if (newexp != gen_1) EXPON(*where) = mului(E,newexp);
2642 3318 : CLASS(*where) = CLASS(cur);
2643 3318 : if (DEBUGLEVEL >= 6) err_printf("\tstored (largest) factor no. %ld...\n", nf);
2644 :
2645 6987 : for (j=nf-1; j; j--)
2646 : {
2647 : GEN e;
2648 3669 : cur = facvec + 3*sorted[j]-2;
2649 3669 : factor = VALUE(cur);
2650 3669 : if (equalii(factor, VALUE(*where)))
2651 : {
2652 0 : if (DEBUGLEVEL >= 6)
2653 0 : err_printf("\tfactor no. %ld is a duplicate%s\n", j, (j>1? "...": ""));
2654 : /* update exponent, ignore class which would already have been set,
2655 : * then forget current factor */
2656 0 : newexp = EXPON(cur);
2657 0 : if (newexp != gen_1) /* new exp > 1 */
2658 0 : e = addiu(EXPON(*where), E * itou(newexp));
2659 0 : else if (EXPON(*where) == gen_1 && E == 1)
2660 0 : e = gen_2;
2661 : else
2662 0 : e = addiu(EXPON(*where), E);
2663 0 : EXPON(*where) = e;
2664 :
2665 0 : if (moebius_mode) return 0; /* stop now, with exponent updated */
2666 0 : continue;
2667 : }
2668 :
2669 3669 : *where -= 3;
2670 3669 : CLASS(*where) = CLASS(cur); /* class as given */
2671 3669 : newexp = EXPON(cur);
2672 3669 : if (newexp != gen_1) /* new exp > 1 */
2673 147 : e = (E == 1 && newexp == gen_2)? gen_2: mului(E, newexp);
2674 : else /* inherit parent's exponent */
2675 3522 : e = (E == 1 ? gen_1 : (E == 2 ? gen_2 : utoipos(E)));
2676 3669 : EXPON(*where) = e;
2677 : /* keep components younger than *partial */
2678 3669 : icopyifstack(factor, VALUE(*where));
2679 3669 : k++;
2680 3669 : if (DEBUGLEVEL >= 6)
2681 0 : err_printf("\tfactor no. %ld was unique%s\n", j, j>1? " (so far)...": "");
2682 : }
2683 3318 : return k;
2684 : }
2685 :
2686 : /* Split the first (composite) entry. There _must_ already be room for another
2687 : * factor below *where, and *where is updated. Two cases:
2688 : * - entry = factor^k is a pure power: factor^k is inserted, leaving *where
2689 : * unchanged;
2690 : * - entry = factor * cofactor (not necessarily coprime): both factors are
2691 : * inserted in the correct order, updating *where
2692 : * The inserted factors class is set to unknown, they inherit the exponent
2693 : * (or a multiple thereof) of their ancestor.
2694 : *
2695 : * Returns number of factors written into the structure, normally 2 (1 if pure
2696 : * power, maybe > 2 if a factoring engine returned a vector of factors instead
2697 : * of a single factor). Can reallocate the data structure in the
2698 : * vector-of-factors case, not in the most common single-factor case.
2699 : * Stack housekeeping: this routine may create one or more objects (a new
2700 : * factor, or possibly several, and perhaps one or more new exponents > 2) */
2701 : static long
2702 5965 : ifac_crack(GEN *partial, GEN *where, long moebius_mode)
2703 : {
2704 5965 : long cmp_res, hint = get_hint(partial);
2705 : GEN factor, exponent;
2706 :
2707 : #ifdef IFAC_DEBUG
2708 : ifac_check(*partial, *where);
2709 : if (*where < *partial + 6)
2710 : pari_err_BUG("ifac_crack ['*where' out of bounds]");
2711 : if (!(VALUE(*where)) || typ(VALUE(*where)) != t_INT)
2712 : pari_err_BUG("ifac_crack [incorrect VALUE(*where)]");
2713 : if (CLASS(*where) != gen_0)
2714 : pari_err_BUG("ifac_crack [operand not known composite]");
2715 : #endif
2716 :
2717 5965 : if (DEBUGLEVEL>2) {
2718 0 : err_printf("IFAC: cracking composite\n\t%Ps\n", **where);
2719 0 : if (DEBUGLEVEL>3) err_printf("IFAC: checking for pure square\n");
2720 : }
2721 : /* MPQS cannot factor prime powers. Look for pure powers even if MPQS is
2722 : * blocked by hint: fast and useful in bounded factorization */
2723 : {
2724 : forprime_t T;
2725 5965 : ulong exp = 1, mask = 7;
2726 5965 : long good = 0;
2727 5965 : pari_sp av = avma;
2728 5965 : (void)u_forprime_init(&T, 11, ULONG_MAX);
2729 : /* crack squares */
2730 6860 : while (Z_issquareall(VALUE(*where), &factor))
2731 : {
2732 902 : good = 1; /* remember we succeeded once */
2733 902 : update_pow(*where, factor, 2, &av);
2734 1553 : if (moebius_mode) return 0; /* no need to carry on */
2735 : }
2736 6000 : while ( (exp = is_357_power(VALUE(*where), &factor, &mask)) )
2737 : {
2738 42 : good = 1; /* remember we succeeded once */
2739 42 : update_pow(*where, factor, exp, &av);
2740 42 : if (moebius_mode) return 0; /* no need to carry on */
2741 : }
2742 : /* cutoff at 14 bits: OK if tridiv_bound >= 2^14 OR if >= 661 for
2743 : * an integer < 701^11 (103 bits). */
2744 5958 : while ( (exp = is_pth_power(VALUE(*where), &factor, &T, 15)) )
2745 : {
2746 0 : good = 1; /* remember we succeeded once */
2747 0 : update_pow(*where, factor, exp, &av);
2748 0 : if (moebius_mode) return 0; /* no need to carry on */
2749 : }
2750 :
2751 5958 : if (good && hint != 15 && ifac_checkprime(*where))
2752 : { /* our composite was a prime power */
2753 651 : if (DEBUGLEVEL>3)
2754 0 : err_printf("IFAC: factor %Ps\n\tis prime\n", VALUE(*where));
2755 651 : return 0; /* bypass subsequent ifac_whoiswho() call */
2756 : }
2757 : } /* pure power stage */
2758 :
2759 5307 : factor = NULL;
2760 5307 : if (!(hint & 4))
2761 : { /* pollardbrent() Rho usually gets a first chance */
2762 5258 : if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Pollard-Brent rho method\n");
2763 5258 : factor = pollardbrent(VALUE(*where));
2764 5258 : if (!factor)
2765 : { /* Shanks' squfof() */
2766 4974 : if (DEBUGLEVEL >= 4)
2767 0 : err_printf("IFAC: trying Shanks' SQUFOF, will fail silently if input\n"
2768 : " is too large for it.\n");
2769 4974 : factor = squfof(VALUE(*where));
2770 : }
2771 : }
2772 5307 : if (!factor && !(hint & 2))
2773 : { /* First ECM stage */
2774 3273 : if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Lenstra-Montgomery ECM\n");
2775 3273 : factor = ellfacteur(VALUE(*where), 0); /* do not insist */
2776 : }
2777 5307 : if (!factor && !(hint & 1))
2778 : { /* MPQS stage */
2779 3297 : if (DEBUGLEVEL >= 4) err_printf("IFAC: trying MPQS\n");
2780 3297 : factor = mpqs(VALUE(*where));
2781 : }
2782 5307 : if (!factor)
2783 : {
2784 15 : if (!(hint & 8))
2785 : { /* still no luck? Final ECM stage, guaranteed to succeed */
2786 8 : if (DEBUGLEVEL >= 4)
2787 0 : err_printf("IFAC: forcing ECM, may take some time\n");
2788 8 : factor = ellfacteur(VALUE(*where), 1);
2789 : }
2790 : else
2791 : { /* limited factorization */
2792 7 : if (DEBUGLEVEL >= 2)
2793 : {
2794 0 : if (hint != 15)
2795 0 : pari_warn(warner, "IFAC: unfactored composite declared prime");
2796 : else
2797 0 : pari_warn(warner, "IFAC: untested integer declared prime");
2798 :
2799 : /* don't print it out at level 3 or above, where it would appear
2800 : * several times before and after this message already */
2801 0 : if (DEBUGLEVEL == 2) err_printf("\t%Ps\n", VALUE(*where));
2802 : }
2803 7 : CLASS(*where) = gen_1; /* might as well trial-divide by it... */
2804 7 : return 1;
2805 : }
2806 : }
2807 5300 : if (typ(factor) == t_VEC) /* delegate this case */
2808 3318 : return ifac_insert_multiplet(partial, where, factor, moebius_mode);
2809 : /* typ(factor) == t_INT */
2810 : /* got single integer back: work out the cofactor (in place) */
2811 1982 : if (!dvdiiz(VALUE(*where), factor, VALUE(*where)))
2812 : {
2813 0 : err_printf("IFAC: factoring %Ps\n", VALUE(*where));
2814 0 : err_printf("\tyielded 'factor' %Ps\n\twhich isn't!\n", factor);
2815 0 : pari_err_BUG("factoring");
2816 : }
2817 : /* factoring engines report the factor found; tell about the cofactor */
2818 1982 : if (DEBUGLEVEL >= 4) err_printf("IFAC: cofactor = %Ps\n", VALUE(*where));
2819 :
2820 : /* The two factors are 'factor' and VALUE(*where), find out which is larger */
2821 1982 : cmp_res = cmpii(factor, VALUE(*where));
2822 1982 : CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2823 1982 : exponent = EXPON(*where);
2824 1982 : *where -= 3;
2825 1982 : CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2826 1982 : icopyifstack(exponent, EXPON(*where));
2827 1982 : if (cmp_res < 0)
2828 1773 : VALUE(*where) = factor; /* common case */
2829 209 : else if (cmp_res > 0)
2830 : { /* factor > cofactor, rearrange */
2831 209 : GEN old = *where + 3;
2832 209 : VALUE(*where) = VALUE(old); /* move cofactor pointer to lowest slot */
2833 209 : VALUE(old) = factor; /* save factor */
2834 : }
2835 0 : else pari_err_BUG("ifac_crack [Z_issquareall miss]");
2836 1982 : return 2;
2837 : }
2838 :
2839 : /* main loop: iterate until smallest entry is a finished prime; returns
2840 : * a 'where' pointer, or NULL if nothing left, or gen_0 in Moebius mode if
2841 : * we aren't squarefree */
2842 : static GEN
2843 14847 : ifac_main(GEN *partial)
2844 : {
2845 14847 : const long moebius_mode = !!MOEBIUS(*partial);
2846 14847 : GEN here = ifac_find(*partial);
2847 : long nf;
2848 :
2849 14847 : if (!here) return NULL; /* nothing left */
2850 : /* loop until first entry is a finished prime. May involve reallocations,
2851 : * thus updates of *partial */
2852 26457 : while (CLASS(here) != gen_2)
2853 : {
2854 16211 : if (CLASS(here) == gen_0) /* composite: crack it */
2855 : { /* make sure there's room for another factor */
2856 5965 : if (here < *partial + 6)
2857 : {
2858 0 : ifac_defrag(partial, &here);
2859 0 : if (here < *partial + 6) ifac_realloc(partial, &here, 1); /* no luck */
2860 : }
2861 5965 : nf = ifac_crack(partial, &here, moebius_mode);
2862 5965 : if (moebius_mode && EXPON(here) != gen_1) /* that was a power */
2863 : {
2864 14 : if (DEBUGLEVEL >= 3)
2865 0 : err_printf("IFAC: main loop: repeated new factor\n\t%Ps\n", *here);
2866 14 : return gen_0;
2867 : }
2868 : /* deal with the new unknowns. No sort: ifac_crack did it */
2869 5951 : ifac_whoiswho(partial, &here, nf);
2870 5951 : continue;
2871 : }
2872 10246 : if (CLASS(here) == gen_1) /* prime but not yet finished: finish it */
2873 : {
2874 10246 : if (ifac_divide(partial, &here, moebius_mode))
2875 : {
2876 7 : if (moebius_mode)
2877 : {
2878 0 : if (DEBUGLEVEL >= 3)
2879 0 : err_printf("IFAC: main loop: another factor was divisible by\n"
2880 : "\t%Ps\n", *here);
2881 0 : return gen_0;
2882 : }
2883 7 : ifac_resort(partial, &here); /* sort new cofactors down */
2884 7 : ifac_whoiswho(partial, &here, -1);
2885 : }
2886 10246 : continue;
2887 : }
2888 0 : pari_err_BUG("ifac_main [nonexistent factor class]");
2889 : } /* while */
2890 10246 : if (moebius_mode && EXPON(here) != gen_1)
2891 : {
2892 0 : if (DEBUGLEVEL >= 3)
2893 0 : err_printf("IFAC: after main loop: repeated old factor\n\t%Ps\n", *here);
2894 0 : return gen_0;
2895 : }
2896 10246 : if (DEBUGLEVEL >= 4)
2897 : {
2898 0 : nf = (*partial + lg(*partial) - here - 3)/3;
2899 0 : if (nf)
2900 0 : err_printf("IFAC: main loop: %ld factor%s left\n", nf, (nf>1)? "s": "");
2901 : else
2902 0 : err_printf("IFAC: main loop: this was the last factor\n");
2903 : }
2904 10246 : if (factor_add_primes && !(get_hint(partial) & 8))
2905 : {
2906 0 : GEN p = VALUE(here);
2907 0 : if (lgefint(p)>3 || uel(p,2) > 0x1000000UL) (void)addprimes(p);
2908 : }
2909 10246 : return here;
2910 : }
2911 :
2912 : /* Encapsulated routines */
2913 :
2914 : /* prime/exponent pairs need to appear contiguously on the stack, but we also
2915 : * need our data structure somewhere, and we don't know in advance how many
2916 : * primes will turn up. The following discipline achieves this: When
2917 : * ifac_decomp() is called, n should point at an object older than the oldest
2918 : * small prime/exponent pair (ifactor() guarantees this).
2919 : * We allocate sufficient space to accommodate several pairs -- eleven pairs
2920 : * ought to fit in a space not much larger than n itself -- before calling
2921 : * ifac_start(). If we manage to complete the factorization before we run out
2922 : * of space, we free the data structure and cull the excess reserved space
2923 : * before returning. When we do run out, we have to leapfrog to generate more
2924 : * (guesstimating the requirements from what is left in the partial
2925 : * factorization structure); room for fresh pairs is allocated at the head of
2926 : * the stack, followed by an ifac_realloc() to reconnect the data structure and
2927 : * move it out of the way, followed by a few pointer tweaks to connect the new
2928 : * pairs space to the old one. This whole affair translates into a surprisingly
2929 : * compact routine. */
2930 :
2931 : /* find primary factors of n; destroy n */
2932 : static long
2933 2634 : ifac_decomp(GEN n, long hint)
2934 : {
2935 2634 : pari_sp av = avma;
2936 2634 : long nb = 0;
2937 2634 : GEN part, here, workspc, pairs = (GEN)av;
2938 :
2939 : /* workspc will be doled out in pairs of smaller t_INTs. For n = prod p^{e_p}
2940 : * (p not necessarily prime), need room to store all p and e_p [ cgeti(3) ],
2941 : * bounded by
2942 : * sum_{p | n} ( log_{2^BIL} (p) + 6 ) <= log_{2^BIL} n + 6 log_2 n */
2943 2634 : workspc = new_chunk((expi(n) + 1) * 7);
2944 2634 : part = ifac_start_hint(n, 0, hint);
2945 : for (;;)
2946 : {
2947 7753 : here = ifac_main(&part);
2948 7753 : if (!here) break;
2949 5119 : if (gc_needed(av,1))
2950 : {
2951 : long offset;
2952 0 : if(DEBUGMEM>1)
2953 : {
2954 0 : pari_warn(warnmem,"[2] ifac_decomp");
2955 0 : ifac_print(part, here);
2956 : }
2957 0 : ifac_realloc(&part, &here, 0);
2958 0 : offset = here - part;
2959 0 : part = gerepileupto((pari_sp)workspc, part);
2960 0 : here = part + offset;
2961 : }
2962 5119 : nb++;
2963 5119 : pairs = icopy_avma(VALUE(here), (pari_sp)pairs);
2964 5119 : pairs = icopy_avma(EXPON(here), (pari_sp)pairs);
2965 5119 : ifac_delete(here);
2966 : }
2967 2634 : set_avma((pari_sp)pairs);
2968 2634 : if (DEBUGLEVEL >= 3)
2969 0 : err_printf("IFAC: found %ld large prime (power) factor%s.\n",
2970 : nb, (nb>1? "s": ""));
2971 2634 : return nb;
2972 : }
2973 :
2974 : /***********************************************************************/
2975 : /** ARITHMETIC FUNCTIONS WITH EARLY-ABORT **/
2976 : /** needing direct access to the factoring machinery to avoid work: **/
2977 : /** e.g. if we find a square factor, moebius returns 0, core doesn't **/
2978 : /** need to factor it, etc. **/
2979 : /***********************************************************************/
2980 : /* memory management */
2981 : static void
2982 0 : ifac_GC(pari_sp av, GEN *part)
2983 : {
2984 0 : GEN here = NULL;
2985 0 : if(DEBUGMEM>1) pari_warn(warnmem,"ifac_xxx");
2986 0 : ifac_realloc(part, &here, 0);
2987 0 : *part = gerepileupto(av, *part);
2988 0 : }
2989 :
2990 : /* destroys n */
2991 : static long
2992 211 : ifac_moebius(GEN n)
2993 : {
2994 211 : long mu = 1;
2995 211 : pari_sp av = avma;
2996 211 : GEN part = ifac_start(n, 1);
2997 : for(;;)
2998 394 : {
2999 : long v;
3000 : GEN p;
3001 605 : if (!ifac_next(&part,&p,&v)) return v? 0: mu;
3002 394 : mu = -mu;
3003 394 : if (gc_needed(av,1)) ifac_GC(av,&part);
3004 : }
3005 : }
3006 :
3007 : int
3008 682 : ifac_read(GEN part, GEN *p, long *e)
3009 : {
3010 682 : GEN here = ifac_find(part);
3011 682 : if (!here) return 0;
3012 352 : *p = VALUE(here);
3013 352 : *e = EXPON(here)[2];
3014 352 : return 1;
3015 : }
3016 : void
3017 308 : ifac_skip(GEN part)
3018 : {
3019 308 : GEN here = ifac_find(part);
3020 308 : if (here) ifac_delete(here);
3021 308 : }
3022 :
3023 : /* destroys n */
3024 : static int
3025 7 : ifac_ispowerful(GEN n)
3026 : {
3027 7 : pari_sp av = avma;
3028 7 : GEN part = ifac_start(n, 0);
3029 : for(;;)
3030 7 : {
3031 : long e;
3032 : GEN p;
3033 14 : if (!ifac_read(part,&p,&e)) return 1;
3034 : /* power: skip */
3035 7 : if (e != 1 || Z_isanypower(p,NULL)) { ifac_skip(part); continue; }
3036 0 : if (!ifac_next(&part,&p,&e)) return 1;
3037 0 : if (e == 1) return 0;
3038 0 : if (gc_needed(av,1)) ifac_GC(av,&part);
3039 : }
3040 : }
3041 : /* destroys n */
3042 : static GEN
3043 323 : ifac_core(GEN n)
3044 : {
3045 323 : GEN m = gen_1, c = cgeti(lgefint(n));
3046 323 : pari_sp av = avma;
3047 323 : GEN part = ifac_start(n, 0);
3048 : for(;;)
3049 345 : {
3050 : long e;
3051 : GEN p;
3052 668 : if (!ifac_read(part,&p,&e)) return m;
3053 : /* square: skip */
3054 345 : if (!odd(e) || Z_issquare(p)) { ifac_skip(part); continue; }
3055 44 : if (!ifac_next(&part,&p,&e)) return m;
3056 44 : if (odd(e)) m = mulii(m, p);
3057 44 : if (gc_needed(av,1)) { affii(m,c); m=c; ifac_GC(av,&part); }
3058 : }
3059 : }
3060 :
3061 : /* must be >= 661 (various functions assume it in order to call uisprime_661
3062 : * instead of uisprime, and Z_isanypower_nosmalldiv instead of Z_isanypower) */
3063 : ulong
3064 11832512 : tridiv_boundu(ulong n)
3065 : {
3066 11832512 : long e = expu(n);
3067 11831322 : if(e<30) return 1UL<<12;
3068 : #ifdef LONG_IS_64BIT
3069 403387 : if(e<34) return 1UL<<13;
3070 241214 : if(e<37) return 1UL<<14;
3071 146611 : if(e<42) return 1UL<<15;
3072 61431 : if(e<47) return 1UL<<16;
3073 33530 : if(e<56) return 1UL<<17;
3074 8433 : if(e<56) return 1UL<<18;
3075 8433 : if(e<62) return 1UL<<19;
3076 1742 : return 1UL<<18;
3077 : #else
3078 14357 : return 1UL<<13;
3079 : #endif
3080 : }
3081 :
3082 : /* Where to stop trial dividing in factorization. Must be >= 661.
3083 : * If further n > 2^512, must be >= 2^14 */
3084 : ulong
3085 881982 : tridiv_bound(GEN n)
3086 : {
3087 881982 : if (lgefint(n)==3) return tridiv_boundu(n[2]);
3088 : else
3089 : {
3090 88482 : ulong l = (ulong)expi(n) + 1;
3091 88482 : if (l <= 512) return (l-16) << 10;
3092 1196 : return 1UL<<19; /* Rho is generally faster above this */
3093 : }
3094 : }
3095 :
3096 : /* destroys n */
3097 : static void
3098 988 : ifac_factoru(GEN n, long hint, GEN P, GEN E, long *pi)
3099 : {
3100 988 : GEN part = ifac_start_hint(n, 0, hint);
3101 : for(;;)
3102 1892 : {
3103 : long v;
3104 : GEN p;
3105 2880 : if (!ifac_next(&part,&p,&v)) return;
3106 1892 : P[*pi] = itou(p);
3107 1892 : E[*pi] = v;
3108 1892 : (*pi)++;
3109 : }
3110 : }
3111 : /* destroys n */
3112 : static long
3113 684 : ifac_moebiusu(GEN n)
3114 : {
3115 684 : GEN part = ifac_start(n, 1);
3116 684 : long s = 1;
3117 : for(;;)
3118 1368 : {
3119 : long v;
3120 : GEN p;
3121 2052 : if (!ifac_next(&part,&p,&v)) return v? 0: s;
3122 1368 : s = -s;
3123 : }
3124 : }
3125 :
3126 : INLINE ulong
3127 198603637 : u_forprime_next_fast(forprime_t *T)
3128 : {
3129 198603637 : if (++T->n <= pari_PRIMES[0])
3130 : {
3131 198604029 : T->p = pari_PRIMES[T->n];
3132 198604029 : return T->p > T->b ? 0: T->p;
3133 : }
3134 0 : return u_forprime_next(T);
3135 : }
3136 :
3137 : /* uisprime(n) knowing n has no prime divisor <= lim */
3138 : static int
3139 18763 : uisprime_nosmall(ulong n, ulong lim)
3140 18763 : { return (lim >= 661)? uisprime_661(n): uisprime(n); }
3141 :
3142 : /* Factor n and output [p,e] where
3143 : * p, e are vecsmall with n = prod{p[i]^e[i]}. If all != 0:
3144 : * if pU1 is not NULL, set *pU1 and *pU2 so that unfactored composite is
3145 : * U1^U2 with U1 not a pure power; else include it in factorization */
3146 : static GEN
3147 20482074 : factoru_sign(ulong n, ulong all, long hint, ulong *pU1, ulong *pU2)
3148 : {
3149 : GEN f, E, E2, P, P2;
3150 : pari_sp av;
3151 20482074 : ulong p, lim = 0;
3152 20482074 : long i, oldi = -1;
3153 : forprime_t S;
3154 :
3155 20482074 : if (pU1) *pU1 = *pU2 = 1;
3156 20482074 : if (n == 0) retmkvec2(mkvecsmall(0), mkvecsmall(1));
3157 20482074 : if (n == 1) retmkvec2(cgetg(1,t_VECSMALL), cgetg(1,t_VECSMALL));
3158 :
3159 20032351 : f = cgetg(3,t_VEC); av = avma;
3160 : /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3161 20032241 : (void)new_chunk(16*2);
3162 20031991 : P = cgetg(16, t_VECSMALL); i = 1;
3163 20031617 : E = cgetg(16, t_VECSMALL);
3164 20031634 : if (!all || all > 2)
3165 : {
3166 : ulong maxp;
3167 20031633 : long v = vals(n);
3168 20031604 : if (v)
3169 : {
3170 11787167 : P[1] = 2; E[1] = v; i = 2;
3171 11787167 : n >>= v; if (n == 1) goto END;
3172 : }
3173 16758849 : maxp = maxprime();
3174 16759331 : if (n <= maxp && PRIMES_search(n) > 0) { P[i] = n; E[i] = 1; i++; goto END; }
3175 10319703 : lim = minss(usqrt(n), all? all-1: tridiv_boundu(n));
3176 10319675 : if (!(hint & 16) && lim >= 128) /* expu(lim) >= 7 */
3177 18762 : { /* fast trial division */
3178 2564382 : GEN PR = prodprimes();
3179 2564381 : long nPR = lg(PR)-1, b = minss(nPR, expu(lim)-6);
3180 2564380 : ulong nr = ugcd(n, umodiu(gel(PR,b), n));
3181 2564387 : if (nr != 1)
3182 : {
3183 2557943 : GEN F = factoru_sign(nr, all, 1 + 2 + 16, NULL, NULL), Q = gel(F,1);
3184 2557952 : long j, l = lg(Q);
3185 8346314 : for (j = 1; j < l; j++)
3186 : {
3187 5788366 : ulong p = uel(Q,j);
3188 5788366 : if (all && p >= all) break; /* may occur for last p */
3189 5788360 : E[i] = u_lvalrem(n, p, &n); /* > 0 */
3190 5788362 : P[i] = p; i++;
3191 : }
3192 2557954 : if (n == 1) goto END;
3193 783620 : if (n <= maxp
3194 771301 : && PRIMES_search(n) > 0) { P[i] = n; E[i] = 1; i++; goto END; }
3195 : }
3196 18762 : maxp = GP_DATA->factorlimit;
3197 : }
3198 : else
3199 : {
3200 7755293 : maxp = lim;
3201 7755293 : u_forprime_init(&S, 3, lim);
3202 67567696 : while ( (p = u_forprime_next_fast(&S)) )
3203 : {
3204 : int stop;
3205 : /* tiny integers without small factors are often primes */
3206 67567488 : if (p == 673)
3207 : {
3208 7807875 : if (uisprime_661(n)) { P[i] = n; E[i] = 1; i++; goto END; }
3209 52774 : oldi = i;
3210 : }
3211 67567142 : v = u_lvalrem_stop(&n, p, &stop);
3212 67567150 : if (v) {
3213 10547644 : P[i] = p;
3214 10547644 : E[i] = v; i++;
3215 : }
3216 67567150 : if (stop) {
3217 7754755 : if (n != 1) { P[i] = n; E[i] = 1; i++; }
3218 7754755 : goto END;
3219 : }
3220 : }
3221 : }
3222 19344 : if (lim > maxp)
3223 : { /* second pass usually empty, outside fast trial division range */
3224 : long v;
3225 6 : u_forprime_init(&S, maxp+1, lim);
3226 5296866 : while ((p = u_forprime_next(&S)))
3227 : {
3228 : int stop;
3229 5296866 : v = u_lvalrem_stop(&n, p, &stop);
3230 5296866 : if (v) {
3231 6 : P[i] = p;
3232 6 : E[i] = v; i++;
3233 : }
3234 5296866 : if (stop) {
3235 6 : if (n != 1) { P[i] = n; E[i] = 1; i++; }
3236 6 : goto END;
3237 : }
3238 : }
3239 : }
3240 : }
3241 : /* if i > oldi (includes oldi = -1) we don't know that n is composite */
3242 19339 : if (all)
3243 : { /* smallfact: look for easy pure powers then stop */
3244 : #ifdef LONG_IS_64BIT
3245 1206 : ulong mask = all > 563 ? (all > 7129 ? 1: 3): 7;
3246 : #else
3247 25 : ulong mask = all > 22 ? (all > 83 ? 1: 3): 7;
3248 : #endif
3249 1231 : long k = 1, ex;
3250 1724 : while (uissquareall(n, &n)) k <<= 1;
3251 1244 : while ( (ex = uis_357_power(n, &n, &mask)) ) k *= ex;
3252 1231 : if (pU1 && (i == oldi || !uisprime_nosmall(n, lim)))
3253 254 : { *pU1 = n; *pU2 = (ulong)k; }
3254 : else
3255 977 : { P[i] = n; E[i] = k; i++; }
3256 1231 : goto END;
3257 : }
3258 : /* we don't known that n is composite ? */
3259 18108 : if (oldi != i && uisprime_nosmall(n, lim)) { P[i]=n; E[i]=1; i++; goto END; }
3260 :
3261 : {
3262 : GEN perm;
3263 988 : ifac_factoru(utoipos(n), hint, P, E, &i);
3264 988 : setlg(P, i);
3265 988 : perm = vecsmall_indexsort(P);
3266 988 : P = vecsmallpermute(P, perm);
3267 988 : E = vecsmallpermute(E, perm);
3268 : }
3269 20033196 : END:
3270 20033196 : set_avma(av);
3271 20032602 : P2 = cgetg(i, t_VECSMALL); gel(f,1) = P2;
3272 20032294 : E2 = cgetg(i, t_VECSMALL); gel(f,2) = E2;
3273 61674319 : while (--i >= 1) { P2[i] = P[i]; E2[i] = E[i]; }
3274 20032025 : return f;
3275 : }
3276 : GEN
3277 3803475 : factoru(ulong n)
3278 3803475 : { return factoru_sign(n, 0, decomp_default_hint, NULL, NULL); }
3279 :
3280 : ulong
3281 0 : radicalu(ulong n)
3282 : {
3283 0 : pari_sp av = avma;
3284 0 : return gc_long(av, zv_prod(gel(factoru(n),1)));
3285 : }
3286 :
3287 : long
3288 54194 : moebiusu_fact(GEN f)
3289 : {
3290 54194 : GEN E = gel(f,2);
3291 54194 : long i, l = lg(E);
3292 93569 : for (i = 1; i < l; i++)
3293 57834 : if (E[i] > 1) return 0;
3294 35735 : return odd(l)? 1: -1;
3295 : }
3296 :
3297 : long
3298 2512109 : moebiusu(ulong n)
3299 : {
3300 : pari_sp av;
3301 : ulong p;
3302 : long s, v, test_prime;
3303 : forprime_t S;
3304 :
3305 2512109 : switch(n)
3306 : {
3307 0 : case 0: (void)check_arith_non0(gen_0,"moebius");/*error*/
3308 570898 : case 1: return 1;
3309 106777 : case 2: return -1;
3310 : }
3311 1849692 : v = vals(n);
3312 1849261 : if (v == 0)
3313 1736996 : s = 1;
3314 : else
3315 : {
3316 112265 : if (v > 1) return 0;
3317 23800 : n >>= 1;
3318 23800 : s = -1;
3319 : }
3320 1760796 : av = avma;
3321 1760796 : u_forprime_init(&S, 3, tridiv_boundu(n));
3322 1772844 : test_prime = 0;
3323 40583020 : while ((p = u_forprime_next_fast(&S)))
3324 : {
3325 : int stop;
3326 : /* tiny integers without small factors are often primes */
3327 40579947 : if (p == 673)
3328 : {
3329 3752 : test_prime = 0;
3330 1771403 : if (uisprime_661(n)) return gc_long(av,-s);
3331 : }
3332 40578499 : v = u_lvalrem_stop(&n, p, &stop);
3333 40575460 : if (v) {
3334 1318309 : if (v > 1) return gc_long(av,0);
3335 1136714 : test_prime = 1;
3336 1136714 : s = -s;
3337 : }
3338 40393865 : if (stop) return gc_long(av, n==1? s: -s);
3339 : }
3340 872 : set_avma(av);
3341 1020 : if (test_prime && uisprime_661(n)) return -s;
3342 : else
3343 : {
3344 684 : long t = ifac_moebiusu(utoipos(n));
3345 684 : set_avma(av);
3346 684 : if (t == 0) return 0;
3347 684 : return (s == t)? 1: -1;
3348 : }
3349 : }
3350 :
3351 : long
3352 56055 : moebius(GEN n)
3353 : {
3354 56055 : pari_sp av = avma;
3355 : GEN F;
3356 : ulong p;
3357 : long i, l, s, v;
3358 : forprime_t S;
3359 :
3360 56055 : if ((F = check_arith_non0(n,"moebius")))
3361 : {
3362 : GEN E;
3363 749 : F = clean_Z_factor(F);
3364 728 : E = gel(F,2);
3365 728 : l = lg(E);
3366 1428 : for(i = 1; i < l; i++)
3367 980 : if (!equali1(gel(E,i))) return gc_long(av,0);
3368 448 : return gc_long(av, odd(l)? 1: -1);
3369 : }
3370 55172 : if (lgefint(n) == 3) return moebiusu(uel(n,2));
3371 808 : p = mod4(n); if (!p) return 0;
3372 808 : if (p == 2) { s = -1; n = shifti(n, -1); } else { s = 1; n = icopy(n); }
3373 808 : setabssign(n);
3374 :
3375 808 : u_forprime_init(&S, 3, tridiv_bound(n));
3376 2322026 : while ((p = u_forprime_next_fast(&S)))
3377 : {
3378 : int stop;
3379 2321446 : v = Z_lvalrem_stop(&n, p, &stop);
3380 2321446 : if (v)
3381 : {
3382 1540 : if (v > 1) return gc_long(av,0);
3383 1312 : s = -s;
3384 1312 : if (stop) return gc_long(av, is_pm1(n)? s: -s);
3385 : }
3386 : }
3387 580 : l = lg(primetab);
3388 590 : for (i = 1; i < l; i++)
3389 : {
3390 25 : v = Z_pvalrem(n, gel(primetab,i), &n);
3391 25 : if (v)
3392 : {
3393 25 : if (v > 1) return gc_long(av,0);
3394 11 : s = -s;
3395 11 : if (is_pm1(n)) return gc_long(av,s);
3396 : }
3397 : }
3398 565 : if (ifac_isprime(n)) return gc_long(av,-s);
3399 : /* large composite without small factors */
3400 211 : v = ifac_moebius(n);
3401 211 : return gc_long(av, s < 0? -v: v); /* correct also if v==0 */
3402 : }
3403 :
3404 : long
3405 1708 : ispowerful(GEN n)
3406 : {
3407 1708 : pari_sp av = avma;
3408 : GEN F;
3409 : ulong p, bound;
3410 : long i, l, v;
3411 : forprime_t S;
3412 :
3413 1708 : if ((F = check_arith_all(n, "ispowerful")))
3414 : {
3415 742 : GEN p, P = gel(F,1), E = gel(F,2);
3416 742 : if (lg(P) == 1) return 1; /* 1 */
3417 728 : p = gel(P,1);
3418 728 : if (!signe(p)) return 1; /* 0 */
3419 707 : i = is_pm1(p)? 2: 1; /* skip -1 */
3420 707 : l = lg(E);
3421 980 : for (; i < l; i++)
3422 847 : if (equali1(gel(E,i))) return 0;
3423 133 : return 1;
3424 : }
3425 966 : if (!signe(n)) return 1;
3426 :
3427 952 : if (mod4(n) == 2) return 0;
3428 623 : n = shifti(n, -vali(n));
3429 623 : if (is_pm1(n)) return 1;
3430 546 : setabssign(n);
3431 546 : bound = tridiv_bound(n);
3432 546 : u_forprime_init(&S, 3, bound);
3433 307797 : while ((p = u_forprime_next_fast(&S)))
3434 : {
3435 : int stop;
3436 307790 : v = Z_lvalrem_stop(&n, p, &stop);
3437 307790 : if (v)
3438 : {
3439 742 : if (v == 1) return gc_long(av,0);
3440 203 : if (stop) return gc_long(av, is_pm1(n));
3441 : }
3442 : }
3443 7 : l = lg(primetab);
3444 7 : for (i = 1; i < l; i++)
3445 : {
3446 0 : v = Z_pvalrem(n, gel(primetab,i), &n);
3447 0 : if (v)
3448 : {
3449 0 : if (v == 1) return gc_long(av,0);
3450 0 : if (is_pm1(n)) return gc_long(av,1);
3451 : }
3452 : }
3453 : /* no need to factor: must be p^2 or not powerful */
3454 7 : if (cmpii(powuu(bound+1, 3), n) > 0) return gc_long(av, Z_issquare(n));
3455 :
3456 7 : if (ifac_isprime(n)) return gc_long(av,0);
3457 : /* large composite without small factors */
3458 7 : return gc_long(av, ifac_ispowerful(n));
3459 : }
3460 :
3461 : ulong
3462 62031 : coreu_fact(GEN f)
3463 : {
3464 62031 : GEN P = gel(f,1), E = gel(f,2);
3465 62031 : long i, l = lg(P), m = 1;
3466 112281 : for (i = 1; i < l; i++)
3467 : {
3468 50250 : ulong p = P[i], e = E[i];
3469 50250 : if (e & 1) m *= p;
3470 : }
3471 62031 : return m;
3472 : }
3473 : ulong
3474 62031 : coreu(ulong n)
3475 : {
3476 : pari_sp av;
3477 62031 : if (n == 0) return 0;
3478 62031 : av = avma; return gc_ulong(av, coreu_fact(factoru(n)));
3479 : }
3480 : GEN
3481 708670 : core(GEN n)
3482 : {
3483 708670 : pari_sp av = avma;
3484 : GEN m, F;
3485 : ulong p;
3486 : long i, l, v;
3487 : forprime_t S;
3488 :
3489 708670 : if ((F = check_arith_all(n, "core")))
3490 : {
3491 646236 : GEN p, x, P = gel(F,1), E = gel(F,2);
3492 646236 : long j = 1;
3493 646236 : if (lg(P) == 1) return gen_1;
3494 646208 : p = gel(P,1);
3495 646208 : if (!signe(p)) return gen_0;
3496 646166 : l = lg(P); x = cgetg(l, t_VEC);
3497 2283011 : for (i = 1; i < l; i++)
3498 1636857 : if (mpodd(gel(E,i))) gel(x,j++) = gel(P,i);
3499 646154 : setlg(x, j); return ZV_prod(x);
3500 : }
3501 62410 : switch(lgefint(n))
3502 : {
3503 28 : case 2: return gen_0;
3504 61975 : case 3:
3505 61975 : p = coreu(uel(n,2));
3506 61975 : return signe(n) > 0? utoipos(p): utoineg(p);
3507 : }
3508 :
3509 407 : m = signe(n) < 0? gen_m1: gen_1;
3510 407 : n = absi_shallow(n);
3511 408 : u_forprime_init(&S, 2, tridiv_bound(n));
3512 5081673 : while ((p = u_forprime_next_fast(&S)))
3513 : {
3514 : int stop;
3515 5081332 : v = Z_lvalrem_stop(&n, p, &stop);
3516 5081332 : if (v)
3517 : {
3518 1577 : if (v & 1) m = muliu(m, p);
3519 1577 : if (stop)
3520 : {
3521 67 : if (!is_pm1(n)) m = mulii(m, n);
3522 67 : return gerepileuptoint(av, m);
3523 : }
3524 : }
3525 : }
3526 341 : l = lg(primetab);
3527 835 : for (i = 1; i < l; i++)
3528 : {
3529 502 : GEN q = gel(primetab,i);
3530 502 : v = Z_pvalrem(n, q, &n);
3531 502 : if (v)
3532 : {
3533 32 : if (v & 1) m = mulii(m, q);
3534 32 : if (is_pm1(n)) return gerepileuptoint(av, m);
3535 : }
3536 : }
3537 333 : if (ifac_isprime(n)) { m = mulii(m, n); return gerepileuptoint(av, m); }
3538 323 : if (m == gen_1) n = icopy(n); /* ifac_core destroys n */
3539 : /* large composite without small factors */
3540 323 : return gerepileuptoint(av, mulii(m, ifac_core(n)));
3541 : }
3542 :
3543 : long
3544 0 : Z_issmooth(GEN m, ulong lim)
3545 : {
3546 0 : pari_sp av = avma;
3547 0 : ulong p = 2;
3548 : forprime_t S;
3549 0 : u_forprime_init(&S, 2, lim);
3550 0 : while ((p = u_forprime_next_fast(&S)))
3551 : {
3552 : int stop;
3553 0 : (void)Z_lvalrem_stop(&m, p, &stop);
3554 0 : if (stop) return gc_long(av, abscmpiu(m,lim) <= 0);
3555 : }
3556 0 : return gc_long(av,0);
3557 : }
3558 :
3559 : GEN
3560 179041 : Z_issmooth_fact(GEN m, ulong lim)
3561 : {
3562 179041 : pari_sp av = avma;
3563 : GEN F, P, E;
3564 : ulong p;
3565 179041 : long i = 1, l = expi(m)+1;
3566 : forprime_t S;
3567 179013 : P = cgetg(l, t_VECSMALL);
3568 178992 : E = cgetg(l, t_VECSMALL); F = mkmat2(P,E);
3569 178959 : if (l == 1) return F; /* m == 1 */
3570 178917 : u_forprime_init(&S, 2, lim);
3571 47519401 : while ((p = u_forprime_next_fast(&S)))
3572 : {
3573 : long v;
3574 : int stop;
3575 47463725 : if ((v = Z_lvalrem_stop(&m, p, &stop)))
3576 : {
3577 653845 : P[i] = p;
3578 653845 : E[i] = v; i++;
3579 653845 : if (stop)
3580 : {
3581 124231 : if (abscmpiu(m,lim) > 0) break;
3582 111695 : if (m[2] > 1) { P[i] = m[2]; E[i] = 1; i++; }
3583 111695 : setlg(P, i);
3584 111728 : setlg(E, i); return gc_const((pari_sp)F, F);
3585 : }
3586 : }
3587 : }
3588 65482 : return gc_NULL(av);
3589 : }
3590 :
3591 : /***********************************************************************/
3592 : /** **/
3593 : /** COMPUTING THE MATRIX OF PRIME DIVISORS AND EXPONENTS **/
3594 : /** **/
3595 : /***********************************************************************/
3596 : static GEN
3597 147389 : aux_end(GEN M, GEN n, long nb)
3598 : {
3599 147389 : GEN P,E, z = (GEN)avma;
3600 : long i;
3601 :
3602 147389 : guncloneNULL(n);
3603 147389 : P = cgetg(nb+1,t_COL);
3604 147389 : E = cgetg(nb+1,t_COL);
3605 1118997 : for (i=nb; i; i--)
3606 : { /* allow a stackdummy in the middle */
3607 1046696 : while (typ(z) != t_INT) z += lg(z);
3608 971608 : gel(E,i) = z; z += lg(z);
3609 971608 : gel(P,i) = z; z += lg(z);
3610 : }
3611 147389 : gel(M,1) = P;
3612 147389 : gel(M,2) = E;
3613 147389 : return sort_factor(M, (void*)&abscmpii, cmp_nodata);
3614 : }
3615 :
3616 : static void
3617 966489 : STORE(long *nb, GEN x, long e) { (*nb)++; (void)x; (void)utoipos(e); }
3618 : static void
3619 916170 : STOREu(long *nb, ulong x, long e) { STORE(nb, utoipos(x), e); }
3620 : static void
3621 50167 : STOREi(long *nb, GEN x, long e) { STORE(nb, icopy(x), e); }
3622 : /* no prime less than p divides n; return 1 if factored completely */
3623 : static int
3624 39453 : special_primes(GEN n, ulong p, long *nb, GEN T)
3625 : {
3626 39453 : long i, l = lg(T);
3627 39453 : if (l > 1)
3628 : { /* pp = square of biggest p tried so far */
3629 540 : long pp[] = { evaltyp(t_INT)|_evallg(4), 0,0,0 };
3630 540 : pari_sp av = avma; affii(sqru(p), pp); set_avma(av);
3631 :
3632 1184 : for (i = 1; i < l; i++)
3633 777 : if (dvdiiz(n, gel(T,i), n))
3634 : {
3635 329 : long k = 1; while (dvdiiz(n, gel(T,i), n)) k++;
3636 231 : STOREi(nb, gel(T,i), k);
3637 231 : if (abscmpii(pp, n) > 0)
3638 : {
3639 133 : if (!is_pm1(n)) STOREi(nb, n, 1);
3640 133 : return 1;
3641 : }
3642 : }
3643 : }
3644 39320 : return 0;
3645 : }
3646 :
3647 : /* factor(sn*|n|), where sn = -1 or 1.
3648 : * all != 0 : only look for prime divisors < all. If pU is not NULL,
3649 : * set it to unfactored composite */
3650 : static GEN
3651 14267580 : ifactor_sign(GEN n, ulong all, long hint, long sn, GEN *pU)
3652 : {
3653 : GEN M, N;
3654 : pari_sp av;
3655 14267580 : long nb = 0, nb0 = -1, i;
3656 : ulong lim;
3657 : forprime_t T;
3658 :
3659 14267580 : if (lgefint(n) == 3)
3660 : { /* small integer */
3661 14120199 : GEN f, Pf, Ef, P, E, F = cgetg(3, t_MAT);
3662 : ulong U1, U2;
3663 : long l;
3664 14120542 : av = avma;
3665 : /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3666 14120542 : (void)new_chunk((15*3 + 15 + 1) * 2);
3667 14120537 : f = factoru_sign(uel(n,2), all, hint, pU? &U1: NULL, pU? &U2: NULL);
3668 14120792 : set_avma(av);
3669 14120773 : Pf = gel(f,1);
3670 14120773 : Ef = gel(f,2);
3671 14120773 : l = lg(Pf);
3672 14120773 : if (sn < 0)
3673 : { /* add sign */
3674 6519 : long L = l+1;
3675 6519 : gel(F,1) = P = cgetg(L, t_COL);
3676 6519 : gel(F,2) = E = cgetg(L, t_COL);
3677 6519 : gel(P,1) = gen_m1; P++;
3678 6519 : gel(E,1) = gen_1; E++;
3679 : }
3680 : else
3681 : {
3682 14114254 : gel(F,1) = P = cgetg(l, t_COL);
3683 14114217 : gel(F,2) = E = cgetg(l, t_COL);
3684 : }
3685 43490738 : for (i = 1; i < l; i++)
3686 : {
3687 29370084 : gel(P,i) = utoipos(Pf[i]);
3688 29370108 : gel(E,i) = utoipos(Ef[i]);
3689 : }
3690 14120654 : if (pU) *pU = U1 == 1? NULL: mkvec2(utoipos(U1), utoipos(U2));
3691 14120654 : return F;
3692 : }
3693 147381 : if (pU) *pU = NULL;
3694 147381 : M = cgetg(3,t_MAT);
3695 147389 : if (sn < 0) STORE(&nb, utoineg(1), 1);
3696 147389 : if (is_pm1(n)) return aux_end(M,NULL,nb);
3697 :
3698 147389 : n = N = gclone(n); setabssign(n);
3699 : /* trial division bound; look for primes <= lim; nb is the number of
3700 : * distinct prime factors so far; if nb0 >= 0, it records the value of nb
3701 : * for which we made a successful compositeness test: if later nb = nb0,
3702 : * we know that n is composite */
3703 147389 : lim = 1;
3704 147389 : if (!all || all > 2)
3705 : { /* trial divide p < all if all != 0, else p <= tridiv_bound() */
3706 : ulong maxp, p;
3707 : pari_sp av2;
3708 147375 : i = vali(n);
3709 147375 : if (i)
3710 : {
3711 76572 : STOREu(&nb, 2, i);
3712 76572 : av = avma; affii(shifti(n,-i), n); set_avma(av);
3713 : }
3714 147375 : if (is_pm1(n)) return aux_end(M,n,nb);
3715 147240 : lim = all? all-1: tridiv_bound(n);
3716 147240 : if (!(hint & 16) && lim >= 128) /* expu(lim) >= 7 */
3717 38298 : { /* fast trial division */
3718 126960 : GEN nr, PR = prodprimes();
3719 126960 : long nPR = lg(PR)-1, b = minss(nPR, expu(lim)-6);
3720 126960 : av = avma; maxp = GP_DATA->factorlimit;
3721 126960 : nr = gcdii(gel(PR,b), n);
3722 126960 : if (is_pm1(nr)) { set_avma(av); av2 = av; }
3723 : else
3724 : {
3725 124165 : GEN F = ifactor_sign(nr, all, 1 + 2 + 16, 1, NULL), P = gel(F,1);
3726 124165 : long l = lg(P);
3727 124165 : av2 = avma;
3728 725322 : for (i = 1; i < l; i++)
3729 : {
3730 602142 : pari_sp av3 = avma;
3731 602142 : GEN gp = gel(P,i);
3732 602142 : ulong p = gp[2];
3733 : long k;
3734 602142 : if (lgefint(gp)>3 || (all && p>=all)) break; /* may occur for last p */
3735 601157 : k = Z_lvalrem(n, p, &n); /* > 0 */
3736 601157 : affii(n, N); n = N; set_avma(av3);
3737 601157 : STOREu(&nb, p, k);
3738 : }
3739 124165 : if (is_pm1(n))
3740 : {
3741 88662 : stackdummy(av, av2);
3742 88662 : return aux_end(M,n,nb);
3743 : }
3744 : }
3745 : }
3746 : else
3747 : { /* naive trial division */
3748 20280 : av = avma; maxp = maxprime();
3749 20280 : u_forprime_init(&T, 3, minss(lim, maxp)); av2 = avma;
3750 : /* first pass: known to fit in private prime table */
3751 35229334 : while ((p = u_forprime_next_fast(&T)))
3752 : {
3753 35229169 : pari_sp av3 = avma;
3754 : int stop;
3755 35229169 : long k = Z_lvalrem_stop(&n, p, &stop);
3756 35228057 : if (k)
3757 : {
3758 238440 : affii(n, N); n = N; set_avma(av3);
3759 238440 : STOREu(&nb, p, k);
3760 : }
3761 : /* prodeuler(p=2,16381,1-1/p) ~ 0.0578; if probability of being prime
3762 : * knowing P^-(n) > 16381 is at least 10%, try BPSW */
3763 35228192 : if (!stop && p == 16381)
3764 : {
3765 3174 : if (bit_accuracy_mul(lgefint(n), 0.0578 * M_LN2) < 10)
3766 3174 : { nb0 = nb; stop = ifac_isprime(n); }
3767 : }
3768 35228192 : if (stop)
3769 : {
3770 19138 : if (!is_pm1(n)) STOREi(&nb, n, 1);
3771 19138 : stackdummy(av, av2);
3772 19138 : return aux_end(M,n,nb);
3773 : }
3774 : }
3775 : }
3776 39435 : stackdummy(av, av2);
3777 39440 : if (lim > maxp)
3778 : { /* second pass usually empty, outside fast trial division range */
3779 1 : av = avma; u_forprime_init(&T, maxp+1, lim); av2 = avma;
3780 882811 : while ((p = u_forprime_next(&T)))
3781 : {
3782 882811 : pari_sp av3 = avma;
3783 : int stop;
3784 882811 : long k = Z_lvalrem_stop(&n, p, &stop);
3785 882811 : if (k)
3786 : {
3787 1 : affii(n, N); n = N; set_avma(av3);
3788 1 : STOREu(&nb, p, k);
3789 : }
3790 882811 : if (stop)
3791 : {
3792 1 : if (!is_pm1(n)) STOREi(&nb, n, 1);
3793 1 : stackdummy(av, av2);
3794 1 : return aux_end(M,n,nb);
3795 : }
3796 : }
3797 0 : stackdummy(av, av2);
3798 : }
3799 : }
3800 39453 : if (special_primes(n, lim, &nb, primetab)) return aux_end(M,n, nb);
3801 : /* if nb > nb0 (includes nb0 = -1) we don't know that n is composite */
3802 39320 : if (all)
3803 : { /* smallfact: look for easy pure powers then stop. Cf Z_isanypower */
3804 : GEN x;
3805 31252 : long k, e = expu(lim);
3806 31252 : av = avma;
3807 30144 : k = e >= 10? Z_isanypower_nosmalldiv(n, e, &x)
3808 31252 : : Z_isanypower(n, &x);
3809 31252 : if (k > 1) { affii(x, n); nb0 = -1; } else if (k < 1) k = 1;
3810 31252 : if (pU)
3811 : {
3812 : GEN F;
3813 12936 : if (abscmpiu(n, lim) <= 0
3814 12936 : || cmpii(n, sqru(lim)) <= 0
3815 8617 : || ((e >= 14) &&
3816 7856 : (nb>nb0 && bit_accuracy(lgefint(n))<2048 && ifac_isprime(n))))
3817 12936 : { set_avma(av); STOREi(&nb, n, k); return aux_end(M,n, nb); }
3818 5747 : set_avma(av); F = aux_end(M, NULL, nb); /* don't destroy n */
3819 5747 : *pU = mkvec2(icopy(n), utoipos(k)); /* composite cofactor */
3820 5747 : gunclone(n); return F;
3821 : }
3822 18316 : set_avma(av); STOREi(&nb, n, k);
3823 18316 : if (DEBUGLEVEL >= 2) {
3824 0 : pari_warn(warner,
3825 0 : "IFAC: untested %ld-bit integer declared prime", expi(n)+1);
3826 0 : if (expi(n) <= 256) err_printf("\t%Ps\n", n);
3827 : }
3828 : }
3829 8068 : else if (nb > nb0 && ifac_isprime(n)) STOREi(&nb, n, 1);
3830 2634 : else nb += ifac_decomp(n, hint);
3831 26384 : return aux_end(M,n, nb);
3832 : }
3833 :
3834 : static GEN
3835 9686995 : ifactor(GEN n, ulong all, long hint)
3836 : {
3837 9686995 : long s = signe(n);
3838 9686995 : if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3839 9686946 : return ifactor_sign(n, all, hint, s, NULL);
3840 : }
3841 :
3842 : int
3843 7094 : ifac_next(GEN *part, GEN *p, long *e)
3844 : {
3845 7094 : GEN here = ifac_main(part);
3846 7094 : if (here == gen_0) { *p = NULL; *e = 1; return 0; }
3847 7080 : if (!here) { *p = NULL; *e = 0; return 0; }
3848 5127 : *p = VALUE(here);
3849 5127 : *e = EXPON(here)[2];
3850 5127 : ifac_delete(here); return 1;
3851 : }
3852 :
3853 : /* see before ifac_crack for current semantics of 'hint' (factorint's 'flag') */
3854 : GEN
3855 10290 : factorint(GEN n, long flag)
3856 : {
3857 : GEN F;
3858 10290 : if ((F = check_arith_all(n,"factorint"))) return gcopy(F);
3859 10276 : return ifactor(n,0,flag);
3860 : }
3861 :
3862 : GEN
3863 49453 : Z_factor_limit(GEN n, ulong all)
3864 : {
3865 49453 : if (!all) all = GP_DATA->factorlimit + 1;
3866 49453 : return ifactor(n, all, decomp_default_hint);
3867 : }
3868 : GEN
3869 886431 : absZ_factor_limit_strict(GEN n, ulong all, GEN *pU)
3870 : {
3871 : GEN F, U;
3872 886431 : if (!signe(n))
3873 : {
3874 0 : if (pU) *pU = NULL;
3875 0 : retmkmat2(mkcol(gen_0), mkcol(gen_1));
3876 : }
3877 886431 : if (!all) all = GP_DATA->factorlimit + 1;
3878 886431 : F = ifactor_sign(n, all, decomp_default_hint, 1, &U);
3879 886461 : if (pU) *pU = U;
3880 886461 : return F;
3881 : }
3882 : GEN
3883 290566 : absZ_factor_limit(GEN n, ulong all)
3884 : {
3885 290566 : if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3886 290566 : if (!all) all = GP_DATA->factorlimit + 1;
3887 290566 : return ifactor_sign(n, all, decomp_default_hint, 1, NULL);
3888 : }
3889 : GEN
3890 9627238 : Z_factor(GEN n)
3891 9627238 : { return ifactor(n,0,decomp_default_hint); }
3892 : GEN
3893 3276492 : absZ_factor(GEN n)
3894 : {
3895 3276492 : if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3896 3276478 : return ifactor_sign(n, 0, decomp_default_hint, 1, NULL);
3897 : }
3898 : /* Factor until the unfactored part is smaller than limit. Return the
3899 : * factored part. Hence factorback(output) may be smaller than n */
3900 : GEN
3901 3045 : Z_factor_until(GEN n, GEN limit)
3902 : {
3903 3045 : pari_sp av = avma;
3904 3045 : long s = signe(n), eq;
3905 : GEN q, F, U;
3906 :
3907 3045 : if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3908 3045 : F = ifactor_sign(n, tridiv_bound(n), decomp_default_hint, s, &U);
3909 3045 : if (!U) return F;
3910 1155 : q = gel(U,1); /* composite, q^eq = unfactored part */
3911 1155 : eq = itou(gel(U,2));
3912 1155 : if (cmpii(eq == 1? q: powiu(q, eq), limit) > 0)
3913 : { /* factor further */
3914 1022 : long l2 = expi(q)+1;
3915 : GEN P2, E2, F2, part;
3916 1022 : if (eq > 1) limit = sqrtnint(limit, eq);
3917 1022 : P2 = coltrunc_init(l2);
3918 1022 : E2 = coltrunc_init(l2); F2 = mkmat2(P2,E2);
3919 1022 : part = ifac_start(icopy(q), 0); /* ifac_next would destroy q */
3920 : for(;;)
3921 70 : {
3922 : long e;
3923 : GEN p;
3924 1092 : if (!ifac_next(&part,&p,&e)) break;
3925 1092 : vectrunc_append(P2, p);
3926 1092 : vectrunc_append(E2, utoipos(e * eq));
3927 1092 : q = diviiexact(q, powiu(p, e));
3928 1092 : if (cmpii(q, limit) <= 0) break;
3929 : }
3930 1022 : F2 = sort_factor(F2, (void*)&abscmpii, cmp_nodata);
3931 1022 : F = merge_factor(F, F2, (void*)&abscmpii, cmp_nodata);
3932 : }
3933 1155 : return gerepilecopy(av, F);
3934 : }
3935 :
3936 : static void
3937 96883748 : matsmalltrunc_append(GEN m, ulong p, ulong e)
3938 : {
3939 96883748 : GEN P = gel(m,1), E = gel(m,2);
3940 96883748 : long l = lg(P);
3941 96883748 : P[l] = p; lg_increase(P);
3942 96873459 : E[l] = e; lg_increase(E);
3943 96880930 : }
3944 : static GEN
3945 37953935 : matsmalltrunc_init(long l)
3946 : {
3947 37953935 : GEN P = vecsmalltrunc_init(l);
3948 37907417 : GEN E = vecsmalltrunc_init(l); return mkvec2(P,E);
3949 : }
3950 :
3951 : /* return optimal N s.t. omega(b) <= N for all b <= x */
3952 : long
3953 71608 : maxomegau(ulong x)
3954 : { /* P=primes(15); for(i=1,15, print([i, vecprod(P[1..i])])) */
3955 71608 : if (x < 30030UL)/* rare trivial cases */
3956 : {
3957 37457 : if (x < 2UL) return 0;
3958 19201 : if (x < 6UL) return 1;
3959 13601 : if (x < 30UL) return 2;
3960 12894 : if (x < 210UL) return 3;
3961 12635 : if (x < 2310UL) return 4;
3962 11606 : return 5;
3963 : }
3964 34151 : if (x < 510510UL) return 6; /* most frequent case */
3965 18753 : if (x < 9699690UL) return 7;
3966 7 : if (x < 223092870UL) return 8;
3967 : #ifdef LONG_IS_64BIT
3968 6 : if (x < 6469693230UL) return 9;
3969 0 : if (x < 200560490130UL) return 10;
3970 0 : if (x < 7420738134810UL) return 11;
3971 0 : if (x < 304250263527210UL) return 12;
3972 0 : if (x < 13082761331670030UL) return 13;
3973 0 : if (x < 614889782588491410UL) return 14;
3974 0 : return 15;
3975 : #else
3976 1 : return 9;
3977 : #endif
3978 : }
3979 : /* return optimal N s.t. omega(b) <= N for all odd b <= x */
3980 : long
3981 2229 : maxomegaoddu(ulong x)
3982 : { /* P=primes(15+1); for(i=1,15, print([i, vecprod(P[2..i+1])])) */
3983 2229 : if (x < 255255UL)/* rare trivial cases */
3984 : {
3985 1355 : if (x < 3UL) return 0;
3986 1355 : if (x < 15UL) return 1;
3987 1355 : if (x < 105UL) return 2;
3988 1355 : if (x < 1155UL) return 3;
3989 1327 : if (x < 15015UL) return 4;
3990 1327 : return 5;
3991 : }
3992 874 : if (x < 4849845UL) return 6; /* most frequent case */
3993 0 : if (x < 111546435UL) return 7;
3994 0 : if (x < 3234846615UL) return 8;
3995 : #ifdef LONG_IS_64BIT
3996 0 : if (x < 100280245065UL) return 9;
3997 0 : if (x < 3710369067405UL) return 10;
3998 0 : if (x < 152125131763605UL) return 11;
3999 0 : if (x < 6541380665835015UL) return 12;
4000 0 : if (x < 307444891294245705UL) return 13;
4001 0 : if (x < 16294579238595022365UL) return 14;
4002 0 : return 15;
4003 : #else
4004 0 : return 9;
4005 : #endif
4006 : }
4007 :
4008 : /* If a <= c <= b , factoru(c) = L[c-a+1] */
4009 : GEN
4010 31925 : vecfactoru_i(ulong a, ulong b)
4011 : {
4012 31925 : ulong k, p, n = b-a+1, N = maxomegau(b) + 1;
4013 31925 : GEN v = const_vecsmall(n, 1);
4014 31925 : GEN L = cgetg(n+1, t_VEC);
4015 : forprime_t T;
4016 20766239 : for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
4017 31925 : u_forprime_init(&T, 2, usqrt(b));
4018 888086 : while ((p = u_forprime_next(&T)))
4019 : { /* p <= sqrt(b) */
4020 853169 : ulong pk = p, K = ulogint(b, p);
4021 2972300 : for (k = 1; k <= K; k++)
4022 : {
4023 2116139 : ulong j, t = a / pk, ap = t * pk;
4024 2116139 : if (ap < a) { ap += pk; t++; }
4025 : /* t = (j+a-1) \ pk */
4026 2116139 : t %= p;
4027 61049507 : for (j = ap-a+1; j <= n; j += pk)
4028 : {
4029 58930382 : if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
4030 58933368 : if (++t == p) t = 0;
4031 : }
4032 2119125 : pk *= p;
4033 : }
4034 : }
4035 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4036 21040779 : for (k = 1, N = a; k <= n; k++, N++)
4037 20989852 : if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
4038 50927 : return L;
4039 : }
4040 : GEN
4041 0 : vecfactoru(ulong a, ulong b)
4042 : {
4043 0 : pari_sp av = avma;
4044 0 : return gerepilecopy(av, vecfactoru_i(a,b));
4045 : }
4046 :
4047 : /* Assume a and b odd, return L s.t. L[k] = factoru(a + 2*(k-1))
4048 : * If a <= c <= b odd, factoru(c) = L[(c-a)>>1 + 1] */
4049 : GEN
4050 2229 : vecfactoroddu_i(ulong a, ulong b)
4051 : {
4052 2229 : ulong k, p, n = ((b-a)>>1) + 1, N = maxomegaoddu(b) + 1;
4053 2229 : GEN v = const_vecsmall(n, 1);
4054 2229 : GEN L = cgetg(n+1, t_VEC);
4055 : forprime_t T;
4056 :
4057 17420964 : for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
4058 2229 : u_forprime_init(&T, 3, usqrt(b));
4059 182374 : while ((p = u_forprime_next(&T)))
4060 : { /* p <= sqrt(b) */
4061 183105 : ulong pk = p, K = ulogint(b, p);
4062 620805 : for (k = 1; k <= K; k++)
4063 : {
4064 440660 : ulong j, t = (a / pk) | 1UL, ap = t * pk;
4065 : /* t and ap are odd, ap multiple of pk = p^k */
4066 440660 : if (ap < a) { ap += pk<<1; t+=2; }
4067 : /* c=t*p^k by steps of 2*p^k; factorization of c*=p^k if (t,p)=1 */
4068 440660 : t %= p;
4069 32506072 : for (j = ((ap-a)>>1)+1; j <= n; j += pk)
4070 : {
4071 32068489 : if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
4072 32065412 : t += 2; if (t >= p) t -= p;
4073 : }
4074 437583 : pk *= p;
4075 : }
4076 : }
4077 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4078 17643040 : for (k = 1, N = a; k <= n; k++, N+=2)
4079 17641021 : if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
4080 2019 : return L;
4081 : }
4082 : GEN
4083 0 : vecfactoroddu(ulong a, ulong b)
4084 : {
4085 0 : pari_sp av = avma;
4086 0 : return gerepilecopy(av, vecfactoroddu_i(a,b));
4087 : }
4088 :
4089 : /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree, else NULL */
4090 : GEN
4091 7014 : vecfactorsquarefreeu(ulong a, ulong b)
4092 : {
4093 7014 : ulong k, p, n = b-a+1, N = maxomegau(b) + 1;
4094 7014 : GEN v = const_vecsmall(n, 1);
4095 7014 : GEN L = cgetg(n+1, t_VEC);
4096 : forprime_t T;
4097 14007238 : for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
4098 7014 : u_forprime_init(&T, 2, usqrt(b));
4099 838334 : while ((p = u_forprime_next(&T)))
4100 : { /* p <= sqrt(b), kill nonsquarefree */
4101 831320 : ulong j, pk = p*p, t = a / pk, ap = t * pk;
4102 831320 : if (ap < a) ap += pk;
4103 7160090 : for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
4104 :
4105 831320 : t = a / p; ap = t * p;
4106 831320 : if (ap < a) { ap += p; t++; }
4107 30551556 : for (j = ap-a+1; j <= n; j += p, t++)
4108 29720236 : if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
4109 : }
4110 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4111 14007238 : for (k = 1, N = a; k <= n; k++, N++)
4112 14000224 : if (gel(L,k) && uel(v,k) != N) vecsmalltrunc_append(gel(L,k), N/uel(v,k));
4113 7014 : return L;
4114 : }
4115 : /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree and coprime
4116 : * to all the primes in sorted zv P, else NULL */
4117 : GEN
4118 32592 : vecfactorsquarefreeu_coprime(ulong a, ulong b, GEN P)
4119 : {
4120 32592 : ulong k, p, n = b-a+1, sqb = usqrt(b), N = maxomegau(b) + 1;
4121 32592 : GEN v = const_vecsmall(n, 1);
4122 32592 : GEN L = cgetg(n+1, t_VEC);
4123 : forprime_t T;
4124 90668116 : for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
4125 32592 : u_forprime_init(&T, 2, sqb);
4126 3679351 : while ((p = u_forprime_next(&T)))
4127 : { /* p <= sqrt(b), kill nonsquarefree */
4128 3646561 : ulong j, t, ap, bad = zv_search(P, p), pk = bad ? p: p * p;
4129 3646693 : t = a / pk; ap = t * pk; if (ap < a) ap += pk;
4130 80853642 : for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
4131 3646693 : if (bad) continue;
4132 :
4133 3584540 : t = a / p; ap = t * p;
4134 3584540 : if (ap < a) { ap += p; t++; }
4135 116460307 : for (j = ap-a+1; j <= n; j += p, t++)
4136 112875701 : if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
4137 : }
4138 32592 : if (uel(P,lg(P)-1) <= sqb) P = NULL;
4139 : /* complete factorisation of non-sqrt(b)-smooth numbers */
4140 90859018 : for (k = 1, N = a; k <= n; k++, N++)
4141 90826567 : if (gel(L,k) && uel(v,k) != N)
4142 : {
4143 25673017 : ulong q = N / uel(v,k);
4144 25673017 : if (!P || !zv_search(P, q)) vecsmalltrunc_append(gel(L,k), q);
4145 : }
4146 32451 : return L;
4147 : }
4148 :
4149 : GEN
4150 49 : vecsquarefreeu(ulong a, ulong b)
4151 : {
4152 49 : ulong j, k, p, n = b-a+1;
4153 49 : GEN L = const_vecsmall(n, 1);
4154 : forprime_t T;
4155 49 : u_forprime_init(&T, 2, usqrt(b));
4156 462 : while ((p = u_forprime_next(&T)))
4157 : { /* p <= sqrt(b), kill nonsquarefree */
4158 413 : ulong pk = p*p, t = a / pk, ap = t * pk;
4159 413 : if (ap < a) { ap += pk; t++; }
4160 : /* t = (j+a-1) \ pk */
4161 21777 : for (j = ap-a+1; j <= n; j += pk, t++) L[j] = 0;
4162 : }
4163 48258 : for (k = j = 1; k <= n; k++)
4164 48209 : if (L[k]) L[j++] = a+k-1;
4165 49 : setlg(L,j); return L;
4166 : }
|