Ilya Zakharevich on Sat, 25 Apr 1998 22:58:17 +0200 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
Calculate primes up to 436M in a glance |
This implementation of initprimes is as quick as the old one for sizes below 5e6, becomes slightly (50%) quickier when you go to 50e6 (on my machine), and becomes infinitely quickier when primelimit is twice the simultaneously avalaible core - or more. Timing (P5/133, 32M memory): Old New 0.5e6 60ms 60ms 5e6 500ms 470ms 50e6 6 280ms 4 690ms 60e6 infinity 5 650ms 120e6 --- 11 650ms 240e6 --- 24 220ms 430e6 --- 46 970ms The reason why the old algorithm is so slow for big limits is a horrible memory locality. If you can put 40M in memory, and want to calculate primes up to n>80e6, you will move approx. 1/3*n^(3/2) bytes to the swapfile and back. The new algorithm requires swap if 1.09*n/log(n) + 0.5M > available memory, and runs over it only once. So, with 25M of available memory, it will run up to the current limit 436e6 without going to swap at all, and if you do not have that much memory, you need to move this amount to swap only once. Enjoy, Ilya --- ./src/basemath/arith2.c~.ini Sat Feb 7 11:11:30 1998 +++ ./src/basemath/arith2.c Sat Apr 25 16:21:54 1998 @@ -52,14 +52,12 @@ primes(long n) return y; } -byteptr -initprimes(long maxnum) +static byteptr +initprimes1(long maxnum, long *lenp, long *lastp) { - long k,size = (maxnum+257) >> 1; + long k,size = (maxnum+1) >> 1; byteptr q,r,s,fin, p = (byteptr) gpmalloc(size+1); - if (size > 218136500) - err(talker,"impossible to have prestored primes > 436273009"); memset(p, 0, size + 1); fin = p + size; for (r=q=p,k=1; r<fin; ) { @@ -74,7 +72,118 @@ initprimes(long maxnum) *r++ = (q-s) << 1; } *r++=0; - return (byteptr) gprealloc(p,r-p,size); + *lenp = r - p; + *lastp = ((s - p) << 1) + 1; + return (byteptr) gprealloc(p,r-p,size + 1); +} + +#define PRIME_ARENA (512 * 1024) + +byteptr +initprimes0(long maxnum, long *lenp, long *lastp) +{ + long k, size, alloced, asize, psize, rootnum, curlow, last, remains, need; + byteptr q,r,s,fin, p, p1, fin1, plast, curdiff; + + if (maxnum > 436273009) + err(talker,"impossible to have prestored primes > 436273009"); + if (maxnum <= 64*1024) /* Arbitrary. */ + return initprimes1(maxnum, lenp, lastp); + + if (!(maxnum % 2)) + maxnum--; /* Make it odd. */ + /* Checked to be enough up to 40e6, achieved at 155893. */ + size = 1.09 * maxnum/log(maxnum) + 145; + p1 = (byteptr) gpmalloc(size); + { + byteptr p2; + + p2 = initprimes0(rootnum = sqrt(maxnum), &psize, &last); + memcpy(p1, p2, psize); + free(p2); + } + fin1 = p1 + psize - 1; + if (!(rootnum % 2)) + rootnum--; /* Make it odd - checked up to. */ + remains = (maxnum - rootnum) >> 1; /* Odd numbers to check */ + need = 100 * rootnum; /* Make % overhead negligeable. */ + if (need < PRIME_ARENA) + need = PRIME_ARENA; + if (avma - bot < need/2) { + asize = need; + alloced = 1; + } else { + p = (byteptr) bot; + asize = avma - bot; + } + if (asize > remains) + asize = remains + 1; /* Guard at end. */ + if (alloced) + p = (byteptr) gpmalloc(asize); + fin = p + asize - 1; /* Leave 0-guard at fin. */ + curlow = rootnum + 2; /* Know below this odd number. */ + curdiff = fin1; + + /* During each iteration p..fin-1 represents a range of odd + numbers. plast is a pointer which represent last prime seen, + it may point before p..fin-1. */ + plast = p - ((rootnum - last) >> 1) - 1; + while (remains) { + if (asize > remains) { + asize = remains + 1; + fin = p + asize - 1; + } + memset(p, 0, asize); + /* p corresponds to curlow. q runs over primediffs. */ + for (q = p1 + 2, k = 3; q <= fin1; ) { + /* First odd number >= curlow divisible by p is (if curlow > p) + last odd number <= curlow + 2p - 1 divisible by p is + p plus last even number <= curlow + p - 1 divisible by p is + p plus last even number <= curlow + p - 2 divisible by p is + curlow + p - 2 - (curlow + p - 2)) % 2p + p. + */ + long k2 = k * k - curlow; + + if (k2 > 0) { + r = p + (k2 = k2 >> 1); + if (k2 > remains) /* Guard against an address wrap. */ + r = fin; + } else + r = p - ( ( (curlow + k - 2) % (2*k) ) >> 1 ) + k - 1; + for (s = r; s < fin; s += k) + *s = 1; + k += *q++; + } + /* q runs over addresses corresponding to primes */ + for (q = p; ; plast = q++) { + while (*q) + q++; /* Use 0-guard at end */ + if (q >= fin) + break; + *curdiff++ = (q - plast) << 1; + } + plast -= asize - 1; + remains -= asize - 1; + curlow += ((asize - 1)<<1); + } + last = curlow - ( (p - plast) << 1 ); + *curdiff++ = 0; + *lenp = curdiff - p1; + *lastp = last; + if (alloced) + free(p); + return (byteptr) gprealloc(p1, *lenp, size); +} + +byteptr +initprimes(long maxnum) /* Backward compatibility */ +{ + long len, last; + if (maxnum + 256 > 436273009) + err(talker,"impossible to have prestored primes > 436273009"); + /* The storage algorithm needs to know next prime after maxnum + to be sure that no primes up to maxnum are skipped. */ + return initprimes0(maxnum + 256, &len, &last); } static void