Bill Allombert on Thu, 28 Oct 2004 15:12:46 +0200 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
asm inline hppa level0 kernel |
Hello PARI-dev, The attached patch replaces the asm hppa kernel with an inline version that works with gcc. This is for 32-bit only. You need to build with --kernel=hppa. It was only tested with gcc on GNU/Linux. Please test on other platforms. I think the code of the different level0 kernels could be cleaned up a bit: I found 1) obsolete functions, 2) no support for LOCAL_HIREMAINDER/LOCAL_OVERFLOW, 3) code duplication, 4) use of inferior alternatives. Performance of the hppa kernel (gcc 3.3.4, dual PA8600 at 550MHz, linux 2.4.26): ASM inline kernel: +++ Total bench for gp-sta is 2890 +++ Total bench for gp-dyn is 2976 Portable kernel: +++ Total bench for gp-sta is 4176 +++ Total bench for gp-dyn is 4264 Cheers, Bill
? src/kernel/hppa/level0.c
? src/kernel/hppa/level0.h
Index: src/kernel/hppa/MakeLVL0.SH
===================================================================
RCS file: /home/cvs/pari/src/kernel/hppa/MakeLVL0.SH,v
retrieving revision 1.3
diff -u -r1.3 MakeLVL0.SH
--- src/kernel/hppa/MakeLVL0.SH	25 Nov 2003 18:53:36 -0000	1.3
+++ src/kernel/hppa/MakeLVL0.SH	28 Oct 2004 12:07:08 -0000
@@ -1,10 +1,11 @@
 # Level 0 kernel is "asm extern"
 kern=$src/kernel/$kernlvl0
+knone=$src/kernel/none
 
 cat >> $file << EOT
-parilvl0.h: $src/kernel/none/asm0.h
-	cat $src/kernel/none/asm0.h > parilvl0.h
-kernel\$(_O): $kern/level0.s
-	\$(AS) \$(ASFLAGS) -o kernel\$(_O) $kern/level0.s
+parilvl0.h: $kern/level0.h
+	cat $kern/level0.h $knone/divll.h > parilvl0.h
+kernel\$(_O): .headers $kern/level0.h
+	\$(CC) -c \$(CFLAGS) \$(CPPFLAGS) -o kernel\$(_O) $knone/level0.c
 
 EOT
--- /dev/null	2004-08-20 19:57:57.000000000 +0000
+++ src/kernel/hppa/level0.h	2004-10-28 10:21:42.000000000 +0000
@@ -0,0 +1,115 @@
+#line 2 "../src/kernel/hppa/level0.h"
+/* $Id: level0.h,v 1.9 2003/03/05 20:17:11 karim Exp $
+
+Copyright (C) 2004 The PARI group.
+
+This file is part of the PARI/GP package.
+
+PARI/GP is free software; you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation. It is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY WHATSOEVER.
+
+Check the License for details. You should have received a copy of it, along
+with the package; see the file 'COPYING'. If not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* This file was made using idea from Bruno Haible ix86 asm inline kernel
+ * and code from Nigel Smart hppa asm kernel. */
+
+/* 32-bit PA-RISC only.  The macros below read and/or write variables named
+ * `overflow' (carry/borrow, expected to be 0 or 1) and `hiremainder' (high
+ * word of a product); both must be in scope where a macro is expanded.
+ * LOCAL_OVERFLOW / LOCAL_HIREMAINDER declare them as register locals. */
+#define LOCAL_HIREMAINDER  register ulong hiremainder
+#define LOCAL_OVERFLOW     register ulong overflow
+
+/* addll(a,b): return a + b; overflow receives the carry out. */
+#define addll(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("add %2,%3,%0\n\taddc %%r0,%%r0,%1" \
+        : "=r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2) \
+        : "cc"); \
+  __value; \
+})
+
+/* addllx(a,b): return a + b + overflow; overflow receives the carry out.
+ * The leading "sub overflow,1,%r0" only serves to load the carry bit. */
+#define addllx(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("sub %4,%5,%%r0\n\taddc %2,%3,%0\n\taddc %%r0,%%r0,%1" \
+        : "=r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2), "r" (overflow), "r" ((ulong) 1)\
+        : "cc"); \
+  __value; \
+})
+
+/* subll(a,b): return a - b; overflow receives the borrow out (the carry bit
+ * is clear after a borrowing sub, hence the final 1 - carry). */
+#define subll(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("sub %2,%3,%0\n\taddc %%r0,%%r0,%1\n\tsubi 1,%1,%1" \
+        : "=r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2) \
+        : "cc"); \
+  __value; \
+})
+
+/* subllx(a,b): return a - b - overflow; overflow receives the borrow out.
+ * The leading "sub %r0,overflow,%r0" only serves to load the carry bit. */
+#define subllx(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   __asm__ ("sub %%r0,%4,%%r0\n\tsubb %2,%3,%0\n\taddc %%r0,%%r0,%1\n\tsubi 1,%1,%1" \
+        : "=&r" (__value), "=r" (overflow) \
+        : "r" (__arg1), "r" (__arg2), "r" (overflow)\
+        : "cc"); \
+  __value; \
+})
+
+/* mulll(a,b): return the low word of a * b and store the high word in
+ * hiremainder.  xmpyu leaves the 64-bit product in a floating-point
+ * register; the union splits it, x[0] being the high word (big-endian). */
+#define mulll(a,b) \
+({ ulong __arg1 = (a), __arg2 = (b); \
+   union {double z; ulong x[2];} __vtab; \
+   __asm__ ("xmpyu %1,%2,%0" \
+        : "=f" (__vtab.z) \
+        : "f" (__arg1), "f" (__arg2) \
+        : "cc"); \
+  hiremainder=__vtab.x[0]; \
+  __vtab.x[1]; \
+})
+
+/* addmul(a,b): return the low word of a * b + hiremainder and store the
+ * high word in hiremainder.  __value must be early-clobber ("=&r"): it is
+ * written by the add while %4 is still to be read by the addc, so gcc must
+ * not place both in the same register. */
+#define addmul(a,b) \
+({ ulong __value, __arg1 = (a), __arg2 = (b); \
+   union {double z; ulong x[2];} __vtab; \
+   __asm__ ("xmpyu %1,%2,%0" \
+        : "=f" (__vtab.z) \
+        : "f" (__arg1), "f" (__arg2) \
+        : "cc"); \
+   __asm__ ("add %2,%3,%0\n\taddc %%r0, %4, %1" \
+        : "=&r" (__value), "=r" (hiremainder) \
+        : "r" (__vtab.x[1]),"r" (hiremainder), "r" (__vtab.x[0]) \
+        : "cc"); \
+  __value; \
+})
+
+/* From Peter Montgomery */
+
+/* bfffo(x): number of leading zero bits of x, assuming BITS_IN_LONG is 32:
+ * narrow down to the top non-zero nibble, then finish by table lookup. */
+#define bfffo(x) \
+({int __value; \
+  ulong __arg1=(x); \
+  static const int __bfffo_tabshi[16]={4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0};\
+  __value = BITS_IN_LONG - 4; \
+  if (__arg1 > 0xffffUL) {__value -= 16; __arg1 >>= 16;} \
+  if (__arg1 > 0x00ffUL) {__value -= 8; __arg1 >>= 8;} \
+  if (__arg1 > 0x000fUL) {__value -= 4; __arg1 >>= 4;} \
+  __value + __bfffo_tabshi[__arg1]; \
+})