Vasili Burdo on Wed, 15 Jul 2015 18:09:14 +0200


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Support of binary and hexadecimal integers in PARI/GP


Hi,

I implemented $SUBJ.
See the patch (against current MASTER) in attachment.

What was done:
- lexer updated to understand the "0b" and "0x" prefixes for binary and
hexadecimal integers.
  Everything following the prefix is parsed as an integer in the
corresponding base.
  This change does not affect real numbers - only integers.

- implemented option "base". Like this: default(base, "dec|hex|HEX")
  Where "dec" is decimal output, "hex" - lower case hex output,
  "HEX" - upper case hex output. Option for binary output is not supported
  because large binary numbers look ugly.

- implemented escape shortcuts \od - switch to decimal output, \ox -
switch to lower case hex,
  \oX - switch to upper case hex

- implemented escape shortcut \A to print raw result in different base.
  I.e. in hex if decimal is default or vice versa.

- updated function bruti_intern() to print integers using selected base.
  I tried to make hex output "intelligent" - numbers less than 16 are
always printed
  in decimal. Only large numbers are printed in hex.
  For now 16 is hardcoded - there is no option to control this boundary.
  If you think this option is necessary - please suggest a name for it :)

I am also thinking about implementing "visual delimiters" for large
numbers, as
defined in the new C++ standard. For example, the number 1000000 may be delimited like
this: 1'000'000. However, lexing ticks may be difficult, so I'd prefer to use
an underscore as the delimiter: 1_000_000.
What do you think about this? Would these "visual delimiters" be useful or not?
What options would I need to implement to control this feature?

Thanks,
Vasili
diff --git a/src/headers/paridecl.h b/src/headers/paridecl.h
index 520673c..793b2f2 100644
--- a/src/headers/paridecl.h
+++ b/src/headers/paridecl.h
@@ -2217,6 +2217,7 @@ GEN sd_debugmem(const char *v, long flag);
 GEN sd_factor_add_primes(const char *v, long flag);
 GEN sd_factor_proven(const char *v, long flag);
 GEN sd_format(const char *v, long flag);
+GEN sd_base(const char *v, long flag);
 GEN sd_histsize(const char *v, long flag);
 GEN sd_log(const char *v, long flag);
 GEN sd_logfile(const char *v, long flag);
@@ -2447,7 +2448,7 @@ GEN     Str(GEN g);
 GEN     Strchr(GEN g);
 GEN     Strexpand(GEN g);
 GEN     Strtex(GEN g);
-void    brute(GEN g, char format, long dec);
+void    brute(GEN g, char format, long dec, int base);
 void    dbgGEN(GEN x, long nb);
 void    error0(GEN g);
 void    dbg_pari_heap(void);
@@ -2464,7 +2465,7 @@ void    gpinstall(const char *s, const char *code,
 GEN     gsprintf(const char *fmt, ...);
 GEN     gvsprintf(const char *fmt, va_list ap);
 char*   itostr(GEN x);
-void    matbrute(GEN g, char format, long dec);
+void    matbrute(GEN g, char format, long dec, int base);
 char*   os_getenv(const char *s);
 void    (*os_signal(int sig, void (*f)(int)))(int);
 void    outmat(GEN x);
diff --git a/src/headers/paristio.h b/src/headers/paristio.h
index 5ff7bdf..faca0a7 100644
--- a/src/headers/paristio.h
+++ b/src/headers/paristio.h
@@ -226,6 +226,7 @@ typedef struct {
 typedef struct {
   char format; /* e,f,g */
   long sigd;   /* -1 (all) or number of significant digits printed */
+  int base;    /* 10, 16, -16 - i.e. decimal, 0xhex, or 0xHEX */
   int sp;      /* 0 = suppress whitespace from output */
   int prettyp; /* output style: raw, prettyprint, etc */
   int TeXstyle;
diff --git a/src/language/anal.c b/src/language/anal.c
index f77cf3a..fd01269 100644
--- a/src/language/anal.c
+++ b/src/language/anal.c
@@ -436,11 +436,35 @@ type0(GEN x)
 /*******************************************************************/
 
 #ifdef LONG_IS_64BIT
-static const long MAX_DIGITS = 19;
+static const long MAX_DIGITS  = 19;
+static const long MAX_XDIGITS = 15; /* upper bound on hex digits accumulated by one hexnumber() call: 15*4 = 60 bits */
+static const long MAX_BDIGITS = 63; /* upper bound on binary digits accumulated by one binnumber() call: 63 bits */
 #else
-static const long MAX_DIGITS = 9;
+static const long MAX_DIGITS  = 9;
+static const long MAX_XDIGITS = 7; /* 7*4 = 28 bits per hexnumber() call */
+static const long MAX_BDIGITS = 31; /* 31 bits per binnumber() call */
 #endif
 
+static int /* returns 1 and advances *s by two when *s starts with "0x" or "0X"; otherwise returns 0 and leaves *s unchanged */
+ishex(const char **s)
+{
+    if('0' == **s && ('x' == (*s)[1] || 'X' == (*s)[1])) { /* short-circuit: (*s)[1] is examined only when **s is '0' */
+        *s += 2; /* step over the two-character prefix */
+        return 1;
+    }
+    return 0;
+}
+
+static int /* returns 1 and advances *s by two when *s starts with "0b" or "0B"; otherwise returns 0 and leaves *s unchanged */
+isbin(const char **s)
+{
+    if('0' == **s && ('b' == (*s)[1] || 'B' == (*s)[1])) { /* short-circuit: (*s)[1] is examined only when **s is '0' */
+        *s += 2; /* step over the two-character prefix */
+        return 1;
+    }
+    return 0;
+}
+
 static ulong
 number(int *n, const char **s)
 {
@@ -450,6 +474,43 @@ number(int *n, const char **s)
   return m;
 }
 
+static ulong /* reads up to MAX_XDIGITS hex digits at *s; returns their value, stores the digit count in *n, leaves *s on the first unread character */
+hexnumber(int *n, const char **s)
+{
+  ulong m = 0;
+  for(*n = 0; *n < MAX_XDIGITS; (*n)++,(*s)++) /* each accepted digit bumps both the count and the cursor */
+  {
+    if( **s >= '0' && **s <= '9') {
+        m = 16*m + (**s - '0');
+        continue; /* continue still runs the for-increment, so the digit is consumed */
+    }
+    if( **s >= 'A' && **s <= 'F') {
+        m = 16*m + (**s - 'A' + 10); /* upper-case digit: 'A' maps to 10 */
+        continue;
+    }
+    if( **s >= 'a' && **s <= 'f') {
+        m = 16*m + (**s - 'a' + 10); /* lower-case digit: 'a' maps to 10 */
+        continue;
+    }
+    break; /* first non-hex character ends the chunk without being consumed */
+  }
+  return m;
+}
+
+static ulong /* reads up to MAX_BDIGITS binary digits at *s; returns their value, stores the digit count in *n, leaves *s on the first unread character */
+binnumber(int *n, const char **s)
+{
+  ulong m = 0;
+  for(*n = 0; *n < MAX_BDIGITS; (*n)++,(*s)++) /* each accepted digit bumps both the count and the cursor */
+  {
+    if( **s == '0' || **s == '1') {
+        m = 2*m + (**s - '0'); /* shift in the next bit */
+    } else
+        break; /* first character that is not '0' or '1' ends the chunk without being consumed */
+  }
+  return m;
+}
+
 ulong
 u_pow10(int n)
 {
@@ -494,6 +555,34 @@ int_read_more(GEN y, const char **ps)
   return y;
 }
 
+static GEN /* keeps folding further chunks of hex digits at *ps into y and returns the result */
+hex_read_more(GEN y, const char **ps)
+{
+  pari_sp av = avma;
+  int i = 0, nb;
+  while (isxdigit((int)**ps)) /* NOTE(review): passing a negative plain char to isxdigit is undefined; an (unsigned char) cast would be safer -- confirm */
+  {
+    ulong m = hexnumber(&nb, ps); /* nb >= 1 here because the loop condition saw a hex digit */
+    if (avma != av && ++i > 4) { avma = av; i = 0; } /* HACK gerepile */
+    y = addumului(m, 1UL << (nb*4), y); /* presumably m + 16^nb * y, i.e. appends the nb digits just read -- confirm against addumului */
+  }
+  return y;
+}
+
+static GEN /* keeps folding further chunks of binary digits at *ps into y and returns the result */
+bin_read_more(GEN y, const char **ps)
+{
+  pari_sp av = avma;
+  int i = 0, nb;
+  while (**ps == '0' || **ps == '1') /* loop while binary digits remain */
+  {
+    ulong m = binnumber(&nb, ps); /* nb >= 1 here because the loop condition saw a binary digit */
+    if (avma != av && ++i > 4) { avma = av; i = 0; } /* HACK gerepile */
+    y = addumului(m, 1UL << nb, y); /* presumably m + 2^nb * y, i.e. appends the nb bits just read -- confirm against addumului */
+  }
+  return y;
+}
+
 static long
 exponent(const char **pts)
 {
@@ -563,8 +652,19 @@ static GEN
 int_read(const char **s)
 {
   int nb;
-  GEN y = utoi(number(&nb, s));
-  if (nb == MAX_DIGITS) y = int_read_more(y, s);
+  GEN y;
+  if (isbin(s)) { /* "0b"/"0B" prefix: isbin() has already advanced *s past it */
+    y = utoi(binnumber(&nb, s)); /* a bare prefix with no digits gives binnumber() == 0 */
+    if (nb == MAX_BDIGITS) y = bin_read_more(y, s); /* a full first chunk means more digits may follow */
+  } else
+  if (ishex(s)) { /* "0x"/"0X" prefix: ishex() has already advanced *s past it */
+    y = utoi(hexnumber(&nb, s)); /* a bare prefix with no digits gives hexnumber() == 0 */
+    if (nb == MAX_XDIGITS) y = hex_read_more(y, s); /* a full first chunk means more digits may follow */
+  } else
+  { /* no prefix: decimal path, same two statements as before this patch */
+    y = utoi(number(&nb, s));
+    if (nb == MAX_DIGITS) y = int_read_more(y, s);
+  }
   return y;
 }
 
@@ -688,6 +788,16 @@ pari_lex(union token_value *yylval, struct node_loc *yylloc, char **lex)
     yylloc->end=*lex;
     return '.';
   }
+  if (isbin((const char**)lex))
+  {
+    while (**lex=='0' || **lex=='1') ++*lex;
+    return KINTEGER;
+  }
+  if (ishex((const char**)lex))
+  {
+    while (isxdigit((int)**lex)) ++*lex;
+    return KINTEGER;
+  }
   if (isdigit((int)**lex))
   {
     int token=skipconstante(lex);
diff --git a/src/language/default.c b/src/language/default.c
index bf7b337..752e836 100644
--- a/src/language/default.c
+++ b/src/language/default.c
@@ -354,6 +354,37 @@ sd_format(const char *v, long flag)
 }
 
 GEN
+sd_base(const char *v, long flag) /* handler for the "base" default: v is the new value (skipped when NULL), flag selects how the setting is reported */
+{
+  pariout_t *fmt = GP_DATA->fmt;
+  if(v) { /* a value was supplied: exactly "dec", "hex" or "HEX" is accepted (case-sensitive strcmp) */
+    if (0 == strcmp(v,"dec")) {
+      fmt->base = 10;
+    } else
+    if (0 == strcmp(v,"hex")) {
+      fmt->base = 16; /* lower-case hex */
+    } else
+    if (0 == strcmp(v,"HEX")) {
+      fmt->base = -16; /* negative value encodes upper-case hex */
+    } else {
+      pari_err(e_SYNTAX,"default: bad base",v,v); /* any other string is rejected */
+    }
+  }
+  if (flag == d_RETURN)  {
+    char *s = stack_malloc(64);
+    (void)sprintf(s, "%d", fmt->base); /* NOTE(review): yields the numeric code (10, 16, -16), not the "dec"/"hex"/"HEX" names accepted above -- confirm this asymmetry is intended */
+    return strtoGENstr(s);
+  }
+  if (flag == d_ACKNOWLEDGE)
+    pari_printf("   base = %s\n", /* echo the current setting by name */
+        fmt->base == 10 ? "dec" :
+        fmt->base == 16 ? "hex" :
+        fmt->base ==-16 ? "HEX" :
+        "error"); /* reached only if base holds a value this function never stores */
+  return gnil;
+}
+
+GEN
 sd_compatible(const char *v, long flag)
 {
   const char *msg[] = {
@@ -774,9 +805,9 @@ static void
 init_fmt(gp_data *D)
 {
 #ifdef LONG_IS_64BIT
-  static pariout_t DFLT_OUTPUT = { 'g', 38, 1, f_PRETTYMAT, 0 };
+  static pariout_t DFLT_OUTPUT = { 'g', 38, 10, 1, f_PRETTYMAT, 0 }; /* third initializer is the new base field: 10 = decimal by default */
 #else
-  static pariout_t DFLT_OUTPUT = { 'g', 28, 1, f_PRETTYMAT, 0 };
+  static pariout_t DFLT_OUTPUT = { 'g', 28, 10, 1, f_PRETTYMAT, 0 }; /* third initializer is the new base field: 10 = decimal by default */
 #endif
   D->fmt = &DFLT_OUTPUT;
 }
diff --git a/src/language/es.c b/src/language/es.c
index 2ac5919..8194be6 100644
--- a/src/language/es.c
+++ b/src/language/es.c
@@ -2743,7 +2743,18 @@ bruti_intern(GEN g, pariout_t *T, outString *S, int addsign)
   {
     case t_INT:
       if (addsign && signe(g) < 0) str_putc(S, '-');
-      str_absint(S, g); break;
+      int base=T->base;
+      if(base != 10) {
+        if(cmpiu(g,16) < 0)
+          base = 10;
+      }
+      if(base != 10) {
+        str_putc(S, '0');
+        str_putc(S, 'x');
+      }
+      fmtnum(S,0,absi(g),base,-1,0,0,0);
+      //str_absint(S, g);
+      break;
     case t_REAL:
     {
       pari_sp av;
@@ -3239,10 +3250,11 @@ texi_sign(GEN g, pariout_t *T, outString *S, int addsign)
 /**                                                               **/
 /*******************************************************************/
 static void
-_initout(pariout_t *T, char f, long sigd, long sp)
+_initout(pariout_t *T, char f, long sigd, long sp, int base) /* fills T's format, sigd, base and sp fields from the arguments */
 {
   T->format = f;
   T->sigd = sigd;
+  T->base = base; /* integer output base: 10, 16 or -16 per the pariout_t field comment */
   T->sp = sp;
 }
 
@@ -3269,32 +3281,32 @@ gen_output(GEN x, pariout_t *T)
 }
 
 void
-brute(GEN g, char f, long d)
+brute(GEN g, char f, long d, int base) /* new trailing argument: integer output base, stored in T.base by _initout() */
 {
-  pariout_t T; _initout(&T,f,d,0);
+  pariout_t T; _initout(&T,f,d,0,base); /* sp = 0, caller-chosen base */
   gen_output_fun(g, &T, &bruti);
 }
 
 void
-matbrute(GEN g, char f, long d)
+matbrute(GEN g, char f, long d, int base) /* new trailing argument: integer output base, stored in T.base by _initout() */
 {
-  pariout_t T; _initout(&T,f,d,1);
+  pariout_t T; _initout(&T,f,d,1,base); /* sp = 1, caller-chosen base */
   gen_output_fun(g, &T, &matbruti);
 }
 
 void
 texe(GEN g, char f, long d)
 {
-  pariout_t T; _initout(&T,f,d,0);
+  pariout_t T; _initout(&T,f,d,0,10); /* signature unchanged: this path always passes the literal base 10 */
   gen_output_fun(g, &T, &texi);
 }
 
 void
 output(GEN x)
-{ brute(x,'g',-1); pari_putc('\n'); pari_flush(); }
+{ brute(x,'g',-1,10); pari_putc('\n'); pari_flush(); } /* passes the literal base 10; the configured GP_DATA->fmt->base is not consulted here */
 void
 outmat(GEN x)
-{ matbrute(x,'g',-1); pari_putc('\n'); pari_flush(); }
+{ matbrute(x,'g',-1,10); pari_putc('\n'); pari_flush(); } /* passes the literal base 10; the configured GP_DATA->fmt->base is not consulted here */
 
 void
 err_printf(const char* fmt, ...)
diff --git a/src/language/gplib.c b/src/language/gplib.c
index 93c5996..189e9dd 100644
--- a/src/language/gplib.c
+++ b/src/language/gplib.c
@@ -301,6 +301,8 @@ slash_commands(void)
 ##      : print time for last result\n\
 \\\\      : comment up to end of line\n\
 \\a {n}  : print result in raw format (readable by PARI)\n\
+\\A {n}  : print result in different base - i.e. in decimal\n\
+          when hex is default or vice versa\n\
 \\B {n}  : print result in beautified format\n\
 \\c      : list all commands (same effect as ?*)\n\
 \\d      : print all defaults\n\
@@ -312,6 +314,9 @@ slash_commands(void)
 \\l {f}  : enable/disable logfile (set logfile=f)\n\
 \\m {n}  : print result in prettymatrix format\n\
 \\o {n}  : set output method (0=raw, 1=prettymatrix, 2=prettyprint, 3=2-dim)\n\
+\\ox     : print integers in lower case hexadecimal: 0x1234abcd\n\
+\\oX     : print integers in upper case hexadecimal: 0x1234ABCD\n\
+\\od     : print integers in decimal\n\
 \\p {n}  : change real precision\n\
 \\ps{n}  : change series precision\n\
 \\q      : quit completely this GP session\n\
@@ -1690,7 +1695,7 @@ escape(const char *tch, int ismain)
   char c;
   switch ((c = *s++))
   {
-    case 'w': case 'x': case 'a': case 'b': case 'B': case 'm':
+    case 'w': case 'x': case 'a': case 'A': case 'b': case 'B': case 'm':
     { /* history things */
       long d;
       GEN x;
@@ -1706,8 +1711,9 @@ escape(const char *tch, int ismain)
         case 'B': /* prettyprinter */
           if (tex2mail_output(x,0)) break;
         case 'b': /* fall through */
-        case 'm': matbrute(x, GP_DATA->fmt->format, -1); break;
-        case 'a': brute(x, GP_DATA->fmt->format, -1); break;
+        case 'm': matbrute(x, GP_DATA->fmt->format, -1, GP_DATA->fmt->base); break;
+        case 'a': brute(x, GP_DATA->fmt->format, -1, GP_DATA->fmt->base); break;
+        case 'A': brute(x, GP_DATA->fmt->format, -1, GP_DATA->fmt->base == 10 ? -16 : 10); break;
         case 'x': dbgGEN(x, get_int(s, -1)); break;
         case 'w':
           s = get_sep(s); if (!*s) s = current_logfile;
@@ -1740,7 +1746,14 @@ escape(const char *tch, int ismain)
       }
       (void)sd_log(pari_logfile?"0":"1",d_ACKNOWLEDGE);
       break;
-    case 'o': (void)sd_output(*s? s: NULL,d_ACKNOWLEDGE); break;
+    case 'o': //(void)sd_output(*s? s: NULL,d_ACKNOWLEDGE); break;
+      switch(*s) {
+        case 'd': (void)sd_base("dec",d_ACKNOWLEDGE); break;
+        case 'x': (void)sd_base("hex",d_ACKNOWLEDGE); break;
+        case 'X': (void)sd_base("HEX",d_ACKNOWLEDGE); break;
+        default:  (void)sd_output(*s? s: NULL,d_ACKNOWLEDGE); break;
+      }
+      break;
     case 'p':
       switch (*s)
       {