[Rd] Weakness in Knuth-TAOCP RNG (fwd) (PR#1336)

Prof Brian D Ripley ripley@stats.ox.ac.uk
Sun, 3 Mar 2002 09:28:04 +0000 (GMT)


It's not that simple.  This is an new generator, incompatible with
the old one.

I am sure the `weakness' does not arise in reasonable R usage, but I was
amazed that Knuth was unaware of the problem (which I regard as
well-known): but then he is a theoretician.

I will have to add this as a separate generator, since we really, really
can't re-define random number generators and so make work unreproducible.

On Sun, 3 Mar 2002 schutz@wehi.edu.au wrote:

> Prof Brian D Ripley wrote:
>
> >Attachments don't really work with R-bugs.  To save us trying to unpick
> >this manually, could you re-send it as the body of a message to R-bugs
> >in reply to this one, or quoting (PR#1336) in the subject, please?
>
> Sorry about that (Pine sometimes modify lines of the body without warning,
> so I preferred using an attachment). Here is the patch for src/main/RNG.c.
>
> Frédéric
> ---
> >> Don Knuth recently updated its RNG, following the discovery of a
> >> weakness in the seed initialisation (see
> >> http://Sunburn.Stanford.EDU/~knuth/news02.html), so I believe that the
> >> corresponding code in R should be updated as well. If it may be usefull,
> >> I attach a patch to do the change.
> >>
> >> Notes:
> >>
> >>   - only a few lines of code have really changed, but the order of the
> >>     functions has changed in Knuth's original code, so the patch is
> >>     longer than it should really be;
> >>   - Despite Knuth's request that no change should be made to the code,
> >>     the present R code (1.5.0-devel and previous) contains a change (the
> >>     line "long ran_x[KK];" is commented, and ran_x is defined elsewhere
> >>     in the program). The patch contains the _same_ change; I let you
> >>     decide if this is acceptable in regard to Knuth's request.
>
> --- RNG.c.old	Fri Mar  1 08:42:45 2002
> +++ RNG.c	Fri Mar  1 14:26:08 2002
> @@ -543,6 +543,7 @@
>  #define ran_x             dummy
>
>  /* ===================  Knuth TAOCP ========================== */
> +/* From http://www-cs-faculty.Stanford.EDU/~knuth/programs/rng.c */
>
>           /* Please do NOT alter this part of the code */
>
> @@ -551,15 +552,17 @@
>   *    It is explained in Seminumerical Algorithms, 3rd edition, Section 3.6
>   *    (or in the errata to the 2nd edition --- see
>   *        http://www-cs-faculty.stanford.edu/~knuth/taocp.html
> - *    in the changes to pages 171 and following of Volume 2).              */
> + *    in the changes to Volume 2 on pages 171 and following).              */
> +
> +/*    N.B. The MODIFICATIONS introduced in the 9th printing (2002) are
> +      included here; there's no backwards compatibility with the original. */
>
>  /*    If you find any bugs, please report them immediately to
>   *                 taocp@cs.stanford.edu
>   *    (and you will be rewarded if the bug is genuine). Thanks!            */
>
>  /************ see the book for explanations and caveats! *******************/
> -
> -/* the old C calling conventions are used here, for reasons of portability */
> +/************ in particular, you need two's complement arithmetic **********/
>
>  #define KK 100                     /* the long lag */
>  #define LL  37                     /* the short lag */
> @@ -568,10 +571,13 @@
>
>  /*long ran_x[KK]; */                   /* the generator state */
>
> -/* void ran_array(long aa[],int n) */
> +#ifdef __STDC__
> +void ran_array(long aa[],int n)
> +#else
>  void ran_array(aa,n)    /* put n new random numbers in aa */
>    long *aa;   /* destination */
>    int n;      /* array length (must be at least KK) */
> +#endif
>  {
>    register int i,j;
>    for (j=0;j<KK;j++) aa[j]=ran_x[j];
> @@ -580,57 +586,57 @@
>    for (;i<KK;i++,j++) ran_x[i]=mod_diff(aa[j-KK],ran_x[i-LL]);
>  }
>
> +/* the following routines are from exercise 3.6--15 */
> +/* after calling ran_start, get new randoms by, e.g., "x=ran_arr_next()" */
> +
> +#define QUALITY 1009 /* recommended quality level for high-res use */
> +long ran_arr_buf[QUALITY];
> +long ran_arr_sentinel=-1;
> +long *ran_arr_ptr=&ran_arr_sentinel; /* the next random number, or -1 */
> +
> +#define ran_arr_next() (*ran_arr_ptr>=0? *ran_arr_ptr++: ran_arr_cycle())
> +long ran_arr_cycle()
> +{
> +  ran_array(ran_arr_buf,QUALITY);
> +  ran_arr_buf[100]=-1;
> +  ran_arr_ptr=ran_arr_buf+1;
> +  return ran_arr_buf[0];
> +}
> +
>  #define TT  70   /* guaranteed separation between streams */
>  #define is_odd(x)  ((x)&1)          /* units bit of x */
> -#define evenize(x) ((x)&(MM-2))   /* make x even */
>
> -/* void ran_start(long seed) */
> +#ifdef __STDC__
> +void ran_start(long seed)
> +#else
>  void ran_start(seed)    /* do this before using ran_array */
>    long seed;            /* selector for different streams */
> +#endif
>  {
>    register int t,j;
>    long x[KK+KK-1];              /* the preparation buffer */
> -  register long ss=evenize(seed+2);
> +  register long ss=(seed+2)&(MM-2);
>    for (j=0;j<KK;j++) {
>      x[j]=ss;                      /* bootstrap the buffer */
>      ss<<=1; if (ss>=MM) ss-=MM-2; /* cyclic shift 29 bits */
>    }
> -  for (;j<KK+KK-1;j++) x[j]=0;
>    x[1]++;              /* make x[1] (and only x[1]) odd */
> -  ss=seed&(MM-1);
> -  t=TT-1; while (t) {
> -    for (j=KK-1;j>0;j--) x[j+j]=x[j];  /* "square" */
> -    for (j=KK+KK-2;j>KK-LL;j-=2) x[KK+KK-1-j]=evenize(x[j]);
> -    for (j=KK+KK-2;j>=KK;j--) if(is_odd(x[j])) {
> -      x[j-(KK-LL)]=mod_diff(x[j-(KK-LL)],x[j]);
> +  for (ss=seed&(MM-1),t=TT-1; t; ) {
> +    for (j=KK-1;j>0;j--) x[j+j]=x[j], x[j+j-1]=0; /* "square" */
> +    for (j=KK+KK-2;j>=KK;j--)
> +      x[j-(KK-LL)]=mod_diff(x[j-(KK-LL)],x[j]),
>        x[j-KK]=mod_diff(x[j-KK],x[j]);
> -    }
>      if (is_odd(ss)) {              /* "multiply by z" */
>        for (j=KK;j>0;j--)  x[j]=x[j-1];
>        x[0]=x[KK];            /* shift the buffer cyclically */
> -      if (is_odd(x[KK])) x[LL]=mod_diff(x[LL],x[KK]);
> +      x[LL]=mod_diff(x[LL],x[KK]);
>      }
>      if (ss) ss>>=1; else t--;
>    }
>    for (j=0;j<LL;j++) ran_x[j+KK-LL]=x[j];
>    for (;j<KK;j++) ran_x[j-LL]=x[j];
> -}
> -
> -/* the following routines are from exercise 3.6--15 */
> -/* after calling ran_start, get new randoms by, e.g., "x=ran_arr_next()" */
> -
> -#define QUALITY 1009 /* recommended quality level for high-res use */
> -long ran_arr_buf[QUALITY];
> -long ran_arr_sentinel=-1;
> -long *ran_arr_ptr=&ran_arr_sentinel; /* the next random number, or -1 */
> -
> -#define ran_arr_next() (*ran_arr_ptr>=0? *ran_arr_ptr++: ran_arr_cycle())
> -long ran_arr_cycle()
> -{
> -  ran_array(ran_arr_buf,QUALITY);
> -  ran_arr_buf[100]=-1;
> -  ran_arr_ptr=ran_arr_buf+1;
> -  return ran_arr_buf[0];
> +  for (j=0;j<10;j++) ran_array(x,KK+KK-1); /* warm things up */
> +  ran_arr_ptr=&ran_arr_sentinel;
>  }
>
>  /* ===================== end of Knuth's code ====================== */
>
>
> -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
> r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
> Send "info", "help", or "[un]subscribe"
> (in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
> _._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
>

-- 
Brian D. Ripley,                  ripley@stats.ox.ac.uk
Professor of Applied Statistics,  http://www.stats.ox.ac.uk/~ripley/
University of Oxford,             Tel:  +44 1865 272861 (self)
1 South Parks Road,                     +44 1865 272860 (secr)
Oxford OX1 3TG, UK                Fax:  +44 1865 272595

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._