public inbox for gsl-discuss@sourceware.org
 help / color / mirror / Atom feed
* Re: Multinomial randist
@ 2002-11-26 11:48 Gavin Crooks
  0 siblings, 0 replies; 3+ messages in thread
From: Gavin Crooks @ 2002-11-26 11:48 UTC (permalink / raw)
  To: gsl-discuss

[-- Attachment #1: Type: text/plain, Size: 635 bytes --]

> Gavin writes:
>  >  I've attached code for a multinomial random distribution
>  > generator, which might be of some use.
>
> Thanks.
>
> Can you modify it so that p can be unnormalized (weights). Then it
> will also work if the user provides p's that don't sum to 1, like the
> gsl_ran_discrete functions do.

That seems like a good idea. Modified diff attached.

Gavin



--
Gavin E. Crooks
Postdoctoral Fellow                  tel:  (510) 642-9614
461 Koshland Hall                    fax:  (413) 280-7813
University of California             http://threeplusone.com/
Berkeley, CA 94720-3102, USA         gec@compbio.berkeley.edu


[-- Attachment #2: multinomial2.diff --]
[-- Type: application/octet-stream, Size: 12083 bytes --]

 
Index: doc/randist.texi
===================================================================
RCS file: /cvs/gsl/gsl/doc/randist.texi,v
retrieving revision 1.48
diff -u -r1.48 randist.texi
--- doc/randist.texi	26 Aug 2002 14:15:00 -0000	1.48
+++ doc/randist.texi	25 Nov 2002 20:01:42 -0000
@@ -48,7 +48,8 @@
 * General Discrete Distributions::  
 * The Poisson Distribution::    
 * The Bernoulli Distribution::  
-* The Binomial Distribution::   
+* The Binomial Distribution::
+* The Multinomial Distribution::   
 * The Negative Binomial Distribution::  
 * The Pascal Distribution::     
 * The Geometric Distribution::  
@@ -1425,6 +1426,63 @@
 @tex
 \centerline{\input rand-binomial.tex}
 @end tex
+
+
+@page
+@node The Multinomial Distribution
+@section The Multinomial Distribution
+@deftypefn Random void gsl_ran_multinomial (const gsl_rng * @var{r}, const size_t @var{K}, const unsigned int @var{N}, const double @var{p}[], unsigned int @var{n}[])
+@cindex Multinomial distribution 
+
+This function returns an array of @var{K} random variates from a 
+multinomial distribution. The distribution function is,
+
+@tex
+\beforedisplay
+$$
+P(n_1, n_2,\cdots, n_K) = {{ N!}\over{n_1 ! n_2 ! \cdots n_K !}} \,
+  p_1^{n_1} p_2^{n_2} \cdots p_K^{n_K}
+$$
+\afterdisplay
+@end tex
+@ifinfo
+@example
+                                 N!           n_1  n_2      n_K
+ P(n_1, n_2, ... n_K) = -------------------- p_1  p_2  ... p_K
+                        (n_1! n_2! ... n_K!) 
+@end example
+@end ifinfo
+@noindent
+where @c{$n_1$, $n_2$, $\ldots$, $n_K$}
+@math{n_1, n_2, ..., n_K} 
+are nonnegative integers, 
+@c{$\sum_{k=1}^{K} n_k =N$} 
+@math{sum_{k=1,K} n_k = N},
+and
+@c{$p = (p_1, p_2, \ldots, p_K)$} 
+@math{p = (p_1, p_2, ..., p_K)}
+is a probability distribution. The array @var{p}[@var{K}] may
+contain non-negative weights that do not sum to one; it will be
+appropriately normalized.
+
+Random variates are generated using the conditional binomial method.
+
+See C.S. David, The computer generation of multinomial random variates,
+Comp. Stat. Data Anal. 16 (1993) 205-217
+@end deftypefn
+
+@deftypefun double gsl_ran_multinomial_pdf (const size_t @var{K}, const double @var{p}[], const unsigned int @var{n}[]) 
+This function computes the probability 
+@c{$P(p_1, p_2, \ldots, p_K)$}
+@math{P(p_1, p_2, ..., p_K)}
+of sampling @var{n}[@var{K}] from a multinomial distribution 
+with parameters @var{p}[@var{K}], using the formula given above.
+@end deftypefun
+
+@deftypefun double gsl_ran_multinomial_lnpdf (const size_t @var{K}, const double @var{p}[], const unsigned int @var{n}[]) 
+This function returns the logarithm of the probability density. 
+@end deftypefun
+
 
 @page
 @node The Negative Binomial Distribution
Index: randist/Makefile.am
===================================================================
RCS file: /cvs/gsl/gsl/randist/Makefile.am,v
retrieving revision 1.43
diff -u -r1.43 Makefile.am
--- randist/Makefile.am	26 Aug 2002 14:15:00 -0000	1.43
+++ randist/Makefile.am	25 Nov 2002 20:01:42 -0000
@@ -4,7 +4,7 @@
 
 INCLUDES= -I$(top_builddir)
 
-libgslrandist_la_SOURCES = bernoulli.c beta.c bigauss.c binomial.c cauchy.c chisq.c dirichlet.c discrete.c erlang.c exponential.c exppow.c fdist.c flat.c gamma.c gauss.c gausstail.c geometric.c gumbel.c hyperg.c laplace.c levy.c logarithmic.c logistic.c lognormal.c nbinomial.c pareto.c pascal.c poisson.c rayleigh.c shuffle.c sphere.c tdist.c weibull.c landau.c
+libgslrandist_la_SOURCES = bernoulli.c beta.c bigauss.c binomial.c cauchy.c chisq.c dirichlet.c discrete.c erlang.c exponential.c exppow.c fdist.c flat.c gamma.c gauss.c gausstail.c geometric.c gumbel.c hyperg.c laplace.c levy.c logarithmic.c logistic.c lognormal.c multinomial.c nbinomial.c pareto.c pascal.c poisson.c rayleigh.c shuffle.c sphere.c tdist.c weibull.c landau.c
 
 TESTS = test
 
Index: randist/gsl_randist.h
===================================================================
RCS file: /cvs/gsl/gsl/randist/gsl_randist.h,v
retrieving revision 1.38
diff -u -r1.38 gsl_randist.h
--- randist/gsl_randist.h	26 Aug 2002 14:15:01 -0000	1.38
+++ randist/gsl_randist.h	25 Nov 2002 20:01:42 -0000
@@ -112,6 +112,15 @@
 unsigned int gsl_ran_logarithmic (const gsl_rng * r, const double p);
 double gsl_ran_logarithmic_pdf (const unsigned int k, const double p);
 
+void gsl_ran_multinomial (const gsl_rng * r, const size_t K,
+                          const unsigned int N, const double p[],
+                          unsigned int n[] );
+double gsl_ran_multinomial_pdf (const size_t K,
+                                const double p[], const unsigned int n[] );
+double gsl_ran_multinomial_lnpdf (const size_t K,
+                           const double p[], const unsigned int n[] );
+
+
 unsigned int gsl_ran_negative_binomial (const gsl_rng * r, double p, double n);
 double gsl_ran_negative_binomial_pdf (const unsigned int k, const double p, double n);
 
Index: randist/test.c
===================================================================
RCS file: /cvs/gsl/gsl/randist/test.c,v
retrieving revision 1.36
diff -u -r1.36 test.c
--- randist/test.c	26 Aug 2002 14:15:01 -0000	1.36
+++ randist/test.c	25 Nov 2002 20:01:42 -0000
@@ -28,6 +28,11 @@
 #include <gsl/gsl_ieee_utils.h>
 
 #define N 100000
+
+/* Convient test dimension for multivariant distributions */
+#define MULTI_DIM 10
+
+
 void testMoments (double (*f) (void), const char *name,
 		  double a, double b, double p);
 void testPDF (double (*f) (void), double (*pdf) (double), const char *name);
@@ -153,6 +158,11 @@
 double test_lognormal_pdf (double x);
 double test_logarithmic (void);
 double test_logarithmic_pdf (unsigned int n);
+double test_multinomial (void);
+double test_multinomial_pdf (unsigned int n);
+double test_multinomial_large (void);
+double test_multinomial_large_pdf (unsigned int n);
+void test_multinomial_moments (void);
 double test_negative_binomial (void);
 double test_negative_binomial_pdf (unsigned int n);
 double test_pascal (void);
@@ -216,6 +226,7 @@
   testMoments (FUNC (discrete1), 1.5, 3.5, 0.01);
 
   test_dirichlet_moments ();
+  test_multinomial_moments ();
 
   testPDF (FUNC2 (beta));
   testPDF (FUNC2 (cauchy));
@@ -296,6 +307,8 @@
   testDiscretePDF (FUNC2 (hypergeometric5));
   testDiscretePDF (FUNC2 (hypergeometric6));
   testDiscretePDF (FUNC2 (logarithmic));
+  testDiscretePDF (FUNC2 (multinomial));
+  testDiscretePDF (FUNC2 (multinomial_large));
   testDiscretePDF (FUNC2 (negative_binomial));
   testDiscretePDF (FUNC2 (pascal));
 
@@ -791,6 +804,50 @@
 }
 
 
+/* Check that the observed means of the multinomial variables are
+   within reasonable statistical errors of their correct values. */
+
+void
+test_multinomial_moments (void)
+{
+  const unsigned int sum_n = 100;
+
+  const double p[MULTI_DIM] ={ 0.2, 0.20, 0.17, 0.14, 0.12,
+                               0.07, 0.05, 0.02, 0.02, 0.01 };
+
+  unsigned int  x[MULTI_DIM];
+  double x_sum[MULTI_DIM];
+
+  double mean, obs_mean, sd, sigma;
+  int status, k, n;
+
+  for (k = 0; k < MULTI_DIM; k++)
+    x_sum[k] =0.0;
+
+  for (n = 0; n < N; n++)
+    {
+      gsl_ran_multinomial (r_global, MULTI_DIM, sum_n, p, x);
+      for (k = 0; k < MULTI_DIM; k++)
+	x_sum[k] += x[k];
+    }
+
+  for (k = 0; k < MULTI_DIM; k++)
+    {
+      mean = p[k] * sum_n;
+      sd = p[k] * (1.-p[k]) * sum_n;
+
+      obs_mean = x_sum[k] / N;
+      sigma = sqrt ((double) N) * fabs (mean - obs_mean) / sd;
+
+      status = (sigma > 3.0);
+
+      gsl_test (status,
+		"test gsl_ran_multinomial: mean (%g observed vs %g expected)",
+		obs_mean, mean);
+    }
+}
+
+
 static gsl_ran_discrete_t *g1 = NULL;
 static gsl_ran_discrete_t *g2 = NULL;
 
@@ -1456,6 +1513,56 @@
 test_lognormal_pdf (double x)
 {
   return gsl_ran_lognormal_pdf (x, 2.7, 1.3);
+}
+
+double
+test_multinomial (void)
+{
+  const size_t K = 3;
+  const unsigned int sum_n = BINS;
+  unsigned int n[3];
+  /* Test use of weights instead of probabilities. */
+  const double p[] = { 2., 7., 1.};
+
+  gsl_ran_multinomial ( r_global, K, sum_n, p, n);
+
+  return n[0];
+}
+
+double
+test_multinomial_pdf (unsigned int n_0)
+{
+  /* The margional distribution of just 1 variate  is binomial. */
+  size_t K = 2;
+  /* Test use of weights instead of probabilities */
+  double p[] = { 0.4, 1.6};
+  const unsigned int sum_n = BINS;
+  unsigned int n[2];
+
+  n[0] = n_0;
+  n[1] =sum_n - n_0;
+
+  return gsl_ran_multinomial_pdf (K, p, n);
+}
+
+
+double
+test_multinomial_large (void)
+{
+  const unsigned int sum_n = BINS;
+  unsigned int n[MULTI_DIM];
+  const double p[MULTI_DIM] = { 0.2, 0.20, 0.17, 0.14, 0.12,
+                                0.07, 0.05, 0.04, 0.01, 0.00  };
+
+  gsl_ran_multinomial ( r_global, MULTI_DIM, sum_n, p, n);
+
+  return n[0];
+}
+
+double
+test_multinomial_large_pdf (unsigned int n_0)
+{
+  return test_multinomial_pdf(n_0);
 }
 
 double


--- randist/multinomial.c	Mon Nov 25 12:02:25 2002
+++ randist/multinomial.c	Sun Nov 24 22:37:35 2002
@@ -0,0 +1,108 @@
+/* randist/multinomial.c
+ * 
+ * Copyright (C) 2002 Gavin E. Crooks <gec@compbio.berkeley.edu>
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_sf_gamma.h>
+
+/* The multinomial distribution has the form
+
+                                      N!           n_1  n_2      n_K
+   prob(n_1, n_2, ... n_K) = -------------------- p_1  p_2  ... p_K
+                             (n_1! n_2! ... n_K!) 
+
+   where n_1, n_2, ... n_K are nonnegative integers, sum_{k=1,K} n_k = N,
+   and p = (p_1, p_2, ..., p_K) is a probability distribution. 
+
+   Random variates are generated using the conditional binomial method.
+   This scales well with N and does not require a setup step.
+
+   Ref: 
+   C.S. David, The computer generation of multinomial random variates,
+   Comp. Stat. Data Anal. 16 (1993) 205-217
+*/
+  
+void
+gsl_ran_multinomial (const gsl_rng * r, const size_t K,
+                     const unsigned int N, const double p[],
+                     unsigned int n[] )
+{
+  size_t k;
+  double norm =0.0;
+  double sum_p = 0.0;
+  
+  unsigned int sum_n = 0;
+
+  /* p[k] may contain non-negative weights that do not sum to 1.0.
+   * Even a probability distribution will not exactly sum to 1.0
+   * due to rounding errors. 
+   * Also, worry about what happens when p[K-1] = 0.0;
+   */
+  for(k=0; k<K; k++) 
+    norm += p[k];
+
+  for(k=0; k<K && sum_p<norm; k++)
+    {     
+      n[k] = gsl_ran_binomial (r, p[k] / (norm -sum_p), N - sum_n);
+      sum_p += p[k];
+      sum_n += n[k];
+    }
+
+}
+
+
+double
+gsl_ran_multinomial_pdf (const size_t K,
+                         const double p[], const unsigned int n[] )
+{
+  return exp( gsl_ran_multinomial_lnpdf( K, p, n) );
+}
+     
+
+double
+gsl_ran_multinomial_lnpdf (const size_t K,
+                           const double p[], const unsigned int n[] )
+{
+  size_t k;
+  unsigned int N = 0;
+  double log_pdf = 0.0;
+  double norm = 0.0;
+  double invnorm = 0.0;
+
+  for(k=0; k<K; k++) 
+    N += n[k];
+
+  for(k=0; k<K; k++) 
+    norm += p[k];
+  invnorm = 1./norm;
+
+
+  /* Note: n! == gamma(n+1) */
+  log_pdf = gsl_sf_lngamma( N+1 );
+
+  for(k=0; k<K; k++) 
+    log_pdf -= gsl_sf_lngamma( n[k]+1);
+
+  for(k=0; k<K; k++)
+    log_pdf += log(invnorm * p[k]) * n[k];  
+  
+  return log_pdf;
+}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Multinomial randist
  2002-11-24  5:54 Gavin
@ 2002-11-24 11:38 ` Brian Gough
  0 siblings, 0 replies; 3+ messages in thread
From: Brian Gough @ 2002-11-24 11:38 UTC (permalink / raw)
  To: Gavin; +Cc: gsl-discuss

Gavin writes:
 >  I've attached code for a multinomial random distribution
 > generator, which might be of some use.

Thanks.

Can you modify it so that p can be unnormalized (weights). Then it
will also work if the user provides p's that don't sum to 1, like the
gsl_ran_discrete functions do.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Multinomial randist
@ 2002-11-24  5:54 Gavin
  2002-11-24 11:38 ` Brian Gough
  0 siblings, 1 reply; 3+ messages in thread
From: Gavin @ 2002-11-24  5:54 UTC (permalink / raw)
  To: gsl-discuss

[-- Attachment #1: Type: text/plain, Size: 923 bytes --]

Hi,

I've attached code for a multinomial random distribution generator, 
which
might be of some use. I used the conditional bionomial method, which,
unlike the implementation that was floating around the list in July, 
does
not become much slower as N, the total number of samples, increases.

As well as the standard gsl_ran_multinomial and gsl_ran_multinomial_pdf
functions, I also added a gsl_ran_multinomial_lnpdf function, the log
of the pdf. I've found that the pdf function is useless when the
dimension of the distribution is large; then any point in the 
distribution
will be of such low probability that gsl_ran_multinomial_pdf returns 
zero.


Gavin

--
Gavin E. Crooks
Postdoctoral Fellow                  tel:  (510) 642-9614
461 Koshland Hall                    fax:  (413) 280-7813
University of California             http://threeplusone.com/
Berkeley, CA 94720-3102, USA         gec@compbio.berkeley.edu


[-- Attachment #2: multinomial.diff --]
[-- Type: application/octet-stream, Size: 11575 bytes --]


 
Index: doc/randist.texi
===================================================================
RCS file: /cvs/gsl/gsl/doc/randist.texi,v
retrieving revision 1.48
diff -u -r1.48 randist.texi
--- doc/randist.texi	26 Aug 2002 14:15:00 -0000	1.48
+++ doc/randist.texi	23 Nov 2002 00:47:57 -0000
@@ -48,7 +48,8 @@
 * General Discrete Distributions::  
 * The Poisson Distribution::    
 * The Bernoulli Distribution::  
-* The Binomial Distribution::   
+* The Binomial Distribution::
+* The Multinomial Distribution::   
 * The Negative Binomial Distribution::  
 * The Pascal Distribution::     
 * The Geometric Distribution::  
@@ -1425,6 +1426,61 @@
 @tex
 \centerline{\input rand-binomial.tex}
 @end tex
+
+
+@page
+@node The Multinomial Distribution
+@section The Multinomial Distribution
+@deftypefn Random void gsl_ran_multinomial (const gsl_rng * @var{r}, const size_t @var{K}, const unsigned int @var{N}, const double @var{p}[], unsigned int @var{n}[])
+@cindex Multinomial distribution 
+
+This function returns an array of @var{K} random variates from a 
+multinomial distribution. The distribution function is,
+
+@tex
+\beforedisplay
+$$
+P(n_1, n_2,\cdots, n_K) = {{ N!}\over{n_1 ! n_2 ! \cdots n_K !}} \,
+  p_1^{n_1} p_2^{n_2} \cdots p_K^{n_K}
+$$
+\afterdisplay
+@end tex
+@ifinfo
+@example
+                                   N!           n_1  n_2      n_K
+   P(n_1, n_2, ... n_K) = -------------------- p_1  p_2  ... p_K
+                          (n_1! n_2! ... n_K!) 
+@end example
+@end ifinfo
+@noindent
+where @c{$n_1$, $n_2$, $\ldots$, $n_K$}
+@math{n_1, n_2, ..., n_K} 
+are nonnegative integers, 
+@c{$\sum_{k=1}^{K} n_k =N$} 
+@math{sum_{k=1,K} n_k = N},
+and
+@c{$p = (p_1, p_2, \ldots, p_K)$} 
+@math{p = (p_1, p_2, ..., p_K)}
+is a probability distribution.
+
+Random variates are generated using the conditional binomial method.
+
+See C.S. David, The computer generation of multinomial random variates,
+Comp. Stat. Data Anal. 16 (1993) 205-217
+@end deftypefn
+
+@deftypefun double gsl_ran_multinomial_pdf (const size_t @var{K}, const double @var{p}[], const unsigned int @var{n}[]) 
+This function computes the probability 
+@c{$P(p_1, p_2, \ldots, p_K)$}
+@math{P(p_1, p_2, ..., p_K)}
+of sampling @var{n}[@var{K}] from a multinomial distribution 
+with parameters @var{p}[@var{K}], using the formula given above.
+@end deftypefun
+
+@deftypefun double gsl_ran_multinomial_lnpdf (const size_t @var{K}, const double @var{p}[], const unsigned int @var{n}[]) 
+This function returns the logarithm of the probability density. 
+@end deftypefun
+
 
 @page
 @node The Negative Binomial Distribution
Index: randist/Makefile.am
===================================================================
RCS file: /cvs/gsl/gsl/randist/Makefile.am,v
retrieving revision 1.43
diff -u -r1.43 Makefile.am
--- randist/Makefile.am	26 Aug 2002 14:15:00 -0000	1.43
+++ randist/Makefile.am	23 Nov 2002 00:47:57 -0000
@@ -4,7 +4,7 @@
 
 INCLUDES= -I$(top_builddir)
 
-libgslrandist_la_SOURCES = bernoulli.c beta.c bigauss.c binomial.c cauchy.c chisq.c dirichlet.c discrete.c erlang.c exponential.c exppow.c fdist.c flat.c gamma.c gauss.c gausstail.c geometric.c gumbel.c hyperg.c laplace.c levy.c logarithmic.c logistic.c lognormal.c nbinomial.c pareto.c pascal.c poisson.c rayleigh.c shuffle.c sphere.c tdist.c weibull.c landau.c
+libgslrandist_la_SOURCES = bernoulli.c beta.c bigauss.c binomial.c cauchy.c chisq.c dirichlet.c discrete.c erlang.c exponential.c exppow.c fdist.c flat.c gamma.c gauss.c gausstail.c geometric.c gumbel.c hyperg.c laplace.c levy.c logarithmic.c logistic.c lognormal.c multinomial.c nbinomial.c pareto.c pascal.c poisson.c rayleigh.c shuffle.c sphere.c tdist.c weibull.c landau.c
 
 TESTS = test
 
Index: randist/gsl_randist.h
===================================================================
RCS file: /cvs/gsl/gsl/randist/gsl_randist.h,v
retrieving revision 1.38
diff -u -r1.38 gsl_randist.h
--- randist/gsl_randist.h	26 Aug 2002 14:15:01 -0000	1.38
+++ randist/gsl_randist.h	23 Nov 2002 00:47:57 -0000
@@ -112,6 +112,15 @@
 unsigned int gsl_ran_logarithmic (const gsl_rng * r, const double p);
 double gsl_ran_logarithmic_pdf (const unsigned int k, const double p);
 
+void gsl_ran_multinomial (const gsl_rng * r, const size_t K,
+                          const unsigned int N, const double p[],
+                          unsigned int n[] );
+double gsl_ran_multinomial_pdf (const size_t K,
+                                const double p[], const unsigned int n[] );
+double gsl_ran_multinomial_lnpdf (const size_t K,
+                           const double p[], const unsigned int n[] );
+
+
 unsigned int gsl_ran_negative_binomial (const gsl_rng * r, double p, double n);
 double gsl_ran_negative_binomial_pdf (const unsigned int k, const double p, double n);
 
Index: randist/test.c
===================================================================
RCS file: /cvs/gsl/gsl/randist/test.c,v
retrieving revision 1.36
diff -u -r1.36 test.c
--- randist/test.c	26 Aug 2002 14:15:01 -0000	1.36
+++ randist/test.c	23 Nov 2002 00:47:57 -0000
@@ -28,6 +28,11 @@
 #include <gsl/gsl_ieee_utils.h>
 
 #define N 100000
+
+/* Convient test dimension for multivariant distributions */
+#define MULTI_DIM 10
+
+
 void testMoments (double (*f) (void), const char *name,
 		  double a, double b, double p);
 void testPDF (double (*f) (void), double (*pdf) (double), const char *name);
@@ -153,6 +158,11 @@
 double test_lognormal_pdf (double x);
 double test_logarithmic (void);
 double test_logarithmic_pdf (unsigned int n);
+double test_multinomial (void);
+double test_multinomial_pdf (unsigned int n);
+double test_multinomial_large (void);
+double test_multinomial_large_pdf (unsigned int n);
+void test_multinomial_moments (void);
 double test_negative_binomial (void);
 double test_negative_binomial_pdf (unsigned int n);
 double test_pascal (void);
@@ -216,6 +226,7 @@
   testMoments (FUNC (discrete1), 1.5, 3.5, 0.01);
 
   test_dirichlet_moments ();
+  test_multinomial_moments ();
 
   testPDF (FUNC2 (beta));
   testPDF (FUNC2 (cauchy));
@@ -296,6 +307,8 @@
   testDiscretePDF (FUNC2 (hypergeometric5));
   testDiscretePDF (FUNC2 (hypergeometric6));
   testDiscretePDF (FUNC2 (logarithmic));
+  testDiscretePDF (FUNC2 (multinomial));
+  testDiscretePDF (FUNC2 (multinomial_large));
   testDiscretePDF (FUNC2 (negative_binomial));
   testDiscretePDF (FUNC2 (pascal));
 
@@ -791,6 +804,50 @@
 }
 
 
+/* Check that the observed means of the multinomial variables are
+   within reasonable statistical errors of their correct values. */
+
+void
+test_multinomial_moments (void)
+{
+  const unsigned int sum_n = 100;
+
+  const double p[MULTI_DIM] ={ 0.2, 0.20, 0.17, 0.14, 0.12,
+                               0.07, 0.05, 0.02, 0.02, 0.01 };
+
+  unsigned int  x[MULTI_DIM];
+  double x_sum[MULTI_DIM];
+
+  double mean, obs_mean, sd, sigma;
+  int status, k, n;
+
+  for (k = 0; k < MULTI_DIM; k++)
+    x_sum[k] =0.0;
+
+  for (n = 0; n < N; n++)
+    {
+      gsl_ran_multinomial (r_global, MULTI_DIM, sum_n, p, x);
+      for (k = 0; k < MULTI_DIM; k++)
+	x_sum[k] += x[k];
+    }
+
+  for (k = 0; k < MULTI_DIM; k++)
+    {
+      mean = p[k] * sum_n;
+      sd = p[k] * (1.-p[k]) * sum_n;
+
+      obs_mean = x_sum[k] / N;
+      sigma = sqrt ((double) N) * fabs (mean - obs_mean) / sd;
+
+      status = (sigma > 3.0);
+
+      gsl_test (status,
+		"test gsl_ran_multinomial: mean (%g observed vs %g expected)",
+		obs_mean, mean);
+    }
+}
+
+
 static gsl_ran_discrete_t *g1 = NULL;
 static gsl_ran_discrete_t *g2 = NULL;
 
@@ -1456,6 +1513,55 @@
 test_lognormal_pdf (double x)
 {
   return gsl_ran_lognormal_pdf (x, 2.7, 1.3);
+}
+
+double
+test_multinomial (void)
+{
+  const size_t K = 3;
+  const unsigned int sum_n = BINS;
+  unsigned int n[3];
+  const double p[] = { 0.2, 0.7, 0.1};
+
+  gsl_ran_multinomial ( r_global, K, sum_n, p, n);
+
+  return n[0];
+}
+
+double
+test_multinomial_pdf (unsigned int n_0)
+{
+  // The margional distribution of just 1 variate 
+  // is binomial.
+  size_t K = 2;
+  double p[] = { 0.2, 0.8};
+  const unsigned int sum_n = BINS;
+  unsigned int n[2];
+
+  n[0] = n_0;
+  n[1] =sum_n - n_0;
+
+  return gsl_ran_multinomial_pdf (K, p, n);
+}
+
+
+double
+test_multinomial_large (void)
+{
+  const unsigned int sum_n = BINS;
+  unsigned int n[MULTI_DIM];
+  const double p[MULTI_DIM] = { 0.2, 0.20, 0.17, 0.14, 0.12,
+                                0.07, 0.05, 0.04, 0.01, 0.00  };
+
+  gsl_ran_multinomial ( r_global, MULTI_DIM, sum_n, p, n);
+
+  return n[0];
+}
+
+double
+test_multinomial_large_pdf (unsigned int n_0)
+{
+  return test_multinomial_pdf(n_0);
 }
 
 double

--- randist/multinomial.c	Fri Nov 22 16:48:12 2002
+++ randist/multinomial.c	Fri Nov 22 16:44:33 2002
@@ -0,0 +1,96 @@
+/* randist/multinomial.c
+ * 
+ * Copyright (C) 2002 Gavin E. Crooks <gec@compbio.berkeley.edu>
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_sf_gamma.h>
+
+/* The multinomial distribution has the form
+
+                                      N!           n_1  n_2      n_K
+   prob(n_1, n_2, ... n_K) = -------------------- p_1  p_2  ... p_K
+                             (n_1! n_2! ... n_K!) 
+
+   where n_1, n_2, ... n_K are nonnegative integers, sum_{k=1,K} n_k = N,
+   and p = (p_1, p_2, ..., p_K) is a probability distribution.
+
+   Random variates are generated using the conditional binomial method.
+   This scales well with N and does not require a setup step.
+
+   Ref: 
+   C.S. David, The computer generation of multinomial random variates,
+   Comp. Stat. Data Anal. 16 (1993) 205-217
+*/
+  
+void
+gsl_ran_multinomial (const gsl_rng * r, const size_t K,
+                     const unsigned int N, const double p[],
+                     unsigned int n[] )
+{
+  size_t k;
+  double almost_1 = 0.0;
+  double sum_p = 0.0;
+  unsigned int sum_n = 0;
+
+  /* The sum of p[k] will not be exactly 1.0 due to rounding errors */
+  for(k=0; k<K; k++) 
+    almost_1 += p[k];
+  
+  for(k=0; k<K; k++)
+    {
+      n[k] = gsl_ran_binomial (r, p[k] / (1. -sum_p/almost_1), N - sum_n);
+      sum_p += p[k];
+      sum_n += n[k];
+    }
+
+}
+
+
+double
+gsl_ran_multinomial_pdf (const size_t K,
+                         const double p[], const unsigned int n[] )
+{
+  return exp( gsl_ran_multinomial_lnpdf( K, p, n) );
+}
+     
+
+double
+gsl_ran_multinomial_lnpdf (const size_t K,
+                           const double p[], const unsigned int n[] )
+{
+  size_t k;
+  unsigned int N = 0;
+  double log_pdf = 0.0;
+
+  for(k=0; k<K; k++) 
+    N += n[k];
+
+  /* Note: n! == gamma(n+1) */
+  log_pdf = gsl_sf_lngamma( N+1 );
+
+  for(k=0; k<K; k++) 
+    log_pdf -= gsl_sf_lngamma( n[k]+1);
+
+  for(k=0; k<K; k++)
+    log_pdf += log(p[k]) * n[k];  
+  
+  return log_pdf;
+}

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2002-11-25 20:14 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-26 11:48 Multinomial randist Gavin Crooks
  -- strict thread matches above, loose matches on Subject: below --
2002-11-24  5:54 Gavin
2002-11-24 11:38 ` Brian Gough

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).