public inbox for gsl-discuss@sourceware.org
 help / color / mirror / Atom feed
* Multinomial CDF
@ 2008-11-17  9:14 Glenn.Stone
  0 siblings, 0 replies; only message in thread
From: Glenn.Stone @ 2008-11-17  9:14 UTC (permalink / raw)
  To: gsl-discuss

[-- Attachment #1: Type: text/plain, Size: 815 bytes --]


Hi,

 I have written a piece of code to implement the multinomial CDF. GSL currently provides random variates from a multinomial and the multinomial PDF. The CDF is implemented as the approximation from,

"A Representation for Multinomial Cumulative Distribution Functions",  Bruce Levin, The Annals of Statistics, v.9, n.5, pp.1123-1126, 1981

The approximation is meant to be pretty good.

Because the multinomial is a vector random variate the "P" version of the CDF is implemented.

The P version corresponds to P(X1<=n1,X2<=n2,...,XK<=nK)

In keeping with the pdf function already in GSL no checking of the consistency/validity of the inputs is done.

Code attached along with a small test function.


Glenn Stone
CSIRO Mathematical and Information Sciences
North Ryde
+61 2 9325 3259


[-- Attachment #2: test.c --]
[-- Type: text/plain, Size: 830 bytes --]

#include <stdio.h>
#include "multinomial.h"

/* this is a test example taken from the paper ,see multinomial.c
 * gcc -Wall -static -g test.c multinomial.c -lgsl -lm
 * or 
 * gcc -Wall -Xlinker --allow-shlib-undefined -g test.c multinomial.c -lgsl -lm
 * (Not sure whay it needs the allow-shlib-undefined, but otherwise lots of unresolved cblas errors)
 */

int main() {
  double p[12], res, lb, ub;
  unsigned int n[12];
  int i;

  for(i=0; i<12; i++ ) {
    p[i] = 1/12.0;
    n[i] = 3;
  }

  res = cdf_multinomial_P(12,12,p,n);

  /* Result should be 0.8367 */
  printf("%g\n", res);

  /* Result should be (0.8340,0.8461) */
  cdf_multinomial_BMboundsP(12,12,p,n,&lb,&ub);
  printf("(%g,%g)\n", lb, ub);


  res = cdf_multinomial_Pmax(12,12,3);
  /* Result should be 0.8367 */
  printf("%g\n", res);

  return(0);
}
  

  

[-- Attachment #3: multinomial.c --]
[-- Type: text/plain, Size: 3786 bytes --]

#include <math.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_randist.h>

/* Computes the (log) CDF of a multinomial distribution with parameters p and N
 * i.e. P(X1<=n1,X2<=n2,...,XK<=nK)
 * Uses a approximation from 
 * "A Representation for Multinomial Cumulative Distribution Functions", 
 * Bruce Levin, The Annals of Statistics, v.9, n.5, pp.1123-1126, 1981
 */

double
cdf_multinomial_lnP (const size_t K, const unsigned int N,
                           const double p[], const unsigned int n[])
{
  size_t k;
  double log_cdf = 0.0;
  double s;
  double gamma1=0.0, gamma2=0.0, sum_s2=0.0, sum_mu=0.0;
  double mu, s2, sp, pcdf, mr, mf2, mf3, mf4, mu2, mu3, mu4, x, x2, PWN;
  
  s = (double) N;
  log_cdf = -log(gsl_ran_poisson_pdf(N,s));

  /* This is the P(W=N) bit */
  for(k=0; k<K; k++) {
    sp = s*p[k];

    pcdf = gsl_cdf_poisson_P(n[k],sp);
    log_cdf += log(pcdf);

    mu = sp*(1-gsl_ran_poisson_pdf(n[k],sp)/pcdf);
    s2 = mu-(n[k]-mu)*(sp-mu);

    /* factorial moments? */
    mr = n[k];
    mf2= sp*mu-mr*(sp-mu);

    mr *= n[k]-1;
    mf3 = sp*mf2-mr*(sp-mu);
    
    mr *= n[k]-2;
    mf4 = sp*mf3-mr*(sp-mu);

    /* Central Moments */
    mu2 = mf2+mu*(1-mu);
    mu3 = mf3+mf2*(3-3*mu)+mu*(1+mu*(-3+2*mu));
    mu4 = mf4+mf3*(6-4*mu)+mf2*(7+mu*(-12+6*mu))+mu*(1+mu*(-4+mu*(6-3*mu)));

    /* accumulate coef skewness and excess */
    gamma1 += mu3;
    gamma2 += mu4-3*s2*s2;
    sum_mu += mu;
    sum_s2 += s2;
  }
  
  sp = sqrt(sum_s2);
  gamma1 /= sum_s2*sp;
  gamma2 /= sum_s2*sum_s2;

  x = (N-sum_mu)/sp;
  x2 = x*x;
  PWN = -x2/2
    +log(1+gamma1/6*x*(x2-3)+gamma2/24*(x2*x2-6*x2+3)+
	 gamma1*gamma1/72*(((x2-15)*x2+45)*x2-15))
    -log(2*M_PI)/2 -log(sp);

  log_cdf += PWN;

  return log_cdf;
}

double
cdf_multinomial_P (const size_t K, const unsigned int N,
                         const double p[], const unsigned int n[])
{
  return exp (cdf_multinomial_lnP (K, N, p, n));
}

/* Computes the Bonferroni-Mallows bounds for the CDF */

void
cdf_multinomial_BMboundsP (const size_t K, const unsigned int N,
			   const double p[], const unsigned int n[], double *lb, double *ub)
{
  double LB = 1.0, UB = 1.0;
  size_t k;
  
  for(k=0; k<K; k++) {
    LB -= gsl_cdf_binomial_Q(n[k],p[k],N);
    UB *= gsl_cdf_binomial_P(n[k],p[k],N); 
  }

  *lb = LB;
  *ub = UB;
}
  

  
/* Special case with all p equal and all n equal
 * ei. P(max_k X_k <= n) when p_k = p for all k
 */

double
cdf_multinomial_lnPmax (const size_t K, const unsigned int N,
			const unsigned int n)
{
  double log_cdf = 0.0;
  double p, s;
  double gamma1, gamma2;
  double mu, s2, sp, pcdf, mr, mf2, mf3, mf4, mu2, mu3, mu4, x, x2, PWN;
  
  p = 1/((double) K);
  s = (double) N;

  log_cdf = -log(gsl_ran_poisson_pdf(N,s));

  sp = s*p;
  pcdf = gsl_cdf_poisson_P(n,sp);
  log_cdf += K*log(pcdf);

  /* This is the P(W=N) bit */
  mu = sp*(1-gsl_ran_poisson_pdf(n,sp)/pcdf);
  s2 = mu-(n-mu)*(sp-mu);

  /* factorial moments? */
  mr = n;
  mf2= sp*mu-mr*(sp-mu);

  mr *= n-1;
  mf3 = sp*mf2-mr*(sp-mu);
    
  mr *= n-2;
  mf4 = sp*mf3-mr*(sp-mu);

    /* Central Moments */
  mu2 = mf2+mu*(1-mu);
  mu3 = mf3+mf2*(3-3*mu)+mu*(1+mu*(-3+2*mu));
  mu4 = mf4+mf3*(6-4*mu)+mf2*(7+mu*(-12+6*mu))+mu*(1+mu*(-4+mu*(6-3*mu)));

    /* accumulate coef skewness and excess */
  sp = sqrt(K*s2);
  gamma1 = mu3/(sp*s2);
  gamma2 = (mu4-3*s2*s2)/(s2*s2*K);

  x = (N-K*mu)/sp;
  x2 = x*x;
  PWN = -x2/2
    +log(1+gamma1/6*x*(x2-3)+gamma2/24*(x2*x2-6*x2+3)+
	 gamma1*gamma1/72*(((x2-15)*x2+45)*x2-15))
    -log(2*M_PI)/2 -log(sp);

  log_cdf += PWN;

  return log_cdf;
}

double
cdf_multinomial_Pmax (const size_t K, const unsigned int N,
		      const unsigned int n)
{
  return exp (cdf_multinomial_lnPmax (K, N, n));
}

[-- Attachment #4: multinomial.h --]
[-- Type: text/plain, Size: 604 bytes --]

extern double
cdf_multinomial_lnP (const size_t K, const unsigned int N,
		     const double p[], const unsigned int n[]);

extern double
cdf_multinomial_P (const size_t K, const unsigned int N,
		   const double p[], const unsigned int n[]);

extern void
cdf_multinomial_BMboundsP (const size_t K, const unsigned int N,
			   const double p[], const unsigned int n[], double *lb, double *ub);


extern double
cdf_multinomial_lnPmax (const size_t K, const unsigned int N,
			const unsigned int n);

extern double
cdf_multinomial_Pmax (const size_t K, const unsigned int N,
		      const unsigned int n);

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-11-17  9:14 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-11-17  9:14 Multinomial CDF Glenn.Stone

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).