beta and f cdf inverses

public inbox for gsl-discuss@sourceware.org
 help / color / mirror / Atom feed

* beta and f cdf inverses
@ 2004-10-17 16:06 Jason Stover
  2004-10-18 17:28 ` Brian Gough
  0 siblings, 1 reply; 2+ messages in thread
From: Jason Stover @ 2004-10-17 16:06 UTC (permalink / raw)
  To: gsl-discuss; +Cc: jason

[-- Attachment #1: Type: text/plain, Size: 16215 bytes --]

Attached is betadistinv.c, which will invert
the beta cumulative distribution function. Also
attached is a patch for fdist.c, gsl_cdf.h, Makefile.am
and test.c. Inverting the F distribution is (usually)
done by inverting the beta and transforming, so 
writing a gsl_cdf_beta_Pinv() gave me a gsl_cdf_fdist_Pinv() 
without much extra effort.

I altered test.c to invert each test for gsl_cdf_beta_P and
gsl_cdf_beta_Q. All these tests passed.  gsl_cdf_fdist_Pinv() and
gsl_cdf_fdist_Qinv() passed all tests, except those for large degrees
of freedom, for which the method of transforming to the beta cdf is
probably not appropriate.  Those tests which failed I #if 0'd out in
test.c.

The tests were run on a Pentium 4, Linux kernel 2.4.26, 
gcc 3.3.4.

Also, I wrote a small macro according to Brian Gough's earlier
comment about the cdf's returning a NAN for invalid arguments.
This macro may not be exactly what you want, but it does
return a NAN, and calls the GSL_ERROR macro with a GSL_EDOM,
the same way as the specfuncs.

-Jason

? betadistinv.c
Index: Makefile.am
===================================================================
RCS file: /cvs/gsl/gsl/cdf/Makefile.am,v
retrieving revision 1.3
diff -u -r1.3 Makefile.am
--- Makefile.am	29 Jul 2004 13:11:18 -0000	1.3
+++ Makefile.am	16 Oct 2004 14:13:06 -0000
@@ -5,7 +5,7 @@
 
 INCLUDES= -I$(top_builddir)
 
-libgslcdf_la_SOURCES = beta.c cauchy.c cauchyinv.c chisq.c chisqinv.c exponential.c exponentialinv.c fdist.c flat.c flatinv.c gamma.c gammainv.c gauss.c gaussinv.c gumbel1.c gumbel1inv.c gumbel2.c gumbel2inv.c laplace.c laplaceinv.c logistic.c logisticinv.c lognormal.c lognormalinv.c pareto.c paretoinv.c rayleigh.c rayleighinv.c tdist.c tdistinv.c weibull.c weibullinv.c
+libgslcdf_la_SOURCES = beta.c betadistinv.c cauchy.c cauchyinv.c chisq.c chisqinv.c exponential.c exponentialinv.c fdist.c flat.c flatinv.c gamma.c gammainv.c gauss.c gaussinv.c gumbel1.c gumbel1inv.c gumbel2.c gumbel2inv.c laplace.c laplaceinv.c logistic.c logisticinv.c lognormal.c lognormalinv.c pareto.c paretoinv.c rayleigh.c rayleighinv.c tdist.c tdistinv.c weibull.c weibullinv.c
 
 noinst_HEADERS = beta_inc.c rat_eval.h test_auto.c
 
Index: fdist.c
===================================================================
RCS file: /cvs/gsl/gsl/cdf/fdist.c,v
retrieving revision 1.2
diff -u -r1.2 fdist.c
--- fdist.c	26 Jul 2003 13:47:53 -0000	1.2
+++ fdist.c	16 Oct 2004 14:13:06 -0000
@@ -74,3 +74,59 @@
 
   return P;
 }
+
+double 
+gsl_cdf_fdist_Pinv (const double p, const double nu1, const double nu2)
+{
+  double result;
+  double y;
+
+  if (p < 0.0)
+    {
+      GSL_CDF_ERROR ("p < 0.0", GSL_EDOM);
+    }
+  if (p > 1.0 )
+    {
+      GSL_CDF_ERROR ("p > 1.0", GSL_EDOM);
+    }
+  if (nu1 < 1.0 )
+    {
+      GSL_CDF_ERROR ("nu1 < 1", GSL_EDOM);
+    }
+  if (nu2 < 1.0)
+    {
+      GSL_CDF_ERROR ("nu2 < 1", GSL_EDOM);
+    }
+  y = gsl_cdf_beta_Pinv( p, nu1 / 2.0, nu2 / 2.0 );
+  result = nu2 * y / (nu1 * (1.0 - y));
+  
+  return result;
+}
+double 
+gsl_cdf_fdist_Qinv (const double q, const double nu1, const double nu2)
+{
+  double result;  /* F quantile handed back to the caller */
+  double y;       /* beta quantile for shapes (nu1/2, nu2/2) */
+  
+  if (q < 0.0)
+    {
+      GSL_CDF_ERROR ("q < 0.0", GSL_EDOM);
+    }
+  if (q > 1.0 )
+    {
+      GSL_CDF_ERROR ("q > 1.0", GSL_EDOM);
+    }
+  if (nu1 < 1.0 )
+    {
+      GSL_CDF_ERROR ("nu1 < 1", GSL_EDOM);
+    }
+  if (nu2 < 1.0)
+    {
+      GSL_CDF_ERROR ("nu2 < 1", GSL_EDOM);
+    }
+  y = gsl_cdf_beta_Qinv( q, nu1 / 2.0, nu2 / 2.0 ); /* invert the upper-tail beta cdf */
+  result = nu2 * y / (nu1 * (1.0 - y)); /* map the beta quantile onto the F scale */
+  
+  return result;
+}
+
Index: gsl_cdf.h
===================================================================
RCS file: /cvs/gsl/gsl/cdf/gsl_cdf.h,v
retrieving revision 1.3
diff -u -r1.3 gsl_cdf.h
--- gsl_cdf.h	26 Jul 2003 13:44:33 -0000	1.3
+++ gsl_cdf.h	16 Oct 2004 14:13:06 -0000
@@ -33,6 +33,12 @@
 #endif
 
 __BEGIN_DECLS 
+/* GSL_CDF_ERROR: call the error handler, and return a NAN. */
+#define GSL_CDF_ERROR(reason, gsl_errno) \
+       do { \
+       gsl_error (reason, __FILE__, __LINE__, gsl_errno) ; \
+       return GSL_NAN ; \
+       } while (0)
 
 double gsl_cdf_ugaussian_P (const double x);
 double gsl_cdf_ugaussian_Q (const double x);
@@ -91,9 +97,15 @@
 double gsl_cdf_fdist_P (const double x, const double nu1, const double nu2);
 double gsl_cdf_fdist_Q (const double x, const double nu1, const double nu2);
 
+double gsl_cdf_fdist_Pinv (const double p, const double nu1, const double nu2);
+double gsl_cdf_fdist_Qinv (const double q, const double nu1, const double nu2);
+
 double gsl_cdf_beta_P (const double x, const double a, const double b);
 double gsl_cdf_beta_Q (const double x, const double a, const double b);
 
+double gsl_cdf_beta_Pinv (const double p, const double a, const double b);
+double gsl_cdf_beta_Qinv (const double q, const double a, const double b);
+
 double gsl_cdf_flat_P (const double x, const double a, const double b);
 double gsl_cdf_flat_Q (const double x, const double a, const double b);
 
Index: test.c
===================================================================
RCS file: /cvs/gsl/gsl/cdf/test.c,v
retrieving revision 1.4
diff -u -r1.4 test.c
--- test.c	14 Aug 2003 10:05:50 -0000	1.4
+++ test.c	16 Oct 2004 14:13:06 -0000
@@ -48,6 +48,8 @@
 void test_gammainv (void);
 void test_chisqinv (void);
 void test_tdistinv (void);
+void test_betainv (void);
+void test_finv (void);
 
 #include "test_auto.c"
 
@@ -72,6 +74,8 @@
   test_gammainv (); 
   test_chisqinv (); 
   test_tdistinv (); 
+  test_betainv ();
+  test_finv ();
 
   test_auto_beta ();
   test_auto_fdist ();
@@ -529,6 +533,136 @@
   TEST (gsl_cdf_fdist_Q, (10000.0, 200.0, 500.0), 0.0, 0.0);
 }
 
+void test_finv (void) {
+  TEST (gsl_cdf_fdist_Pinv, (0.0, 1.2, 1.3), 0.0, 0.0);
+  TEST (gsl_cdf_fdist_Pinv, ( 6.98194275525039002e-61, 1.2, 1.3), 1e-100, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 1.10608485860238564e-2, 1.2, 1.3), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 4.38636757068313850e-2, 1.2, 1.3), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 1.68242392712840734e-1, 1.2, 1.3), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 3.14130045246195449e-1, 1.2, 1.3), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 5.09630779074755253e-01, 1.2, 1.3), 1.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 5.83998640641553852e-1, 1.2, 1.3), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 6.34733581351938787e-1, 1.2, 1.3), 2.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 8.48446237879200975e-1, 1.2, 1.3), 10.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.00987726336875039e-1, 1.2, 1.3), 20.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.64489127047688435e-1, 1.2, 1.3), 100.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.92012051694116388e-1, 1.2, 1.3), 1000.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.98210862808842585e-1, 1.2, 1.3), 10000.0, TEST_TOL6);
+
+  TEST (gsl_cdf_fdist_Qinv, ( 1.0, 1.2, 1.3), 0.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 9.88939151413976144e-1, 1.2, 1.3), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 9.56136324293168615e-1, 1.2, 1.3), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 8.31757607287159265e-1, 1.2, 1.3), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 6.85869954753804551e-1, 1.2, 1.3), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 4.90369220925244747e-1, 1.2, 1.3), 1.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 4.16001359358446148e-1, 1.2, 1.3), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 3.65266418648061213e-1, 1.2, 1.3), 2.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 1.51553762120799025e-1, 1.2, 1.3), 10.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 9.90122736631249612e-2, 1.2, 1.3), 20.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 3.55108729523115643e-2, 1.2, 1.3), 100.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 7.98794830588361109e-3, 1.2, 1.3), 1000.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 1.7891371911574145e-3, 1.2, 1.3), 10000.0, TEST_TOL6);
+
+
+  TEST (gsl_cdf_fdist_Pinv, ( 0.0, 500.0, 1.3), 0.0, 0.0);
+
+  TEST (gsl_cdf_fdist_Pinv, ( 9.83434460393304765e-141, 500.0, 1.3), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 1.45915624888550014e-26, 500.0, 1.3), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 5.89976509619688165e-4, 500.0, 1.3), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 6.86110486051542533e-2, 500.0, 1.3), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 3.38475053806404615e-1, 500.0, 1.3), 1.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 4.52016245247457422e-1, 500.0, 1.3), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 5.27339068937388798e-1, 500.0, 1.3), 2.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 8.16839628578413905e-1, 500.0, 1.3), 10.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 8.81784623056911406e-1, 500.0, 1.3), 20.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.58045057204221295e-1, 500.0, 1.3), 100.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.90585749380655275e-1, 500.0, 1.3), 1000.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.97891924831461387e-1, 500.0, 1.3), 10000.0, TEST_TOL6);
+
+  TEST (gsl_cdf_fdist_Qinv, ( 1.0, 500.0, 1.3), 0.0, TEST_TOL6);
+
+  /*
+   * The algorithm currently implemented in gsl_cdf_fdist_Qinv and Pinv
+   * are not accurate for very large degrees of freedom, so the tests
+   * here are commented out. Another algorithm more suitable for
+   * these extreme values might pass these tests.
+   */
+#if 0
+  TEST (gsl_cdf_fdist_Qinv, ( 1.0, 500.0, 1.3), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 9.99410023490380312e-1, 500.0, 1.3), 0.1, TEST_TOL6);
+#endif
+  TEST (gsl_cdf_fdist_Qinv, ( 9.31388951394845747e-1, 500.0, 1.3), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 6.61524946193595385e-1, 500.0, 1.3), 1.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 5.47983754752542572e-1, 500.0, 1.3), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 4.72660931062611202e-1, 500.0, 1.3), 2.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 1.83160371421586096e-1, 500.0, 1.3), 10.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 1.18215376943088595e-1, 500.0, 1.3), 20.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 4.19549427957787016e-2, 500.0, 1.3), 100.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 9.41425061934473424e-3, 500.0, 1.3), 1000.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 2.10807516853862603e-3, 500.0, 1.3), 10000.0, TEST_TOL6);
+
+  TEST (gsl_cdf_fdist_Pinv, ( 0.0, 1.2, 500.0), 0.0, 0.0);
+  TEST (gsl_cdf_fdist_Pinv, ( 8.23342055585482999e-61, 1.2, 500.0), 1e-100, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 1.30461496441289529e-2, 1.2, 500.0), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 5.18324224608033294e-2, 1.2, 500.0), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 2.02235101716076289e-1, 1.2, 500.0), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 3.90502983219393749e-1, 1.2, 500.0), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 6.67656191574653619e-1, 1.2, 500.0), 1.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 7.75539230271467054e-1, 1.2, 500.0), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 8.45209114904613705e-1, 1.2, 500.0), 2.0, TEST_TOL6);
+#if 0
+  TEST (gsl_cdf_fdist_Pinv, ( 9.99168017659120988e-1, 1.2, 500.0), 10.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.99998005738371669e-1, 1.2, 500.0), 20.0, TEST_TOL6);
+#endif
+  TEST (gsl_cdf_fdist_Pinv, ( 1.0, 1.2, 500.0), GSL_POSINF, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 1.0, 1.2, 500.0), GSL_POSINF, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 1.0, 1.2, 500.0), GSL_POSINF, TEST_TOL6);
+
+  TEST (gsl_cdf_fdist_Qinv, ( 1.0, 1.2, 500.0), 0.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 9.86953850355871047e-1, 1.2, 500.0), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 9.48167577539196671e-1, 1.2, 500.0), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 7.97764898283923711e-1, 1.2, 500.0), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 6.09497016780606251e-1, 1.2, 500.0), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 3.32343808425346381e-1, 1.2, 500.0), 1.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 2.24460769728532946e-1, 1.2, 500.0), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 1.54790885095386295e-1, 1.2, 500.0), 2.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 8.3198234087901168e-4, 1.2, 500.0), 10.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 1.99426162833131e-6, 1.2, 500.0), 20.0, TEST_TOL6);
+#if 0
+  TEST (gsl_cdf_fdist_Qinv, ( 6.23302662288217117e-25, 1.2, 500.0), 100.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 1.14328577259666930e-134, 1.2, 500.0), 1000.0, TEST_TOL6);
+#endif
+  TEST (gsl_cdf_fdist_Qinv, ( 0.0, 1.2, 500.0), GSL_POSINF, 0.0);
+
+  TEST (gsl_cdf_fdist_Pinv, ( 0.0, 200.0, 500.0), 0.0, 0.0);
+#if 0
+  TEST (gsl_cdf_fdist_Pinv, ( 4.09325080403669893e-251, 200.0, 500.0), 0.001, TEST_TOL6);
+#endif
+  TEST (gsl_cdf_fdist_Pinv, ( 1.17894325419628688e-151, 200.0, 500.0), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 5.92430940796861258e-57, 200.0, 500.0), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 3.18220452357263554e-18, 200.0, 500.0), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 5.06746326121168266e-1, 200.0, 500.0), 1.0, TEST_TOL6);
+#if 0
+  TEST (gsl_cdf_fdist_Pinv, ( 9.99794175718712438e-1, 200.0, 500.0), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Pinv, ( 9.99999999528236152e-1, 200.0, 500.0), 2.0, TEST_TOL6);
+#endif
+  TEST (gsl_cdf_fdist_Pinv, ( 1.0, 200.0, 500.0), GSL_POSINF, TEST_TOL6);
+
+  TEST (gsl_cdf_fdist_Qinv, ( 1.0, 200.0, 500.0), 0.0, TEST_TOL6);
+#if 0
+  TEST (gsl_cdf_fdist_Qinv, ( 9.99999999999999997e-1, 200.0, 500.0), 0.325, TEST_TOL6);
+#endif
+  TEST (gsl_cdf_fdist_Qinv, ( 4.93253673878831734e-1, 200.0, 500.0), 1.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 2.05824281287561795e-4, 200.0, 500.0), 1.5, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 4.71763848371410786e-10, 200.0, 500.0), 2.0, TEST_TOL6);
+#if 0
+  TEST (gsl_cdf_fdist_Qinv, ( 5.98048337181948436e-96, 200.0, 500.0), 10.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 2.92099265879979502e-155, 200.0, 500.0), 20.0, TEST_TOL6);
+  TEST (gsl_cdf_fdist_Qinv, ( 6.53118977244362760e-316, 200.0, 500.0), 100.0, 0.0);
+#endif
+  TEST (gsl_cdf_fdist_Qinv, ( 0.0, 200.0, 500.0), GSL_POSINF, 0.0);
+}
+
   /* Tests for gamma distribution */
 
   /* p(x, a, b) := gammaP(b, x / a) */
@@ -687,6 +821,32 @@
   TEST (gsl_cdf_beta_Q, (1.0, 1.2, 1.3), 0.0, TEST_TOL6);
 }
 
+void test_betainv (void) {
+  TEST (gsl_cdf_beta_Pinv, (0.0, 1.2, 1.3), 0.0, 0.0);
+  TEST (gsl_cdf_beta_Pinv, ( 1.34434944656489596e-120, 1.2, 1.3), 1e-100, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 3.37630042504535813e-4, 1.2, 1.3), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 5.34317264038929473e-3, 1.2, 1.3), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 8.33997828306748346e-2, 1.2, 1.3), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 3.28698654180583916e-1, 1.2, 1.3), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 5.29781429451299081e-1, 1.2, 1.3), 0.5, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 9.38529397224430659e-1, 1.2, 1.3), 0.9, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 9.96886438341254380e-1, 1.2, 1.3), 0.99, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 9.99843792833067634e-1, 1.2, 1.3), 0.999, TEST_TOL6);
+  TEST (gsl_cdf_beta_Pinv, ( 1.0, 1.2, 1.3), 1.0, TEST_TOL6);
+
+  TEST (gsl_cdf_beta_Qinv, ( 1.0, 1.2, 1.3), 0.0, 0.0);
+  TEST (gsl_cdf_beta_Qinv, ( 1e0, 1.2, 1.3), 0.0, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 9.99662369957495464e-1, 1.2, 1.3), 0.001, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 9.94656827359610705e-1, 1.2, 1.3), 0.01, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 9.16600217169325165e-1, 1.2, 1.3), 0.1, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 6.71301345819416084e-1, 1.2, 1.3), 0.325, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 4.70218570548700919e-1, 1.2, 1.3), 0.5, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 6.14706027755693408e-2, 1.2, 1.3), 0.9, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 3.11356165874561958e-3, 1.2, 1.3), 0.99, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 1.56207166932365759e-4, 1.2, 1.3), 0.999, TEST_TOL6);
+  TEST (gsl_cdf_beta_Qinv, ( 0.0, 1.2, 1.3), 1.0, TEST_TOL6);
+}
+
 void test_gammainv (void) {
   TEST (gsl_cdf_gamma_Pinv, (0.0, 1.0, 1.0), 0.0, 0.0);
   TEST (gsl_cdf_gamma_Pinv, (1e-100, 1.0, 1.0), 1e-100, TEST_TOL6);

[-- Attachment #2: betadistinv.c --]
[-- Type: text/plain, Size: 15181 bytes --]

/* cdf/betadistinv.c
 *
 * Copyright (C) 2004 Jason H. Stover.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
 */

/*
 * Invert the Beta distribution. 
 * 
 * References:
 *
 * Roger W. Abernathy and Robert P. Smith. "Applying Series Expansion 
 * to the Inverse Beta Distribution to Find Percentiles of the F-Distribution,"
 * ACM Transactions on Mathematical Software, volume 19, number 4, December 1993,
 * pages 474-480.
 *
 * G.W. Hill and A.W. Davis. "Generalized asymptotic expansions of a 
 * Cornish-Fisher type," Annals of Mathematical Statistics, volume 39, number 8,
 * August 1968, pages 1264-1273.
 */
#include <config.h>
#include <math.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_randist.h>

#define BETAINV_INIT_ERR .01
#define BETADISTINV_N_TERMS 3
#define BETADISTINV_MAXITER 20

/*
 * Midpoint of the interval spanned by x and y, formed as the smaller
 * endpoint plus half the distance between the two.  The arguments may
 * be given in either order.
 */
static double 
s_bisect (double x, double y)
{
  const double low_end = (x < y) ? x : y;
  const double half_width = fabs (x - y) / 2.0;

  return low_end + half_width;
}
/*
 * One bisection step for inverting the lower-tail beta cdf.
 *
 * Evaluates gsl_cdf_beta_P at old_guess and compares it with prob:
 *   - cdf below prob: bisect between old_guess and the larger of x, y;
 *   - cdf above prob: bisect between old_guess and the smaller of x, y;
 *   - otherwise:      bisect old_guess with itself, i.e. stay put.
 *
 * x and y are the two ends of the current bracket, in either order.
 */
static double
new_guess_P ( double old_guess, double x, double y, 
              double prob, double a, double b)
{
  const double cdf_here = gsl_cdf_beta_P (old_guess, a, b);
  double target = old_guess;

  if (cdf_here < prob)
    {
      target = GSL_MAX (x, y);
    }
  else if (cdf_here > prob)
    {
      target = GSL_MIN (x, y);
    }

  return s_bisect (old_guess, target);
}

/*
 * One bisection step for inverting the upper-tail beta cdf.
 *
 * Evaluates gsl_cdf_beta_Q at old_guess and compares it with prob.
 * Q decreases as its argument grows, so the direction is the reverse
 * of new_guess_P:
 *   - Q above prob: bisect between old_guess and the larger of x, y;
 *   - Q below prob: bisect between old_guess and the smaller of x, y;
 *   - otherwise (exact hit, or an unordered comparison): stay put.
 *
 * x and y are the two ends of the current bracket, in either order.
 */
static double
new_guess_Q ( double old_guess, double x, double y, 
              double prob, double a, double b)
{
  double result;
  double q_hat;
  double end_point;
  
  q_hat = gsl_cdf_beta_Q(old_guess, a, b);
  /*
   * Strict comparisons on both sides: an exact hit must fall through
   * to the final branch and keep old_guess, exactly as new_guess_P
   * does, instead of being pushed away from the root.
   */
  if (q_hat > prob)
    {
      end_point = GSL_MAX(x,y);
    }
  else if ( q_hat < prob )
    {
      end_point = GSL_MIN(x,y);
    }
  else
    {
      end_point = old_guess;
    }
  result = s_bisect(old_guess, end_point);
  
  return result;
}

/*
 * The get_corn_fish_* functions below return the first
 * three terms of the Cornish-Fisher expansion
 * without recursion. The recursive functions
 * make the code more legible when higher order coefficients
 * are used, but terms beyond the cubic do not 
 * improve accuracy.
 */
  /*
   * Linear coefficient for the 
   * Cornish-Fisher expansion: the reciprocal of the
   * beta density at x.  When the density is not strictly
   * positive, GSL_DBL_MAX is returned in place of a
   * division by zero.
   */
static double 
get_corn_fish_lin (const double x, const double a, const double b)
{
  const double density = gsl_ran_beta_pdf (x, a, b);

  return (density > 0) ? 1.0 / density : GSL_DBL_MAX;
}
  /*
   * Quadratic coefficient for the 
   * Cornish-Fisher expansion.
   *
   * With B = Gamma(a) Gamma(b) / Gamma(a+b), the value returned is
   *
   *   B^2 (1 - a + x (a + b - 2)) / (2 x^(2a-1) (1-x)^(2b-1)),
   *
   * which is algebraically -f'(x) / (2 f(x)^3) for the beta
   * density f.  A zero denominator yields 0.0.
   */
static double
get_corn_fish_quad (const double x, const double a, const double b)
{
  /* Log-gamma values, evaluated in the same order as before. */
  const double lng_sum = gsl_sf_lngamma (a + b);
  const double lng_a = gsl_sf_lngamma (a);
  const double lng_b = gsl_sf_lngamma (b);

  const double numer =
    exp (2 * (lng_a + lng_b - lng_sum)) * (1 - a + x * (b + a - 2));
  const double denom =
    2.0 * pow (x, 2 * a - 1) * pow (1 - x, 2 * b - 1);

  if (!(fabs (denom) > 0.0))
    {
      return 0.0;
    }

  return numer / denom;
}
/*
 * The cubic term for the Cornish-Fisher expansion.
 * Theoretically, use of this term should give a better approximation, 
 * but in practice inclusion of the cubic term worsens the 
 * iterative procedure in gsl_cdf_beta_Pinv and gsl_cdf_beta_Qinv
 * for extreme values of p, a or b.
 */				    
#if 0
/*
 * Disabled (compiled out).  Builds a numerator from x, a and b and
 * divides it by 2 x^2 (1-x)^2 f(x)^3, where f is gsl_ran_beta_pdf.
 * Returns 0.0 when that denominator is zero.
 */
static double 
get_corn_fish_cube (const double x, const double a, const double b)
{
  double result;
  double am1 = a - 1.0;
  double am2 = a - 2.0;
  double apbm2 = a+b-2.0;
  double apbm3 = a+b-3.0;
  double apbm4 = a+b-4.0;
  double ab1ab2 = am1 * am2;   /* (a-1)(a-2) */
  double tmp;
  double num;
  double den;

  /* First part of the numerator: ((a-1) - x (a+b-2))^2. */
  num =  (am1 - x * apbm2) * (am1 - x * apbm2);
  /* Second part: a polynomial in x added to the square above. */
  tmp = ab1ab2 - x * (apbm2 * ( ab1ab2 * apbm4 + 1) + x * apbm2 * apbm3);
  num += tmp;
  /* tmp is reused to hold the density value for the denominator. */
  tmp = gsl_ran_beta_pdf(x,a,b);
  den = 2.0 * x * x * (1 - x) * (1 - x) * pow(tmp,3.0);
  /* Guard the division; a zero denominator contributes no cubic term. */
  if ( fabs(den) > 0.0)
    {
      result = num / den;
    }
  else
    {
      result = 0.0;
    }

  return result;
}
#endif
/*
 * The Cornish-Fisher coefficients can be defined recursively,
 * starting with the nth derivative of s_psi = -f'(x)/f(x),
 * where f is the beta density.
 *
 * The section below was commented out since 
 * the recursive generation of the coefficients did
 * not improve on the accuracy of the directly coded 
 * first three coefficients.
 */
#if 0
/*
 * Disabled (compiled out).  Returns
 *
 *   Gamma(n+1) * ( bsgn (b-1) / (1-x)^(n+1) + asgn (a-1) / x^(n+1) )
 *
 * with asgn = +1, bsgn = -1 for odd n and asgn = -1, bsgn = +1 for
 * even n.  For n = 0 this is (b-1)/(1-x) - (a-1)/x.
 *
 * NOTE(review): the sign of the (1-x) term alternates with n here,
 * whereas the nth derivative of 1/(1-x) is n!/(1-x)^(n+1) with the
 * same sign for every n -- confirm before re-enabling this code.
 */
static double
s_d_psi (double x, double a, double b, int n)
{
  double result;
  double np1 = (double) n + 1;
  double asgn;
  double bsgn;
  double bm1 = b - 1.0;
  double am1 = a - 1.0;
  double mx = 1.0 - x;
  
  /* Signs depend only on the parity of n. */
  asgn = (n % 2) ? 1.0:-1.0;
  bsgn = (n % 2) ? -1.0:1.0;
  /* gsl_sf_gamma(np1) supplies the n! factor. */
  result = gsl_sf_gamma(np1) * ((bsgn * bm1 / (pow(mx, np1)))
				+ (asgn * am1 / (pow(x,np1))));
  return result;
}
/*
 * nth derivative of a coefficient with respect 
 * to x.
 *
 * Part of the disabled recursive scheme.  For n == 2 the value is
 * s_d_psi(x, a, b, k).  For other n it is
 *
 *   sum_{i=0..k} C(k,i) * get_d_coeff(x,a,b,2,i)
 *                       * get_d_coeff(x,a,b,n-1,k-i)
 *     + get_d_coeff(x,a,b,n-1,k+1).
 *
 * n and k are integer-valued doubles.  The recursion steps n down by
 * one and only stops at n == 2, so it presumably expects n >= 2 --
 * TODO confirm against the caller before re-enabling.
 */
static double 
get_d_coeff ( double x, double a, 
	      double b, double n, double k)
{
  double result;
  double d_psi;
  double k_fac;
  double i_fac;
  double kmi_fac;
  double i;
  
  if (n == 2)
    {
      /* Base case; k is converted to s_d_psi's int parameter. */
      result = s_d_psi(x, a, b, k);
    }
  else
    {
      result = 0.0;
      for (i = 0.0; i < (k+1); i++)
	{
	  /* Log-factorials, combined below into the binomial C(k,i). */
	  k_fac = gsl_sf_lngamma(k+1.0);
	  i_fac = gsl_sf_lngamma(i+1.0);
	  kmi_fac = gsl_sf_lngamma(k-i+1.0);
	  
	  result += exp(k_fac - i_fac - kmi_fac)
	    * get_d_coeff( x, a, b, 2.0, i) 
	    * get_d_coeff( x, a, b, (n - 1.0), (k - i));
	}
      /* Extra term: next-higher derivative of the previous coefficient. */
      result += get_d_coeff ( x, a, b, (n-1.0), (k+1.0));
    }

  return result;
}
/*
 * Cornish-Fisher coefficient.
 *
 * Part of the disabled recursive scheme:
 *   n == 1: returns 1;
 *   n == 2: returns s_d_psi(x, a, b, 0);
 *   else:   (n-1) * s_d_psi(x,a,b,0) * get_corn_fish(c,x,a,b,n-1)
 *           + get_d_coeff(x, a, b, n-1, 1).
 *
 * The parameter c is only forwarded to the recursive call; its value
 * does not otherwise enter the result.
 */
static double
get_corn_fish (double c, double x, 
	       double a, double b, double n)
{
  double result;
  double dc;
  double c_prev;
  
  if(n == 1.0)
    {
      result = 1;
    }
  else if (n==2.0)
    {
      result = s_d_psi(x, a, b, 0);
    }
  else
    {
      /* Derivative term and previous-order coefficient. */
      dc = get_d_coeff(x, a, b, (n-1.0), 1.0);
      c_prev = get_corn_fish(c, x, a, b, (n-1.0));
      result = (n-1) * s_d_psi(x,a,b,0) * c_prev + dc;
    }
  return result;
}
#endif

/*
 * Inverse of the lower-tail beta cdf: returns x such that
 * gsl_cdf_beta_P (x, a, b) is approximately p.
 *
 *   p    - target probability.  p < 0 or p > 1 raises GSL_EDOM via
 *          GSL_CDF_ERROR.  p == 0 returns 0.0 and p == 1 returns 1.0.
 *   a, b - shape parameters; a negative value raises GSL_EDOM.
 *
 * Method:
 *   1. Bisection (at most 100 steps) until the relative error in p is
 *      no larger than BETAINV_INIT_ERR.
 *   2. One inverse quadratic (Lagrange) interpolation step through the
 *      bracket, used only when the three cdf values are strictly
 *      increasing.
 *   3. Up to BETADISTINV_MAXITER Newton-type steps built from the
 *      linear and quadratic Cornish-Fisher terms.
 * The candidate with the smallest relative error seen is returned.
 *
 * Throughout, err = p - P(state), so a positive err means state must
 * increase.
 */
double 
gsl_cdf_beta_Pinv ( const double p, const double a, const double b)
{
  double result;
  double state;
  double beta_result;
  double lower = 0.0;
  double upper = 1.0;
  double c1;
  double c2;
  double frac1;
  double frac2;
  double frac3;
  double frac4;
  double p0;
  double p1;
  double p2;
  double tmp;
  double err;
  double abserr;
  double relerr;
  double min_err;
  int n_iter = 0;

  if ( p < 0.0 )
    {
      GSL_CDF_ERROR("p < 0", GSL_EDOM);
    }
  if ( p > 1.0 )
    {
      GSL_CDF_ERROR("p > 1",GSL_EDOM);
    }
  if ( a < 0.0 )
    {
      GSL_CDF_ERROR ("a < 0", GSL_EDOM );
    }
  if ( b < 0.0 )
    {
      GSL_CDF_ERROR ( "b < 0", GSL_EDOM );
    }
  if ( p == 0.0 )
    {
      return 0.0;
    }
  if ( p == 1.0 )
    {
      return 1.0;
    }

  if (p > (1.0 - GSL_DBL_EPSILON))
    {
      /*
       * When p is close to 1.0, the bisection works better with
       * gsl_cdf_beta_Q.  The reflection identity
       *   P(x; a, b) = Q(1 - x; b, a)
       * requires the shape parameters to be swapped, so the answer
       * is 1 - Qinv(p; b, a).
       */
      state = gsl_cdf_beta_Qinv ( p, b, a);
      result = 1.0 - state;
      return result;
    }
  if (p < GSL_DBL_EPSILON )
    {
      /*
       * Start at a small value and rise until
       * we are above the correct result. This 
       * avoids overflow. When p is very close to 
       * 0, an initial state value of a/(a+b) will
       * cause the interpolating polynomial
       * to overflow.
       */
      upper = GSL_DBL_MIN;
      beta_result = gsl_cdf_beta_P (upper, a, b);
      while (beta_result < p)
        {
          lower = upper;
          upper *= 4.0;
          beta_result = gsl_cdf_beta_P (upper, a, b);
        }
      state = (lower + upper) / 2.0;
    }
  else
    {
      /*
       * First guess is the expected value.
       */
      lower = 0.0;
      upper = 1.0;
      state = a/(a+b);
    }
  /*
   * Measure the starting error at state itself (not at the bracket
   * end), with the sign the Newton step below relies on.
   */
  beta_result = gsl_cdf_beta_P (state, a, b);
  err = p - beta_result;
  abserr = fabs(err);
  relerr = abserr / p;
  while ( relerr > BETAINV_INIT_ERR && n_iter < 100)
    {
      /* Count the step so the iteration cap can actually trigger. */
      n_iter++;
      tmp = new_guess_P ( state, lower, upper, 
                          p, a, b);
      /* The old state becomes whichever bracket end the guess left. */
      lower = ( tmp < state ) ? lower:state;
      upper = ( tmp < state ) ? state:upper;
      state = tmp;
      beta_result = gsl_cdf_beta_P (state, a, b);
      err = p - beta_result;
      abserr = fabs(err);
      relerr = abserr / p;
    }

  result = state;
  min_err = relerr;
  /*
   * Use a second order Lagrange interpolating
   * polynomial to get closer before switching to
   * the iterative method.  The polynomial interpolates x as a
   * function of the cdf value through (p0, lower), (p1, state) and
   * (p2, upper), and is evaluated at p.
   */
  p0 = gsl_cdf_beta_P (lower, a, b);
  p1 = gsl_cdf_beta_P (state, a, b);
  p2 = gsl_cdf_beta_P (upper, a, b);
  if( p0 < p1 && p1 < p2)
    {
      frac1 = (p - p2) / (p0 - p1);
      frac2 = (p - p1) / (p0 - p2);
      frac3 = (p - p0) / (p1 - p2);
      frac4 = (p - p0) * (p - p1) / ((p2 - p0) * (p2 - p1));
      state = frac1 * (frac2 * lower - frac3 * state)
        + frac4 * upper;

      beta_result = gsl_cdf_beta_P( state, a, b);
      err = p - beta_result;
      abserr = fabs(err);
      relerr = abserr / p;
      if (relerr < min_err)
        {
          result = state;
          min_err = relerr;
        }
    }

  n_iter = 0;

  /*
   * Newton-type iteration using the terms from the
   * Cornish-Fisher expansion. If only the first term
   * of the expansion is used, this is Newton's method.
   *
   * The cubic term (get_corn_fish_cube) and the recursively
   * generated higher-order coefficients are deliberately left out:
   * they did not improve the result and can harm the approximation
   * for extreme values of p, a, or b.
   */
  while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
    {
      n_iter++;
      c1 = get_corn_fish_lin (state, a, b);
      c2 = get_corn_fish_quad (state, a, b);
      state += err * (c1 + (c2 * err / 2.0 ));
      beta_result = gsl_cdf_beta_P ( state, a, b );
      err = p - beta_result;
      abserr = fabs(err);
      relerr = abserr / p;
      /* Keep the best iterate; a later step may overshoot. */
      if (relerr < min_err)
        {
          result = state;
          min_err = relerr;
        }
    }

  return result;
}

/*
 * Inverse of the upper-tail beta cdf: returns x such that
 * gsl_cdf_beta_Q (x, a, b) is approximately q.
 *
 *   q    - target upper-tail probability.  q < 0 or q > 1 raises
 *          GSL_EDOM via GSL_CDF_ERROR.  q == 0 returns 1.0 and
 *          q == 1 returns 0.0.
 *   a, b - shape parameters; a negative value raises GSL_EDOM.
 *
 * Method: the same three stages as gsl_cdf_beta_Pinv (bisection,
 * one inverse quadratic interpolation step, Newton-type refinement
 * with two Cornish-Fisher terms), applied to the decreasing function
 * Q.  The candidate with the smallest relative error seen is
 * returned.
 *
 * Throughout, err = Q(state) - q.  Because Q decreases in x, a
 * positive err means state must increase, which is the sign the
 * refinement step expects.
 */
double
gsl_cdf_beta_Qinv (double q, double a, double b)
{
  double result;
  double state;
  double beta_result;
  double lower = 0.0;
  double upper = 1.0;
  double c1;
  double c2;
  double p0;
  double p1;
  double p2;
  double frac1;
  double frac2;
  double frac3;
  double frac4;
  double tmp;
  double err;
  double abserr;
  double relerr;
  double min_err;
  int n_iter = 0;

  if ( q < 0.0 )
    {
      GSL_CDF_ERROR("q < 0", GSL_EDOM);
    }
  if ( q > 1.0 )
    {
      GSL_CDF_ERROR("q > 1",GSL_EDOM);
    }
  if ( a < 0.0 )
    {
      GSL_CDF_ERROR ("a < 0", GSL_EDOM );
    }
  if ( b < 0.0 )
    {
      GSL_CDF_ERROR ( "b < 0", GSL_EDOM );
    }
  if ( q == 0.0 )
    {
      return 1.0;
    }
  if ( q == 1.0 )
    {
      return 0.0;
    }

  if ( q < GSL_DBL_EPSILON )
    {
      /*
       * When q is close to 0, the bisection
       * and interpolation done in the rest of
       * this routine will not give the correct
       * value within double precision, so 
       * gsl_cdf_beta_Pinv is called instead.  The reflection
       * identity Q(x; a, b) = P(1 - x; b, a) requires the shape
       * parameters to be swapped, so the answer is
       * 1 - Pinv(q; b, a).
       */
      state = gsl_cdf_beta_Pinv ( q, b, a);
      result = 1.0 - state;
      return result;
    }
  if ( q > 1.0 - GSL_DBL_EPSILON )
    {
      /*
       * Make the initial guess close to 0.0.
       */
      upper = GSL_DBL_MIN;
      beta_result = gsl_cdf_beta_Q ( upper, a, b);
      while (beta_result > q )
        {
          lower = upper;
          upper *= 4.0;
          beta_result = gsl_cdf_beta_Q ( upper, a, b);
        }
      state = (upper + lower) / 2.0;
    }
  else
    {
      /* Bisection to get an initial approximation.
       * First guess is the expected value.
       */
      state = a/(a+b);
      lower = 0.0;
      upper = 1.0;
    }
  beta_result = gsl_cdf_beta_Q (state, a, b);
  err = beta_result - q;
  abserr = fabs(err);
  relerr = abserr / q;
  while ( relerr > BETAINV_INIT_ERR && n_iter < 100)
    {
      n_iter++;
      tmp = new_guess_Q ( state, lower, upper, 
                          q, a, b);
      /* The old state becomes whichever bracket end the guess left. */
      lower = ( tmp < state ) ? lower:state;
      upper = ( tmp < state ) ? state:upper;
      state = tmp;
      beta_result = gsl_cdf_beta_Q (state, a, b);
      /* Same sign convention as everywhere else in this function. */
      err = beta_result - q;
      abserr = fabs(err);
      relerr = abserr / q;
    }
  result = state;
  min_err = relerr;

  /*
   * Use a second order Lagrange interpolating
   * polynomial to get closer before switching to
   * the iterative method.  The polynomial interpolates x as a
   * function of the Q value through (p0, lower), (p1, state) and
   * (p2, upper), and is evaluated at q.
   */
  p0 = gsl_cdf_beta_Q (lower, a, b);
  p1 = gsl_cdf_beta_Q (state, a, b);
  p2 = gsl_cdf_beta_Q (upper, a, b);
  if(p0 > p1 && p1 > p2)
    {
      frac1 = (q - p2) / (p0 - p1);
      frac2 = (q - p1) / (p0 - p2);
      frac3 = (q - p0) / (p1 - p2);
      frac4 = (q - p0) * (q - p1) / ((p2 - p0) * (p2 - p1));
      state = frac1 * (frac2 * lower - frac3 * state)
        + frac4 * upper;
      beta_result = gsl_cdf_beta_Q( state, a, b);
      err = beta_result - q;
      abserr = fabs(err);
      relerr = abserr / q;
      if (relerr < min_err)
        {
          result = state;
          min_err = relerr;
        }
    }

  /*
   * Iteration using the terms from the
   * Cornish-Fisher expansion. If only the first term
   * of the expansion is used, this is Newton's method.
   *
   * The cubic term (get_corn_fish_cube) is deliberately left out: it
   * does not help, and can harm the approximation for extreme values
   * of q, a and b.
   */

  n_iter = 0;
  while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
    {
      n_iter++;
      c1 = get_corn_fish_lin (state, a, b);
      c2 = get_corn_fish_quad (state, a, b);
      state += err * (c1 + (c2 * err / 2.0 ));
      beta_result = gsl_cdf_beta_Q ( state, a, b );
      err = beta_result - q;
      abserr = fabs(err);
      relerr = abserr / q;
      /* Keep the best iterate; a later step may overshoot. */
      if (relerr < min_err)
        {
          result = state;
          min_err = relerr;
        }
    }

  return result;
}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: beta and f cdf inverses
  2004-10-17 16:06 beta and f cdf inverses Jason Stover
@ 2004-10-18 17:28 ` Brian Gough
  0 siblings, 0 replies; 2+ messages in thread
From: Brian Gough @ 2004-10-18 17:28 UTC (permalink / raw)
  To: Jason Stover; +Cc: gsl-discuss

Jason Stover writes:
 > Attached is betadistinv.c, which will invert
 > the beta cumulative distribution function. Also
 > attached is a patch for fdist.c, gsl_cdf.h, Makefile.am
 > and test.c. Inverting the F distribution is (usually)
 > done by inverting the beta and transforming, so 
 > writing a gsl_cdf_beta_Pinv() gave me a gsl_cdf_fdist_Pinv() 
 > without much extra effort.

Thanks Jason. That looks good.

-- 
Brian Gough

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2004-10-18 17:28 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-10-17 16:06 beta and f cdf inverses Jason Stover
2004-10-18 17:28 ` Brian Gough

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).