* Functionality to fit data to statistical distributions
@ 2003-12-22 15:30 Joakim Hove
2003-12-23 10:31 ` Brian Gough
0 siblings, 1 reply; 2+ messages in thread
From: Joakim Hove @ 2003-12-22 15:30 UTC (permalink / raw)
To: gsl-discuss
[-- Attachment #1: Type: text/plain, Size: 852 bytes --]
Hello,
I have written some functionality to set up the
'gsl_multifit_fdfsolver' to determine parameters for various
statistical distributions.
Up til now I have only implemented the distributions I have used
needed myself, but if it is of interest to include something like this
in gsl I would be more than happy to complete all the (continuous)
distributions, and improve (i.e. write) the documentation.
The included files are:
gsl_distfit_core: This sets up some datastructures, and actually
calls the gsl solver.
gsl_distfit_eval: This is a long list of small functions which
evaluate the various distribution functions, and
the derivatives needed for least squares fdf
fitting.
In addition there is a Makefile and a small test application:
dist-test.c
Joakim
[-- Attachment #2: gsl_distfit_core.c --]
[-- Type: application/octet-stream, Size: 7612 bytes --]
#include <string.h>
#include <math.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_multifit_nlin.h>
#include <gsl/gsl_math.h>
#include "gsl_distfit_core.h"
#include "gsl_distfit_eval.h"
int generic_f(const gsl_vector * fit_state, void *params, gsl_vector *f) {
size_t n = ((gsl_distfit_ptype *) params)->n;
double *y = ((gsl_distfit_ptype *) params)->y;
double *x = ((gsl_distfit_ptype *) params)->x;
double *sigma = ((gsl_distfit_ptype *) params)->sigma;
gsl_distfit_ftype *F = ((gsl_distfit_ptype *) params)->F;
double Yi;
size_t i;
for (i=0; i<n; i++) {
Yi = F->F(x[i],fit_state);
gsl_vector_set(f,i,(Yi - y[i])/sigma[i]);
}
return GSL_SUCCESS;
}
int generic_df(const gsl_vector * fit_state, void *params, gsl_matrix *J) {
size_t n = ((gsl_distfit_ptype *) params)->n;
double *x = ((gsl_distfit_ptype *) params)->x;
double *sigma = ((gsl_distfit_ptype *) params)->sigma;
gsl_distfit_ftype *F = ((gsl_distfit_ptype *) params)->F;
double dYi;
size_t i1,i2;
for (i1=0; i1<n; i1++) {
for (i2=0; i2<F->Nparams; i2++) {
dYi = F->dF[i2](x[i1],fit_state) / sigma[i1];
gsl_matrix_set(J,i1,i2,dYi);
}
}
return GSL_SUCCESS;
}
int generic_fdf(const gsl_vector *fit_state, void *params, gsl_vector *f, gsl_matrix *J) {
generic_f(fit_state,params,f);
generic_df(fit_state,params,J);
return GSL_SUCCESS;
}
void gsl_distfit_init(size_t n, double *x, double *px, gsl_distfit_ftype *F , gsl_vector *p) {
F->init(n,x,px,p);
}
double gsl_distfit_Pinv(gsl_distfit_ftype *F, double x , gsl_vector *fit_res) {
return F->Pinv(x,fit_res);
}
void gsl_distfit_ftype_free(gsl_distfit_ftype *F) {
free(F->dF);
free(F);
}
gsl_distfit_ftype *gsl_distfit_ftype_alloc(size_t params) {
gsl_distfit_ftype *F;
F = (gsl_distfit_ftype *) (malloc(sizeof(gsl_distfit_ftype)));
F->Nparams = params;
F->dF = ( gsl_distfit_eval_type * ) (malloc(F->Nparams * sizeof( gsl_distfit_eval_type )));
return F;
}
#define NAME(x) !strcmp(dist_name,(x))
gsl_distfit_ftype *gsl_distfit_func(char *dist_name) {
gsl_distfit_ftype *F;
if (NAME("weibull")) {
F = gsl_distfit_ftype_alloc(2);
F->F = eval_weibull;
F->Pinv = eval_weibull_Pinv;
F->dF[0] = eval_weibull_da;
F->dF[1] = eval_weibull_db;
F->init = init_weibull;
} else if (NAME("lognormal")) {
F = gsl_distfit_ftype_alloc(2);
F->F = eval_lognormal;
F->Pinv = eval_lognormal_Pinv;
F->dF[0] = eval_lognormal_dzeta;
F->dF[1] = eval_lognormal_dsigma;
F->init = init_lognormal;
} else if (NAME("rayleigh")) {
F = gsl_distfit_ftype_alloc(1);
F->F = eval_rayleigh;
F->Pinv = eval_rayleigh_Pinv;
F->dF[0] = eval_rayleigh_dsigma;
F->init = init_rayleigh;
} else if (NAME("gaussian")) {
F = gsl_distfit_ftype_alloc(2);
F->F = eval_gaussian;
F->Pinv = eval_gaussian_Pinv;
F->dF[0] = eval_gaussian_dmu;
F->dF[1] = eval_gaussian_dsigma;
F->init = init_gaussian;
} else if (NAME("exponential")) {
F = gsl_distfit_ftype_alloc(1);
F->F = eval_exponential;
F->Pinv = eval_exponential_Pinv;
F->init = init_exponential;
F->dF[0] = eval_exponential_dmu;
} else if (NAME("chisq")) {
F = gsl_distfit_ftype_alloc(1);
F->F = eval_chisq;
F->Pinv = eval_chisq_Pinv;
F->init = init_chisq;
F->dF[0] = eval_chisq_dnu;
} else if (NAME("tdist")) {
F = gsl_distfit_ftype_alloc(1);
F->F = eval_tdist;
F->Pinv = eval_tdist_Pinv;
F->init = init_tdist;
F->dF[0] = eval_tdist_dnu;
} else if (NAME("beta")) {
F = gsl_distfit_ftype_alloc(2);
F->F = eval_beta;
F->Pinv = eval_beta_Pinv;
F->init = init_beta;
F->dF[0] = eval_beta_da;
F->dF[1] = eval_beta_db;
} else {
fprintf(stderr," Unknown distribution: %s - aborting\n",dist_name);
exit(1);
}
return F;
}
void gsl_distfit_solver_set(gsl_multifit_fdfsolver *solver, size_t n, double *x, double *y, double *sigma, gsl_distfit_ftype *F, gsl_vector *fit_state) {
gsl_distfit_ptype *params;
gsl_multifit_function_fdf *f;
params = (gsl_distfit_ptype *) (malloc(sizeof(gsl_distfit_ptype)));
params->n = n;
params->x = x;
params->y = y;
params->sigma = sigma;
params->F = F;
f = (gsl_multifit_function_fdf *) (malloc(sizeof(gsl_multifit_function_fdf)));
f->n = n;
f->p = gsl_distfit_psize(F);
f->params = params;
f->f = &generic_f;
f->df = &generic_df;
f->fdf = &generic_fdf;
gsl_multifit_fdfsolver_set (solver , f , fit_state);
}
int gsl_distfit(gsl_multifit_fdfsolver *solver, size_t n, double *x, double *y, double *sigma, gsl_distfit_ftype *F, gsl_vector *fit_state) {
gsl_matrix *covar;
size_t p,iter,status;
p = gsl_distfit_psize(F);
covar = gsl_matrix_alloc (p, p);
gsl_distfit_solver_set(solver,n,x,y,sigma,F,fit_state);
iter = 0;
do {
iter++;
status = gsl_multifit_fdfsolver_iterate (solver);
/*if (status)
break;*/
status = gsl_multifit_test_delta (solver->dx, solver->x, 1e-4, 1e-4);
/*printf("Status: %d error: %g %g \n",status,gsl_vector_get(solver->dx,0) , gsl_vector_get(solver->x,0));*/
} while (status == GSL_CONTINUE && iter < 500);
gsl_vector_memcpy(fit_state,solver->x);
free(solver->fdf->params);
free(solver->fdf);
return GSL_SUCCESS;
}
gsl_multifit_fdfsolver *gsl_distfit_solver_alloc(size_t n, gsl_distfit_ftype *F) {
const gsl_multifit_fdfsolver_type *T;
gsl_multifit_fdfsolver *solver;
T = gsl_multifit_fdfsolver_lmsder;
solver = gsl_multifit_fdfsolver_alloc (T, n , gsl_distfit_psize(F));
return solver;
}
size_t gsl_distfit_psize(gsl_distfit_ftype *F) {
return F->Nparams;
}
double gsl_distfit_chi_square(size_t n, double *x, double *y, double *sigma, gsl_distfit_ftype *F , gsl_vector *fit_state) {
double chi2;
size_t i;
chi2 = 0;
for (i=0; i < n; i++)
chi2 += gsl_pow_2(y[i] - F->F(x[i],fit_state));
return chi2;
}
void gsl_distfit_set_histogram_midpoints(const gsl_histogram *hist, double *x) {
size_t i;
double lower,upper;
lower = hist->range[0];
for (i=0; i < gsl_histogram_bins(hist); i++) {
upper = hist->range[i+1];
x[i] = 0.5*(upper + lower);
lower = upper;
}
}
double *gsl_distfit_xdata_alloc(const gsl_histogram *hist) {
double *x;
x = (double *) (calloc(gsl_histogram_bins(hist),sizeof(double)));
gsl_distfit_set_histogram_midpoints(hist,x);
return x;
}
double gsl_histogram_normalize(gsl_histogram *hist) {
double area,norm,lower,upper;
size_t i;
area = 0;
lower = hist->range[0];
for (i=0; i < gsl_histogram_bins(hist); i++) {
upper = hist->range[i+1];
area += gsl_histogram_get(hist,i) * (upper - lower);
lower = upper;
}
norm = 1.0/area;
gsl_histogram_scale(hist,norm);
return norm;
}
void gsl_distfit_fprintf(size_t n, double *x, double *y, double *sigma , gsl_distfit_ftype *F, gsl_vector *fit_state, FILE *stream) {
size_t i;
if (sigma == NULL) {
for (i=0; i < n; i++)
fprintf(stream,"%12.6f %9.6f 0.00 %12.6f \n",x[i],y[i],F->F(x[i],fit_state));
} else {
for (i=0; i < n; i++)
fprintf(stream,"%12.6f %9.6f %9.6f %12.6f \n",x[i],y[i],sigma[i],F->F(x[i],fit_state));
}
}
[-- Attachment #3: gsl_distfit_core.h --]
[-- Type: application/octet-stream, Size: 1953 bytes --]
#ifndef __GSL_DISTFIT_CORE_H__
#define __GSL_DISTFIT_CORE_H__
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_multifit_nlin.h>
#include <gsl/gsl_vector.h>
typedef double (* gsl_distfit_eval_type) (double , const gsl_vector *);
typedef struct {
size_t Nparams;
gsl_distfit_eval_type F; /* No pointer '*' because the typedef already defines a pointer! */
gsl_distfit_eval_type Pinv;
gsl_distfit_eval_type *dF;
void (* init) (size_t, double * , double *, gsl_vector *);
} gsl_distfit_ftype;
typedef struct {
size_t n;
double * x;
double * y;
double * sigma;
gsl_distfit_ftype *F;
} gsl_distfit_ptype;
gsl_distfit_ftype *gsl_distfit_ftype_alloc(size_t params);
void gsl_distfit_ftype_free(gsl_distfit_ftype *F);
void gsl_distfit_init(size_t n, double *x, double *px, gsl_distfit_ftype *F , gsl_vector *p);
int gsl_distfit(gsl_multifit_fdfsolver *solver, size_t n, double *x, double *y, double *sigma, gsl_distfit_ftype *F, gsl_vector *fit_state);
size_t gsl_distfit_psize(gsl_distfit_ftype *F);
double gsl_distfit_chi_square(size_t n, double *x, double *y, double *sigma, gsl_distfit_ftype *F , gsl_vector *fit_state);
double gsl_distfit_Pinv(gsl_distfit_ftype *F, double x , gsl_vector *fit_res);
gsl_distfit_ftype *gsl_distfit_func(char *dist_name);
void gsl_distfit_solver_set(gsl_multifit_fdfsolver *solver, size_t n, double *x, double *y, double *sigma, gsl_distfit_ftype *F, gsl_vector *fit_state);
gsl_multifit_fdfsolver *gsl_distfit_solver_alloc(size_t n, gsl_distfit_ftype *F);
void gsl_distfit_fprintf(size_t n, double *x, double *y, double *sigma , gsl_distfit_ftype *F, gsl_vector *fit_state, FILE *stream);
void gsl_histogram_set_midpoints(const gsl_histogram *hist, double *x);
double *gsl_distfit_xdata_alloc(const gsl_histogram *hist);
double gsl_histogram_normalize(gsl_histogram *hist);
#endif
[-- Attachment #4: gsl_distfit_eval.c --]
[-- Type: application/octet-stream, Size: 11243 bytes --]
#include <math.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_sf_gamma.h>
#include "gsl_distfit_eval.h"
/* Should maybe base everything more haevily on histograms?? */
/* W is akward */
/* Only approximate - center values approximated by left edge values. */
void gsl_distfit_init_moments(size_t n, double *x, double *px, double *_m1, double *_m2) {
size_t i;
double m1 = 0;
double m2 = 0;
double lower,upper,w;
w = 0;
lower = x[0];
for (i=0; i < (n-1); i++) {
upper = x[i+1];
w = upper - lower;
m1 += px[i]*x[i]*w;
m2 += gsl_pow_2(x[i])*px[i]*w;
lower = upper;
}
*_m1 = m1 + px[n-1]*x[n-1]*w;
*_m2 = m2 + px[n-1]*gsl_pow_2(x[n-1])*w;
}
/*p(x) dx = {1 \over \sqrt{2 \pi \sigma^2}} \exp (-(x - mu)^2 / 2\sigma^2) dx*/
double eval_gaussian(double x, const gsl_vector *p) {
double mu = gsl_vector_get(p,0);
double sigma2 = gsl_pow_2(gsl_vector_get(p,1));
return 1.0 / (sqrt(2*M_PI*sigma2)) * exp( - gsl_pow_2(x - mu)*0.5/sigma2);
}
double eval_gaussian_dsigma(double x, const gsl_vector *p) {
double mu = gsl_vector_get(p,0);
double sigma2 = gsl_pow_2(gsl_vector_get(p,1));
return exp(-gsl_pow_2(x - mu)*0.5/sigma2)/(sigma2*sqrt(2*M_PI))*(gsl_pow_2(x-mu)/sigma2 - 1);
}
double eval_gaussian_dmu(double x, const gsl_vector *p) {
double mu = gsl_vector_get(p,0);
double sigma2 = gsl_pow_2(gsl_vector_get(p,1));
return exp(-gsl_pow_2(x - mu)*0.5/sigma2)*sqrt(2)*(x - mu)/(sigma2*sqrt(sigma2*M_PI));
}
double eval_gaussian_Pinv(double x, const gsl_vector *p) {
double mu = gsl_vector_get(p,0);
double sigma = gsl_vector_get(p,1);
return gsl_cdf_gaussian_Pinv(x-mu,sigma);
}
void init_gaussian(size_t n, double *x, double *px, gsl_vector *p) {
double m1, m2;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
gsl_vector_set(p,0,m1);
gsl_vector_set(p,1,sqrt(m2 - m1*m1));
}
/* <Plain functions> */
/*p(x) dx = (x / sigma^2) exp(-x^2/(2 sigma^2)) dx*/
double eval_rayleigh(double x, const gsl_vector *p) {
double inv_sigma_sqr = 1 / gsl_pow_2(gsl_vector_get(p,0));
return x * inv_sigma_sqr * exp(-0.5*x*x*inv_sigma_sqr);
}
double eval_rayleigh_dsigma(double x, const gsl_vector *p) {
double sigma = gsl_vector_get(p,0);
return ((gsl_pow_2(x / sigma) - 2) * x / gsl_pow_3(sigma) * exp(-0.5*gsl_pow_2(x/sigma)));
}
double eval_rayleigh_Pinv(double x, const gsl_vector *p) {
double sigma = gsl_vector_get(p,0);
return gsl_cdf_rayleigh_Pinv(x,sigma);
}
/*
m1 = sigma * sqrt(pi/2);
m2 = 2*sigma^2;
*/
void init_rayleigh(size_t n, double *x, double *px, gsl_vector *p) {
double m1, m2;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
gsl_vector_set(p,0,0.5*(m1 * sqrt(2 / M_PI) + sqrt(m2 / 2)));
}
/* The lognormal distribution has the form
p(x) dx = 1/(x * sqrt(2 pi sigma^2)) exp(-(ln(x) - zeta)^2/2 sigma^2) dx
for x > 0. Lognormal random numbers are the exponentials of
gaussian random numbers */
/*
m1 = exp(zeta + sigma*sigma/2);
m2 = exp(2*(zeta + sigma*sigma));
*/
void init_lognormal(size_t n, double *x, double *px, gsl_vector *p) {
double m1,m2;
double zeta,sigma;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
zeta = 2 * log(m1) - 0.5*log(m2);
sigma = sqrt(log(m2) - 2*log(m1));
gsl_vector_set(p,0,zeta);
gsl_vector_set(p,1,sigma);
}
double eval_lognormal(double x, const gsl_vector *p) {
double zeta = gsl_vector_get(p,0);
double sigma = gsl_vector_get(p,1);
return sqrt(2/(M_PI*sigma*sigma))*exp(-(pow(log(x) - zeta,2)/(2*pow(sigma,2))))/(2 * x);
}
double eval_lognormal_dsigma(double x, const gsl_vector *p) {
double zeta = gsl_vector_get(p,0);
double sigma = gsl_vector_get(p,1);
double e = exp(-(pow(log(x) - zeta,2)/(2*pow(sigma,2)))) * sqrt(2 / (M_PI * pow(sigma,2)));
return e*(pow(log(x) - zeta,2) / (2*x*pow(sigma,3)) - 1/(2*x*sigma));
}
double eval_lognormal_dzeta(double x, const gsl_vector *p) {
double zeta = gsl_vector_get(p,0);
double sigma = gsl_vector_get(p,1);
return eval_lognormal(x,p)*(log(x) - zeta) / pow(sigma,2);
}
double eval_lognormal_Pinv(double x, const gsl_vector *p) {
double zeta = gsl_vector_get(p,0);
double sigma = gsl_vector_get(p,1);
return gsl_cdf_lognormal_Pinv(x,zeta,sigma);
}
/* The Weibull distribution has the form,
p(x) dx = (b/a) (x/a)^(b-1) exp(-(x/a)^b) dx
*/
double eval_weibull(double x, const gsl_vector *p) {
double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);
return (b / a) * pow(x / a,b-1) * exp(-pow(x/a,b));
}
double eval_weibull_da(double x, const gsl_vector *p) {
double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);
double exp_x_a_b = exp(-pow(x/a,b));
double x_a_b_1 = pow(x/a,b-1);
double inv_a2 = 1/pow(a,2);
return exp_x_a_b * inv_a2 * x_a_b_1 * b*b*(pow(x/a,b) - 1);
}
double eval_weibull_db(double x, const gsl_vector *p) {
double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);
double exp_x_a_b = exp(-pow(x/a,b));
double x_a_b_1 = pow(x/a,b-1);
double inv_a = 1/a;
return exp_x_a_b * x_a_b_1 * inv_a*(1 + b*log(x/a)*(1 - pow(x/a,b)));
}
double eval_weibull_Pinv(double x, const gsl_vector *p) {
double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);
return gsl_cdf_weibull_Pinv(x,a,b);
}
/*
m1 = beta * sf_gamma(1 + 1/alpha);
m2 = beta^2 * sf_gamma(1 + 2/alpha);
*/
void init_weibull(size_t n, double *x, double *px, gsl_vector *p) {
double m1,m2;
double alpha,beta;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
/*
For no apparent reason - I did not want to set up a nonlinear solver with Gamma functions.
*/
alpha = sqrt(m2 - m1*m1);
beta = 2;
gsl_vector_set(p,0,alpha);
gsl_vector_set(p,1,beta);
fprintf(stderr,"Warning: initialization of the weibull fit is completely broken\n");
}
double eval_exponential(double x, const gsl_vector *p) {
double mu = gsl_vector_get(p,0);
return 1.0 / mu * exp(-x/mu);
}
double eval_exponential_dmu(double x, const gsl_vector *p) {
double mu = gsl_vector_get(p,0);
return exp(-x/mu)*(x - 1)/gsl_pow_2(mu) ;
}
double eval_exponential_Pinv(double x, const gsl_vector *p) {
double mu = gsl_vector_get(p,0);
return gsl_cdf_exponential_Pinv(x,mu);
}
void init_exponential(size_t n, double *x, double *px, gsl_vector *p) {
double m1, m2;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
gsl_vector_set(p,0,m1);
}
/*
p(x) dx = {1 \over 2 \Gamma(\nu/2) } (x/2)^{\nu/2 - 1} \exp(-x/2) dx
*/
double eval_dgamma(double x) {
const double h0 = 0.05;
double h;
if (x > h0)
h = h0;
else
h = 0.5*x;
return (gsl_sf_gamma(x + h) - gsl_sf_gamma(x - h))*0.5/h;
}
double eval_chisq(double x, const gsl_vector *p) {
double nu = gsl_vector_get(p,0);
return 0.5 / gsl_sf_gamma(nu/2) * pow(x/2 , nu/2 - 1)*exp(-x/2);
}
double eval_chisq_dnu(double x, const gsl_vector *p) {
double nu = gsl_vector_get(p,0);
return exp(-x/2)*pow(2,-nu/2)*pow(x,nu/2 - 1)/gsl_pow_2(gsl_sf_gamma(nu/2))*(log(x)/2*gsl_sf_gamma(nu/2) - 0.5*eval_dgamma(nu/2));
}
void init_chisq(size_t n, double *x, double *px, gsl_vector *p) {
double m1, m2;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
printf("Kjorer init_chisq m1 = %g \n",m1);
gsl_vector_set(p,0,m1);
}
double eval_chisq_Pinv(double x, const gsl_vector *p) {
double nu = gsl_vector_get(p,0);
return gsl_cdf_chisq_Pinv(x,nu);
}
/*
p(x) dx = {\Gamma((\nu + 1)/2) \over \sqrt{\pi \nu} \Gamma(\nu/2)}
(1 + x^2/\nu)^{-(\nu + 1)/2} dx*/
double eval_tdist(double x, const gsl_vector *p) {
double nu = gsl_vector_get(p,0);
return gsl_sf_gamma(0.5*(nu + 1))/(sqrt(M_PI * nu) * gsl_sf_gamma(nu*0.5))*pow(1 + gsl_pow_2(x)/nu,-0.5*(nu + 1));
}
double eval_tdist_dnu(double x, const gsl_vector *p) {
double nu = gsl_vector_get(p,0);
double gamma_nu_half = gsl_sf_gamma(nu/2);
double gamma_nup1_half = gsl_sf_gamma((nu+1)/2);
double sqrt_nu_pi = sqrt(nu*M_PI);
double power_arg = 1 + gsl_pow_2(x)/nu;
double gamma_factor = gamma_nup1_half/(sqrt_nu_pi * gamma_nu_half);
double power_factor = pow(power_arg , -0.5*(1 + nu));
double dgamma_factor = (eval_dgamma(0.5*(nu + 1))*sqrt_nu_pi*gamma_nu_half - gamma_nup1_half*(eval_dgamma(nu*0.5)*sqrt_nu_pi + 0.5*sqrt(M_PI/nu)*gamma_nu_half)) /
(nu*M_PI*gsl_pow_2(gamma_nu_half));
double dpower_factor = power_factor*((nu + 1)*0.5*gsl_pow_2(x/nu)/power_arg - 0.5*log(power_arg));
return gamma_factor*dpower_factor + dgamma_factor*power_factor;
}
void init_tdist(size_t n, double *x, double *px, gsl_vector *p) {
const double nu_max = 100;
double m1, m2;
double nu;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
nu = 2*m2/(m2 - 1);
if (nu > nu_max)
nu = nu_max;
gsl_vector_set(p, 0 , nu);
}
double eval_tdist_Pinv(double x, const gsl_vector *p) {
double nu = gsl_vector_get(p,0);
return gsl_cdf_tdist_Pinv(x,nu);
}
/*
p(x) dx = {\Gamma(a+b) \over \Gamma(a) \Gamma(b)} x^{a-1} (1-x)^{b-1} dx
*/
double eval_beta(double x, const gsl_vector *p) {
double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);
return gsl_sf_gamma(a+b) / (gsl_sf_gamma(a) * gsl_sf_gamma(b)) * pow(x,a-1)* pow(1-x,b-1);
}
double eval_beta_da(double x, const gsl_vector *p) {
double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);
double gamma_a = gsl_sf_gamma(a);
double gamma_b = gsl_sf_gamma(b);
double gamma_ab = gsl_sf_gamma(a+b);
double power_xa_1 = pow(x,a-1);
double power_1_xb_1 = pow(1-x,b-1);
double power_factor = power_xa_1 * power_1_xb_1;
double gamma_factor = gamma_ab / (gamma_a * gamma_b);
return (eval_dgamma(a+b)*(gamma_a * gamma_b) - eval_dgamma(a) * gamma_b * gamma_ab)/(gsl_pow_2(gamma_a * gamma_b)) * power_factor +
gamma_factor * (power_factor * log(x));
}
double eval_beta_db(double x, const gsl_vector *p) {
double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);
double gamma_a = gsl_sf_gamma(a);
double gamma_b = gsl_sf_gamma(b);
double gamma_ab = gsl_sf_gamma(a+b);
double power_xa_1 = pow(x,a-1);
double power_1_xb_1 = pow(1-x,b-1);
double power_factor = power_xa_1 * power_1_xb_1;
double gamma_factor = gamma_ab / (gamma_a * gamma_b);
return (eval_dgamma(a+b)*(gamma_a * gamma_b) - eval_dgamma(b) * gamma_a * gamma_ab)/(gsl_pow_2(gamma_a * gamma_b)) * power_factor +
gamma_factor * (power_factor * log(1-x));
}
void init_beta(size_t n, double *x, double *px, gsl_vector *p) {
double m1, m2;
double a,b;
gsl_distfit_init_moments(n,x,px,&m1,&m2);
a = 1;
b = 1;
gsl_vector_set(p,0,a);
gsl_vector_set(p,1,b);
fprintf(stderr," Warning: init of beta distribution is *not* implemented - using dummy values a = b = 1 \n");
/* Solve these equations:
m1 = a / (a + b);
m2 = 1 / (a+b)^2 * (a*b/(a+b+1) + a*a)
*/
}
double eval_beta_Pinv(double x, const gsl_vector *p) {
/*double a = gsl_vector_get(p,0);
double b = gsl_vector_get(p,1);*/
fprintf(stderr," Sorry Pinv is not implemented for the beta distribution - aborting\n");
exit(1);
return 0;
/*return gsl_cdf_beta_Pinv(x,a,b);*/
}
[-- Attachment #5: gsl_distfit_eval.h --]
[-- Type: application/octet-stream, Size: 2327 bytes --]
#ifndef __GSL_DISTFIT_EVAL_H__
#define __GSL_DISTFIT_EVAL_H__
#include <gsl/gsl_vector.h>
void gsl_distfit_init_moments(size_t n, double *x, double *px, double *_m1, double *_m2);
double eval_gaussian(double x, const gsl_vector *p);
double eval_gaussian_dsigma(double x, const gsl_vector *p);
double eval_gaussian_dmu(double x, const gsl_vector *p);
double eval_gaussian_Pinv(double x, const gsl_vector *p);
void init_gaussian(size_t n, double *x, double *px, gsl_vector *p);
double eval_rayleigh(double x, const gsl_vector *p);
double eval_rayleigh_dsigma(double x, const gsl_vector *p);
double eval_rayleigh_Pinv(double x, const gsl_vector *p);
void init_rayleigh(size_t n, double *x, double *px, gsl_vector *p);
void init_lognormal(size_t n, double *x, double *px, gsl_vector *p);
double eval_lognormal(double x, const gsl_vector *p);
double eval_lognormal_dsigma(double x, const gsl_vector *p);
double eval_lognormal_dzeta(double x, const gsl_vector *p);
double eval_lognormal_Pinv(double x, const gsl_vector *p);
double eval_weibull(double x, const gsl_vector *p);
double eval_weibull_da(double x, const gsl_vector *p);
double eval_weibull_db(double x, const gsl_vector *p);
double eval_weibull_Pinv(double x, const gsl_vector *p);
void init_weibull(size_t n, double *x, double *px, gsl_vector *p);
double eval_exponential(double x, const gsl_vector *p);
double eval_exponential_dmu(double x, const gsl_vector *p);
double eval_exponential_Pinv(double x, const gsl_vector *p);
void init_exponential(size_t n, double *x, double *px, gsl_vector *p);
double eval_chisq(double x, const gsl_vector *p);
double eval_chisq_Pinv(double x, const gsl_vector *p);
void init_chisq(size_t n, double *x, double *px, gsl_vector *p);
double eval_chisq_dnu (double x, const gsl_vector *p);
double eval_tdist(double x, const gsl_vector *p);
double eval_tdist_Pinv(double x, const gsl_vector *p);
void init_tdist(size_t n, double *x, double *px, gsl_vector *p);
double eval_tdist_dnu(double x, const gsl_vector *p);
double eval_beta(double x, const gsl_vector *p);
double eval_beta_Pinv(double x, const gsl_vector *p);
void init_beta(size_t n, double *x, double *px, gsl_vector *p);
double eval_beta_da (double x, const gsl_vector *p);
double eval_beta_db (double x, const gsl_vector *p);
#endif
[-- Attachment #6: Makefile --]
[-- Type: application/octet-stream, Size: 686 bytes --]
CC = gcc
LD = gcc
CFLAGS = -O4 -ansi -pedantic -Wall
GSL_HOME = /Home/skjellgran/hove/gsl/gsl-1.4
LIB_PATH = -L$(GSL_HOME)/lib
INCLUDE_PATH = -I$(GSL_HOME)/include
LIBS = -lgsl -lgslcblas -lm
OBJECTS = gsl_distfit_core.o gsl_distfit_eval.o dist-test.o
EXE = dist-test
$(EXE): $(OBJECTS)
$(LD) $(LDFLAGS) $(OBJECTS) $(LIB_PATH) $(LIBS) -o $(EXE)
%.o : %.c
$(CC) -c $(CFLAGS) $(INCLUDE_PATH) $(CPPFLAGS) $< -o $@
gsl_distfit_eval.o : gsl_distfit_eval.h gsl_distfit_eval.c
gsl_distfit_core.o : gsl_distfit_core.h gsl_distfit_core.c gsl_distfit_eval.o
dist-test.o : dist-test.c gsl_distfit_core.o
clean:
rm -f *.o
[-- Attachment #7: dist-test.c --]
[-- Type: application/octet-stream, Size: 1899 bytes --]
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_randist.h>
#include "gsl_distfit_core.h"
int main (void) {
gsl_histogram *h;
gsl_rng *r;
double *s,*x,*y;
double sigma = 1.0;
double mu = 0.0;
double R;
size_t bins = 100;
size_t sample = 250000;
size_t i;
FILE *stream;
/* Special variables for the fitting */
gsl_vector *fit_state;
gsl_distfit_ftype *F;
gsl_multifit_fdfsolver *solver;
/* Starting with allocation ..*/
r = gsl_rng_alloc (gsl_rng_mt19937);
gsl_rng_set(r , 0);
h = gsl_histogram_alloc(bins);
gsl_histogram_set_ranges_uniform(h,-3*sigma,3*sigma);
for (i=0; i < sample; i++) {
R = mu + gsl_ran_gaussian(r,sigma);
gsl_histogram_increment(h , R);
}
gsl_histogram_normalize(h);
/* Don't have any reasonable values for uncertainities*/
s = (double *) (calloc(bins,sizeof(double)));
for (i=0; i<bins; i++)
s[i] = 1.0;
y = h->bin;
x = gsl_distfit_xdata_alloc(h);
F = gsl_distfit_func("gaussian");
/*
Nearly all subsequent calls to *_distfit_* functions take this
F - as an argument.
*/
fit_state = gsl_vector_alloc(gsl_distfit_psize(F));
gsl_distfit_init(bins,x,y,F,fit_state);
printf("Starting values: mu = %10.7f sigma = %10.7f \n",gsl_vector_get(fit_state,0),gsl_vector_get(fit_state,1));
solver = gsl_distfit_solver_alloc(bins,F);
gsl_distfit(solver,bins,x,y,s,F,fit_state);
printf("Fit resulted in: mu = %10.7f sigma = %10.7f \n",gsl_vector_get(fit_state,0),gsl_vector_get(fit_state,1));
stream = fopen("RDist","w");
gsl_distfit_fprintf(bins,x,y,NULL,F,fit_state,stream);
fclose(stream);
free(x);
free(s);
gsl_histogram_free(h);
gsl_rng_free(r);
gsl_multifit_fdfsolver_free(solver);
gsl_distfit_ftype_free(F);
return 1;
}
[-- Attachment #8: Type: text/plain, Size: 450 bytes --]
--
/--------------------------------------------------------------------\
/ Joakim Hove / hove@bccs.no / (55 5) 84076 | \
| Unifob AS, Avdeling for Beregningsvitenskap (BCCS) | Stabburveien 18 |
| CMU | 5231 Paradis |
\ Thormøhlensgt.55, 5020 Bergen. | 55 91 28 18 /
\--------------------------------------------------------------------/
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: Functionality to fit data to statistical distributions
2003-12-22 15:30 Functionality to fit data to statistical distributions Joakim Hove
@ 2003-12-23 10:31 ` Brian Gough
0 siblings, 0 replies; 2+ messages in thread
From: Brian Gough @ 2003-12-23 10:31 UTC (permalink / raw)
To: Joakim Hove; +Cc: gsl-discuss
Joakim Hove writes:
> I have written some functionality to set up the
> 'gsl_multifit_fdfsolver' to determine parameters for various
> statistical distributions.
Thanks, that sounds interesting - I'll take a look at it.
--
Brian
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2003-12-23 10:31 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-12-22 15:30 Functionality to fit data to statistical distributions Joakim Hove
2003-12-23 10:31 ` Brian Gough
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).