00001 #pragma ident "$Id: RobustStats.hpp 2402 2010-04-20 17:22:04Z pben $"
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00035 #ifndef GPSTK_ROBUSTSTATS_HPP
00036 #define GPSTK_ROBUSTSTATS_HPP
00037
00038
00039
00040 #include <cmath>
00041 #include <string>
00042
00043
00044 #include "Exception.hpp"
00045
00046 namespace gpstk {
00047
00048 #define ABSOLUTE(x) ((x) < T() ? -(x) : (x))
00049
00050 #define RobustTuningT (1.5) // or 1.345
00051
00052 #define RobustTuningA (0.778) // or 0.67
00053
00054 #define RobustTuningE (0.6745)
00055
00056
00059
00060
00061
00062
00069 template <typename T>
00070 int Qsort_compare(const T& a, const T& b) {
00071 if(a > b) return 1;
00072 else if(a < b) return -1;
00073 else return 0;
00074 }
00075
00081 template <typename T>
00082 void insert(T *sa,
00083 int na,
00084 int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00085 {
00086 int i,j;
00087 T stemp;
00088 for(i=1; i < na; i=i+1) {
00089 stemp = sa[i];
00090 j = i - 1;
00091 while((j >= 0) && (comp(stemp,sa[j]) < 0)) {
00092 sa[j+1] = sa[j];
00093 j = j - 1;
00094 }
00095 sa[j+1] = stemp;
00096 }
00097 }
00098
00106 template <typename T>
00107 void QSort(T *sa,
00108 int na,
00109 int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00110 {
00111 int i,j,nr;
00112 T stemp, spart;
00113 while(1) {
00114 if(na < 8) {
00115 insert(sa,na,comp);
00116 return;
00117 }
00118 spart = sa[na/2];
00119 i = -1;
00120 j = na;
00121 while(1) {
00122 do {
00123 i = i + 1;
00124 } while(comp(sa[i],spart) < 0);
00125 do {
00126 j = j - 1;
00127 } while(comp(sa[j], spart) > 0);
00128
00129
00130 if(i >= j) break;
00131
00132 stemp = sa[i];
00133 sa[i] = sa[j];
00134 sa[j] = stemp;
00135 }
00136 nr = na - i;
00137 if(i < (na/2)) {
00138 QSort(sa, i, comp);
00139 sa = &sa[i];
00140 na = nr;
00141 }
00142 else {
00143 QSort(&(sa[i]), nr, comp);
00144 na = i;
00145 }
00146 }
00147 }
00148
00154 template <typename T, typename S>
00155 void insert(T *sa,
00156 S *pa,
00157 int na,
00158 int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00159 {
00160 int i,j;
00161 T stemp;
00162 S ptemp;
00163 for(i=1; i < na; i=i+1) {
00164 stemp = sa[i];
00165 ptemp = pa[i];
00166 j = i - 1;
00167 while((j >= 0) && (comp(stemp,sa[j]) < 0)) {
00168 sa[j+1] = sa[j];
00169 pa[j+1] = pa[j];
00170 j = j - 1;
00171 }
00172 sa[j+1] = stemp;
00173 pa[j+1] = ptemp;
00174 }
00175 }
00176
00183 template <typename T, typename S>
00184 void QSort(T *sa,
00185 S *pa,
00186 int na,
00187 int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00188 {
00189 int i,j,nr;
00190 T stemp, spart;
00191 S ptemp, ppart;
00192 while(1) {
00193 if(na < 8) {
00194 insert(sa,pa,na,comp);
00195 return;
00196 }
00197 spart = sa[na/2];
00198 ppart = pa[na/2];
00199 i = -1;
00200 j = na;
00201 while(1) {
00202 do {
00203 i = i + 1;
00204 } while(comp(sa[i],spart) < 0);
00205 do {
00206 j = j - 1;
00207 } while(comp(sa[j], spart) > 0);
00208
00209
00210 if(i >= j) break;
00211
00212 stemp = sa[i]; ptemp = pa[i];
00213 sa[i] = sa[j]; pa[i] = pa[j];
00214 sa[j] = stemp; pa[j] = ptemp;
00215 }
00216 nr = na - i;
00217 if(i < (na/2)) {
00218 QSort(sa, pa, i, comp);
00219 sa = &sa[i];
00220 pa = &pa[i];
00221 na = nr;
00222 }
00223 else {
00224 QSort(&(sa[i]), &(pa[i]), nr, comp);
00225 na = i;
00226 }
00227 }
00228 }
00229
00235 template <typename T>
00236 T errfc(T x) throw()
00237 {
00238 T t,z,ret;
00239 z = ::fabs(x);
00240 t = T(1)/(T(1)+z/T(2));
00241 ret = t * ::exp(-z*z - 1.26551223 + t*(1.00002368 + t*(0.37409196
00242 + t*(0.09678418 + t*(-0.18628806 + t*(0.27886807 + t*(-1.13520398
00243 + t*(1.48851587 + t*(-0.82215223 + t*0.17087277)))))))));
00244 return (x >= T(0) ? ret : T(2)-ret);
00245 }
00246
00254 template <typename T>
00255 T normalCDF(T m, T s, T x) throw()
00256 {
00257 if(s == T(0)) return T(0);
00258 T arg = (x - m)/(::sqrt(T(2.0)) * s);
00259 return (T(1) - errfc<T>(arg)/T(2));
00260 }
00261
00272 double ADtest(double *xd, const int nd, double m, double s, bool save_flag=true)
00273 throw(Exception);
00274
00275
00277 namespace Robust
00278 {
00286 template <typename T>
00287 T Median(T *xd, const int nd, bool save_flag=true)
00288 throw(Exception)
00289 {
00290 if(!xd || nd < 2) {
00291 Exception e("Invalid input");
00292 GPSTK_THROW(e);
00293 }
00294
00295 try {
00296 int i;
00297 T med, *save;
00298
00299 if(save_flag) {
00300 save = new T[nd];
00301 if(!save) {
00302 Exception e("Could not allocate temporary array");
00303 GPSTK_THROW(e);
00304 }
00305 for(i=0; i<nd; i++) save[i]=xd[i];
00306 }
00307
00308 QSort(xd,nd);
00309
00310 if(nd%2)
00311 med = xd[(nd+1)/2-1];
00312 else
00313 med = (xd[nd/2-1]+xd[nd/2])/T(2);
00314
00315
00316 if(save_flag) {
00317 for(i=0; i<nd; i++) xd[i]=save[i];
00318 delete[] save;
00319 }
00320
00321 return med;
00322 }
00323 catch(Exception& e) { GPSTK_RETHROW(e); }
00324
00325 }
00326
00335 template <typename T>
00336 void Quartiles(const T *xd, const int nd, T& Q1, T& Q3)
00337 throw(Exception)
00338 {
00339 if(!xd || nd < 2) {
00340 Exception e("Invalid input");
00341 GPSTK_THROW(e);
00342 }
00343
00344 int q;
00345 if(nd % 2) q = (nd+1)/2;
00346 else q = nd/2;
00347
00348 if(q % 2) {
00349 Q1 = xd[(q+1)/2-1];
00350 Q3 = xd[nd-(q+1)/2];
00351 }
00352 else {
00353 Q1 = (xd[q/2-1]+xd[q/2])/T(2);
00354 Q3 = (xd[nd-q/2]+xd[nd-q/2-1])/T(2);
00355 }
00356 }
00357
00366 template <typename T>
00367 T MedianAbsoluteDeviation(T *xd, int nd, T& M, bool save_flag=true)
00368 throw(Exception)
00369 {
00370 int i;
00371 T mad, *save;
00372
00373 if(!xd || nd < 2) {
00374 Exception e("Invalid input");
00375 GPSTK_THROW(e);
00376 }
00377
00378
00379 if(save_flag) {
00380 save = new T[nd];
00381 if(!save) {
00382 Exception e("Could not allocate temporary array");
00383 GPSTK_THROW(e);
00384 }
00385 for(i=0; i<nd; i++) save[i]=xd[i];
00386 }
00387
00388
00389 M = Median(xd, nd, false);
00390
00391
00392 for(i=0; i<nd; i++) xd[i] = ABSOLUTE(xd[i]-M);
00393
00394
00395 QSort(xd, nd);
00396
00397
00398 mad = Median(xd, nd, false) / T(RobustTuningE);
00399
00400
00401 if(save_flag) {
00402 for(i=0; i<nd; i++) xd[i]=save[i];
00403 delete[] save;
00404 }
00405
00406 return mad;
00407
00408 }
00409
00412 template <typename T>
00413 T MAD(T *xd, int nd, T& M, bool save_flag=true)
00414 throw(Exception)
00415 { return MedianAbsoluteDeviation(xd,nd,M,save_flag); }
00416
00429 template <typename T>
00430 T MEstimate(const T *xd, int nd, const T& M, const T& MAD, T *w=NULL)
00431 throw(Exception)
00432 {
00433 try {
00434 T tv, m, mold, sum, sumw, *wt, weight, *t;
00435 T tol=0.000001;
00436 int i, n, N=10;
00437
00438 if(!xd || nd < 2) {
00439 Exception e("Invalid input");
00440 GPSTK_THROW(e);
00441 }
00442
00443 tv = T(RobustTuningT)*MAD;
00444 n = 0;
00445 m = M;
00446 do {
00447 mold = m;
00448 n++;
00449 sum = sumw = T();
00450 for(i=0; i<nd; i++) {
00451 if(w) wt=&w[i];
00452 else wt=&weight;
00453 *wt = T(1);
00454 if(xd[i]<m-tv) *wt = -tv/(xd[i]-m);
00455 else if(xd[i]>m+tv) *wt = tv/(xd[i]-m);
00456 sumw += (*wt);
00457 sum += (*wt)*xd[i];
00458 }
00459 m = sum / sumw;
00460
00461 } while(T(ABSOLUTE((m-mold)/m)) > tol && n < N);
00462
00463 return m;
00464 }
00465 catch(Exception& e) { GPSTK_RETHROW(e); }
00466
00467 }
00468
00488 int RobustPolyFit(double *xd, const double *td, int nd,
00489 int n, double *c, double *w=NULL)
00490 throw(Exception);
00491
00499 void StemLeafPlot(std::ostream& os, double *xd, long nd,
00500 std::string msg=std::string(""))
00501 throw(Exception);
00502
00513 void QuantilePlot(double *yd, long nd, double *xd)
00514 throw(Exception);
00515
00516 }
00517
00519
00520 }
00521
00522 #undef ABSOLUTE
00523
00524 #endif