RobustStats.hpp

Go to the documentation of this file.
00001 #pragma ident "$Id: RobustStats.hpp 2402 2010-04-20 17:22:04Z pben $"
00002 
00003 //============================================================================
00004 //
00005 //  This file is part of GPSTk, the GPS Toolkit.
00006 //
00007 //  The GPSTk is free software; you can redistribute it and/or modify
00008 //  it under the terms of the GNU Lesser General Public License as published
00009 //  by the Free Software Foundation; either version 2.1 of the License, or
00010 //  any later version.
00011 //
00012 //  The GPSTk is distributed in the hope that it will be useful,
00013 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 //  GNU Lesser General Public License for more details.
00016 //
00017 //  You should have received a copy of the GNU Lesser General Public
00018 //  License along with GPSTk; if not, write to the Free Software Foundation,
00019 //  Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020 //  
00021 //  Copyright 2004, The University of Texas at Austin
00022 //
00023 //============================================================================
00024 
00035 #ifndef GPSTK_ROBUSTSTATS_HPP
00036 #define GPSTK_ROBUSTSTATS_HPP
00037 
00038 //------------------------------------------------------------------------------------
00039 // system includes
00040 #include <cmath>
00041 #include <string>
00042 
00043 // GPSTk
00044 #include "Exception.hpp"
00045 
00046 namespace gpstk {
00047 //------------------------------------------------------------------------------------
00048 #define ABSOLUTE(x) ((x) < T() ? -(x) : (x))
00049 
00050 #define RobustTuningT (1.5)         // or 1.345
00051 
00052 #define RobustTuningA (0.778)       // or 0.67
00053 
00054 #define RobustTuningE (0.6745)
00055 
00056 //------------------------------------------------------------------------------------
00059 
00060    //--------------------------------------------------------------------------------
00061    // quick sort, for use by robust statistics routines
00062 
00069    template <typename T>
00070    int Qsort_compare(const T& a, const T& b) {
00071       if(a > b) return 1;
00072       else if(a < b) return -1;
00073       else return 0;
00074    }
00075    
00081    template <typename T>
00082    void insert(T *sa,
00083                int na,
00084                int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00085    {
00086       int i,j;
00087       T stemp;
00088       for(i=1; i < na; i=i+1) { // insert the i-th element into the sorted array
00089          stemp = sa[i];
00090          j = i - 1;             // find where it goes
00091          while((j >= 0) && (comp(stemp,sa[j]) < 0)) { 
00092             sa[j+1] = sa[j];
00093             j = j - 1;
00094          }
00095          sa[j+1] = stemp;
00096       }
00097    }  // end insert sort
00098 
00106    template <typename T>
00107    void QSort(T *sa,
00108               int na,
00109               int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00110    {
00111       int i,j,nr;
00112       T stemp, spart;
00113       while(1) { 
00114          if(na < 8) {                     // use insert sort for small arrays
00115             insert(sa,na,comp);
00116             return;
00117          }
00118          spart = sa[na/2];                // pick middle element as pivot
00119          i = -1; 
00120          j = na;
00121          while(1) {
00122             do {                          // find first element to move right
00123                i = i + 1;
00124             } while(comp(sa[i],spart) < 0);
00125             do {                          // find first element to move left
00126                j = j - 1;
00127             } while(comp(sa[j], spart) > 0);
00128                                           // if the boundaries have met,
00129                                           // through paritioning,
00130             if(i >= j) break;
00131                                           // swap i and j elements
00132             stemp = sa[i];
00133             sa[i] = sa[j];
00134             sa[j] = stemp;
00135          }
00136          nr = na - i;
00137          if(i < (na/2)) {                 // now sort each partition
00138             QSort(sa, i, comp);           // sort left side 
00139             sa = &sa[i];                  // sort right side here
00140             na = nr;                      // memsort(&(sa[i]),nr,comp);
00141          }
00142          else { 
00143             QSort(&(sa[i]), nr, comp);    // sort right side
00144             na = i;
00145          }
00146       }
00147    }  // end QSort
00148    
00154    template <typename T, typename S>
00155    void insert(T *sa,
00156                S *pa,
00157                int na,
00158                int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00159    {
00160       int i,j;
00161       T stemp;
00162       S ptemp;
00163       for(i=1; i < na; i=i+1) { // insert the i-th element into the sorted array
00164          stemp = sa[i];
00165          ptemp = pa[i];
00166          j = i - 1;             // find where it goes
00167          while((j >= 0) && (comp(stemp,sa[j]) < 0)) { 
00168             sa[j+1] = sa[j];
00169             pa[j+1] = pa[j];
00170             j = j - 1;
00171          }
00172          sa[j+1] = stemp;
00173          pa[j+1] = ptemp;
00174       }
00175    }  // end insert sort
00176 
00183    template <typename T, typename S>
00184    void QSort(T *sa,
00185               S *pa,
00186               int na,
00187               int (*comp)(const T&, const T&) = gpstk::Qsort_compare)
00188    {
00189       int i,j,nr;
00190       T stemp, spart;
00191       S ptemp, ppart;
00192       while(1) { 
00193          if(na < 8) {                     // use insert sort for small arrays
00194             insert(sa,pa,na,comp);
00195             return;
00196          }
00197          spart = sa[na/2];                // pick middle element as pivot
00198          ppart = pa[na/2];
00199          i = -1; 
00200          j = na;
00201          while(1) {
00202             do {                          // find first element to move right
00203                i = i + 1;
00204             } while(comp(sa[i],spart) < 0);
00205             do {                          // find first element to move left
00206                j = j - 1;
00207             } while(comp(sa[j], spart) > 0);
00208                                           // if the boundaries have met,
00209                                           // through paritioning,
00210             if(i >= j) break;
00211                                           // swap i and j elements
00212             stemp = sa[i]; ptemp = pa[i];
00213             sa[i] = sa[j]; pa[i] = pa[j];
00214             sa[j] = stemp; pa[j] = ptemp;
00215          }
00216          nr = na - i;
00217          if(i < (na/2)) {                 // now sort each partition
00218             QSort(sa, pa, i, comp);       // sort left side 
00219             sa = &sa[i];                  // sort right side here
00220             pa = &pa[i];
00221             na = nr;                      // memsort(&(sa[i]),nr,comp);
00222          }
00223          else { 
00224             QSort(&(sa[i]), &(pa[i]), nr, comp);    // sort right side
00225             na = i;
00226          }
00227       }
00228    }  // end QSort
00229    
00235    template <typename T>
00236    T errfc(T x) throw()
00237    {
00238       T t,z,ret;
00239       z = ::fabs(x);
00240       t = T(1)/(T(1)+z/T(2));
00241       ret = t * ::exp(-z*z - 1.26551223 + t*(1.00002368 + t*(0.37409196
00242               + t*(0.09678418 + t*(-0.18628806 + t*(0.27886807 + t*(-1.13520398
00243               + t*(1.48851587 + t*(-0.82215223 + t*0.17087277)))))))));
00244       return (x >= T(0) ? ret : T(2)-ret);
00245    }  // end errfc
00246 
00254    template <typename T>
00255    T normalCDF(T m, T s, T x) throw()
00256    {
00257       if(s == T(0)) return T(0);
00258       T arg = (x - m)/(::sqrt(T(2.0)) * s);
00259       return (T(1) - errfc<T>(arg)/T(2));
00260    }  // end normal CDF
00261 
00272    double ADtest(double *xd, const int nd, double m, double s, bool save_flag=true)
00273       throw(Exception);
00274 
00275    //--------------------------------------------------------------------------------
00277    namespace Robust
00278    {
00286    template <typename T>
00287    T Median(T *xd, const int nd, bool save_flag=true)
00288       throw(Exception)
00289    {
00290       if(!xd || nd < 2) {
00291          Exception e("Invalid input");
00292          GPSTK_THROW(e);
00293       }
00294 
00295       try {
00296          int i;
00297          T med, *save;
00298 
00299          if(save_flag) {
00300             save = new T[nd];
00301             if(!save) {
00302                Exception e("Could not allocate temporary array");
00303                GPSTK_THROW(e);
00304             }
00305             for(i=0; i<nd; i++) save[i]=xd[i];
00306          }
00307 
00308          QSort(xd,nd);
00309 
00310          if(nd%2)
00311             med = xd[(nd+1)/2-1];
00312          else
00313             med = (xd[nd/2-1]+xd[nd/2])/T(2);
00314 
00315             // restore original data from temporary
00316          if(save_flag) {
00317             for(i=0; i<nd; i++) xd[i]=save[i];
00318             delete[] save;
00319          }
00320 
00321          return med;
00322       }
00323       catch(Exception& e) { GPSTK_RETHROW(e); }
00324 
00325    }  // end Median
00326 
00335    template <typename T>
00336    void Quartiles(const T *xd, const int nd, T& Q1, T& Q3)
00337       throw(Exception)
00338    {
00339       if(!xd || nd < 2) {
00340          Exception e("Invalid input");
00341          GPSTK_THROW(e);
00342       }
00343 
00344       int q;
00345       if(nd % 2) q = (nd+1)/2;
00346       else       q = nd/2;
00347 
00348       if(q % 2) {
00349          Q1 = xd[(q+1)/2-1];
00350          Q3 = xd[nd-(q+1)/2];
00351       }
00352       else {
00353          Q1 = (xd[q/2-1]+xd[q/2])/T(2);
00354          Q3 = (xd[nd-q/2]+xd[nd-q/2-1])/T(2);
00355       }
00356    }  // end Quartiles
00357 
00366    template <typename T>
00367    T MedianAbsoluteDeviation(T *xd, int nd, T& M, bool save_flag=true)
00368       throw(Exception)
00369    {
00370       int i;
00371       T mad, *save;
00372 
00373       if(!xd || nd < 2) {
00374          Exception e("Invalid input");
00375          GPSTK_THROW(e);
00376       }
00377 
00378          // store data in a temporary array
00379       if(save_flag) {
00380          save = new T[nd];
00381          if(!save) {
00382             Exception e("Could not allocate temporary array");
00383             GPSTK_THROW(e);
00384          }
00385          for(i=0; i<nd; i++) save[i]=xd[i];
00386       }
00387 
00388          // get the median (don't care if xd gets sorted...)
00389       M = Median(xd, nd, false);
00390 
00391          // compute xd=abs(xd-M)
00392       for(i=0; i<nd; i++) xd[i] = ABSOLUTE(xd[i]-M);
00393 
00394          // sort xd in ascending order
00395       QSort(xd, nd);
00396 
00397          // find median and normalize to get mad
00398       mad = Median(xd, nd, false) / T(RobustTuningE);
00399 
00400          // restore original data from temporary
00401       if(save_flag) {
00402          for(i=0; i<nd; i++) xd[i]=save[i];
00403          delete[] save;
00404       }
00405 
00406       return mad;
00407 
00408    }  // end MedianAbsoluteDeviation
00409 
00412    template <typename T>
00413    T MAD(T *xd, int nd, T& M, bool save_flag=true)
00414       throw(Exception)
00415    { return MedianAbsoluteDeviation(xd,nd,M,save_flag); }
00416 
00429    template <typename T>
00430    T MEstimate(const T *xd, int nd, const T& M, const T& MAD, T *w=NULL)
00431       throw(Exception)
00432    {
00433       try {
00434          T tv, m, mold, sum, sumw, *wt, weight, *t;
00435          T tol=0.000001;
00436          int i, n, N=10;      // N is the max number of iterations
00437 
00438          if(!xd || nd < 2) {
00439             Exception e("Invalid input");
00440             GPSTK_THROW(e);
00441          }
00442 
00443          tv = T(RobustTuningT)*MAD;
00444          n = 0;
00445          m = M;
00446          do {
00447             mold = m;
00448             n++;
00449             sum = sumw = T();
00450             for(i=0; i<nd; i++) {
00451                if(w) wt=&w[i];
00452                else wt=&weight;
00453                *wt = T(1);
00454                if(xd[i]<m-tv)      *wt = -tv/(xd[i]-m);
00455                else if(xd[i]>m+tv) *wt =  tv/(xd[i]-m);
00456                sumw += (*wt);
00457                sum += (*wt)*xd[i];
00458             }
00459             m = sum / sumw;
00460 
00461          } while(T(ABSOLUTE((m-mold)/m)) > tol && n < N);
00462 
00463          return m;
00464       }
00465       catch(Exception& e) { GPSTK_RETHROW(e); }
00466 
00467    }  // end MEstimate
00468 
00488    int RobustPolyFit(double *xd, const double *td, int nd,
00489                      int n, double *c, double *w=NULL)
00490       throw(Exception);
00491 
00499    void StemLeafPlot(std::ostream& os, double *xd, long nd,
00500          std::string msg=std::string(""))
00501       throw(Exception);
00502 
00513    void QuantilePlot(double *yd, long nd, double *xd)
00514       throw(Exception);
00515 
00516    }  // end Robust namespace
00517 
00519 
00520 }  // end namespace gpstk
00521 
00522 #undef ABSOLUTE
00523 
00524 #endif

Generated on Wed Feb 8 03:31:02 2012 for GPS ToolKit Software Library by  doxygen 1.3.9.1