|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Written (W) 2007 Konrad Rieck 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #ifndef __MATHEMATICS_H_ 00014 #define __MATHEMATICS_H_ 00015 00016 #include "lib/common.h" 00017 #include "lib/io.h" 00018 #include "lib/lapack.h" 00019 #include "base/SGObject.h" 00020 #include "base/Parallel.h" 00021 00022 #include <math.h> 00023 #include <stdio.h> 00024 #include <float.h> 00025 #include <pthread.h> 00026 #include <unistd.h> 00027 #include <sys/types.h> 00028 #include <sys/time.h> 00029 #include <time.h> 00030 00031 #ifdef SUNOS 00032 #include <ieeefp.h> 00033 #endif 00034 00036 #ifdef log2 00037 #define cygwin_log2 log2 00038 #undef log2 00039 #endif 00040 00041 00042 00044 #ifdef _GLIBCXX_CMATH 00045 #if _GLIBCXX_USE_C99_MATH 00046 #if !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC 00047 00049 using std::signbit; 00050 00051 using std::fpclassify; 00052 00053 using std::isfinite; 00054 using std::isinf; 00055 using std::isnan; 00056 using std::isnormal; 00057 00058 using std::isgreater; 00059 using std::isgreaterequal; 00060 using std::isless; 00061 using std::islessequal; 00062 using std::islessgreater; 00063 using std::isunordered; 00064 #endif 00065 #endif 00066 #endif 00067 00068 00069 #ifdef _WIN32 00070 #ifndef isnan 00071 #define isnan _isnan 00072 #endif 00073 00074 #ifndef isfinite 00075 #define isfinite _isfinite 00076 #endif 00077 #endif //_WIN32 00078 00079 #ifndef NAN 00080 #include <stdlib.h> 00081 #define NAN (strtod("NAN",NULL)) 00082 #endif 00083 00084 /* Size of RNG seed */ 00085 #define RNG_SEED_SIZE 256 00086 00087 /* Maximum stack size */ 00088 #define RADIX_STACK_SIZE 512 00089 00090 /* Stack macros */ 00091 #define radix_push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i 00092 #define radix_pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si 00093 00094 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00095 00096 template <class T> struct radix_stack_t 00097 { 00099 T *sa; 00101 size_t sn; 00103 uint16_t si; 00104 }; 00105 00107 00109 template <class T1, class T2> struct thread_qsort 00110 { 00112 T1* output; 00114 T2* index; 00116 uint32_t size; 00117 00119 int32_t* qsort_threads; 00121 int32_t sort_limit; 00123 int32_t num_threads; 00124 }; 00125 #endif // DOXYGEN_SHOULD_SKIP_THIS 00126 00127 namespace shogun 00128 { 00131 class CMath : public CSGObject 00132 { 00133 public: 00137 00138 CMath(); 00139 00141 virtual ~CMath(); 00143 00147 00149 // 00150 template <class T> 00151 static inline T min(T a, T b) 00152 { 00153 return (a<=b) ? a : b; 00154 } 00155 00157 template <class T> 00158 static inline T max(T a, T b) 00159 { 00160 return (a>=b) ? a : b; 00161 } 00162 00164 template <class T> 00165 static inline T clamp(T value, T lb, T ub) 00166 { 00167 if (value<=lb) 00168 return lb; 00169 else if (value>=ub) 00170 return ub; 00171 else 00172 return value; 00173 } 00174 00176 template <class T> 00177 static inline T abs(T a) 00178 { 00179 // can't be a>=0?(a):(-a), because compiler complains about 00180 // 'comparison always true' when T is unsigned 00181 if (a==0) 00182 return 0; 00183 else if (a>0) 00184 return a; 00185 else 00186 return -a; 00187 } 00189 00192 00193 static inline float64_t round(float64_t d) 00194 { 00195 return ::floor(d+0.5); 00196 } 00197 00198 static inline float64_t floor(float64_t d) 00199 { 00200 return ::floor(d); 00201 } 00202 00203 static inline float64_t ceil(float64_t d) 00204 { 00205 return ::ceil(d); 00206 } 00207 00209 template <class T> 00210 static inline T sign(T a) 00211 { 00212 if (a==0) 00213 return 0; 00214 else return (a<0) ? (-1) : (+1); 00215 } 00216 00218 template <class T> 00219 static inline void swap(T &a,T &b) 00220 { 00221 T c=a; 00222 a=b; 00223 b=c; 00224 } 00225 00229 template <class T> 00230 static inline void resize(T* &data, int64_t old_size, int64_t new_size) 00231 { 00232 if (old_size==new_size) 00233 return; 00234 T* new_data = new T[new_size]; 00235 for (int64_t i=0; i<old_size && i<new_size; i++) 00236 new_data[i]=data[i]; 00237 delete[] data; 00238 data=new_data; 00239 } 00240 00242 template <class T> 00243 static inline T twonorm(T* x, int32_t len) 00244 { 00245 float64_t result=0; 00246 for (int32_t i=0; i<len; i++) 00247 result+=x[i]*x[i]; 00248 00249 return CMath::sqrt(result); 00250 } 00251 00253 template <class T> 00254 static inline T qsq(T* x, int32_t len, float64_t q) 00255 { 00256 float64_t result=0; 00257 for (int32_t i=0; i<len; i++) 00258 result+=CMath::pow(x[i], q); 00259 00260 return result; 00261 } 00262 00264 template <class T> 00265 static inline T qnorm(T* x, int32_t len, float64_t q) 00266 { 00267 ASSERT(q!=0); 00268 return CMath::pow(qsq(x, len, q), 1/q); 00269 } 00270 00272 template <class T> 00273 static inline T sq(T x) 00274 { 00275 return x*x; 00276 } 00277 00279 static inline float32_t sqrt(float32_t x) 00280 { 00281 return ::sqrtf(x); 00282 } 00283 00285 static inline float64_t sqrt(float64_t x) 00286 { 00287 return ::sqrt(x); 00288 } 00289 00291 static inline floatmax_t sqrt(floatmax_t x) 00292 { 00293 //fall back to double precision sqrt if sqrtl is not 00294 //available 00295 #ifdef HAVE_SQRTL 00296 return ::sqrtl(x); 00297 #else 00298 return ::sqrt(x); 00299 #endif 00300 } 00301 00302 00304 static inline floatmax_t powl(floatmax_t x, floatmax_t n) 00305 { 00306 //fall back to double precision pow if powl is not 00307 //available 00308 #ifdef HAVE_POWL 00309 return ::powl((long double) x, (long double) n); 00310 #else 00311 return ::pow((double) x, (double) n); 00312 #endif 00313 } 00314 00315 static inline int32_t pow(int32_t x, int32_t n) 00316 { 00317 ASSERT(n>=0); 00318 int32_t result=1; 00319 while (n--) 00320 result*=x; 00321 00322 return result; 00323 } 00324 00325 static inline float64_t pow(float64_t x, int32_t n) 00326 { 00327 ASSERT(n>=0); 00328 float64_t result=1; 00329 while (n--) 00330 result*=x; 00331 00332 return result; 00333 } 00334 00335 static inline float64_t pow(float64_t x, float64_t n) 00336 { 00337 return ::pow((double) x, (double) n); 00338 } 00339 00340 static inline float64_t exp(float64_t x) 00341 { 00342 return ::exp((double) x); 00343 } 00344 00345 static inline float64_t log10(float64_t v) 00346 { 00347 return ::log(v)/::log(10.0); 00348 } 00349 00350 static inline float64_t log2(float64_t v) 00351 { 00352 #ifdef HAVE_LOG2 00353 return ::log2(v); 00354 #else 00355 return ::log(v)/::log(2.0); 00356 #endif //HAVE_LOG2 00357 } 00358 00359 static inline float64_t log(float64_t v) 00360 { 00361 return ::log(v); 00362 } 00363 00364 template <class T> 00365 static void transpose_matrix( 00366 T*& matrix, int32_t& num_feat, int32_t& num_vec) 00367 { 00368 T* transposed=new T[num_vec*num_feat]; 00369 for (int32_t i=0; i<num_vec; i++) 00370 { 00371 for (int32_t j=0; j<num_feat; j++) 00372 transposed[i+j*num_vec]=matrix[i*num_feat+j]; 00373 } 00374 00375 delete[] matrix; 00376 matrix=transposed; 00377 00378 CMath::swap(num_feat, num_vec); 00379 } 00380 00381 #ifdef HAVE_LAPACK 00382 00383 00384 static float64_t* pinv( 00385 float64_t* matrix, int32_t rows, int32_t cols, 00386 float64_t* target=NULL); 00387 00388 00389 //C := alpha*op( A )*op( B ) + beta*C 00390 //op( X ) = X or op( X ) = X', 00391 static inline void dgemm( 00392 double alpha, double* A, int rows, int cols, 00393 CBLAS_TRANSPOSE transposeA, double *B, int cols_B, 00394 CBLAS_TRANSPOSE transposeB, double beta, double *C) 00395 { 00396 cblas_dgemm(CblasColMajor, transposeA, transposeB, rows, cols, cols_B, 00397 alpha, A, cols, B, cols_B, beta, C, cols); 00398 } 00399 00400 //y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, 00401 static inline void dgemv( 00402 double alpha, double *A, int rows, int cols, 00403 const CBLAS_TRANSPOSE transposeA, double* X, double beta, 00404 double* Y) 00405 { 00406 cblas_dgemv(CblasColMajor, transposeA, 00407 rows, cols, alpha, A, cols, 00408 X, 1, beta, Y, 1); 00409 } 00410 #endif 00411 00412 static inline int64_t factorial(int32_t n) 00413 { 00414 int64_t res=1; 00415 for (int i=2; i<=n; i++) 00416 res*=i ; 00417 return res ; 00418 } 00419 00420 static void init_random(uint32_t initseed=0) 00421 { 00422 if (initseed==0) 00423 { 00424 struct timeval tv; 00425 gettimeofday(&tv, NULL); 00426 seed=(uint32_t) (4223517*getpid()*tv.tv_sec*tv.tv_usec); 00427 } 00428 else 00429 seed=initseed; 00430 #if !defined(CYGWIN) && !defined(__INTERIX) 00431 //seed=42 00432 //SG_SPRINT("initializing random number generator with %d (seed size %d)\n", seed, RNG_SEED_SIZE); 00433 initstate(seed, CMath::rand_state, RNG_SEED_SIZE); 00434 #endif 00435 } 00436 00437 static inline int64_t random() 00438 { 00439 #if defined(CYGWIN) || defined(__INTERIX) 00440 return rand(); 00441 #else 00442 return ::random(); 00443 #endif 00444 } 00445 00446 static inline int32_t random(int32_t min_value, int32_t max_value) 00447 { 00448 int32_t ret = min_value + (int32_t) ((max_value-min_value+1) * (random() / (RAND_MAX+1.0))); 00449 ASSERT(ret>=min_value && ret<=max_value); 00450 return ret ; 00451 } 00452 00453 static inline float32_t random(float32_t min_value, float32_t max_value) 00454 { 00455 float32_t ret = min_value + ((max_value-min_value) * (random() / (1.0*RAND_MAX))); 00456 00457 if (ret<min_value || ret>max_value) 00458 SG_SPRINT("min_value:%10.10f value: %10.10f max_value:%10.10f", min_value, ret, max_value); 00459 ASSERT(ret>=min_value && ret<=max_value); 00460 return ret; 00461 } 00462 00463 static inline float64_t random(float64_t min_value, float64_t max_value) 00464 { 00465 float64_t ret = min_value + ((max_value-min_value) * (random() / (1.0*RAND_MAX))); 00466 00467 if (ret<min_value || ret>max_value) 00468 SG_SPRINT("min_value:%10.10f value: %10.10f max_value:%10.10f", min_value, ret, max_value); 00469 ASSERT(ret>=min_value && ret<=max_value); 00470 return ret; 00471 } 00472 00473 template <class T> 00474 static T* clone_vector(const T* vec, int32_t len) 00475 { 00476 T* result = new T[len]; 00477 for (int32_t i=0; i<len; i++) 00478 result[i]=vec[i]; 00479 00480 return result; 00481 } 00482 template <class T> 00483 static void fill_vector(T* vec, int32_t len, T value) 00484 { 00485 for (int32_t i=0; i<len; i++) 00486 vec[i]=value; 00487 } 00488 template <class T> 00489 static void range_fill_vector(T* vec, int32_t len, T start=0) 00490 { 00491 for (int32_t i=0; i<len; i++) 00492 vec[i]=i+start; 00493 } 00494 00495 template <class T> 00496 static void random_vector(T* vec, int32_t len, T min_value, T max_value) 00497 { 00498 for (int32_t i=0; i<len; i++) 00499 vec[i]=CMath::random(min_value, max_value); 00500 } 00501 00502 static inline int32_t* randperm(int32_t n) 00503 { 00504 int32_t* perm = new int32_t[n]; 00505 00506 if (!perm) 00507 return NULL; 00508 for (int32_t i = 0; i < n; i++) 00509 perm[i] = i; 00510 for (int32_t i = 0; i < n; i++) 00511 swap(perm[random(0, n - 1)], perm[i]); 00512 return perm; 00513 } 00514 00515 static inline int64_t nchoosek(int32_t n, int32_t k) 00516 { 00517 int64_t res=1; 00518 00519 for (int32_t i=n-k+1; i<=n; i++) 00520 res*=i; 00521 00522 return res/factorial(k); 00523 } 00524 00526 template <class T> 00527 static inline void vec1_plus_scalar_times_vec2(T* vec1, 00528 T scalar, const T* vec2, int32_t n) 00529 { 00530 for (int32_t i=0; i<n; i++) 00531 vec1[i]+=scalar*vec2[i]; 00532 } 00533 00535 static inline float64_t dot(const bool* v1, const bool* v2, int32_t n) 00536 { 00537 float64_t r=0; 00538 for (int32_t i=0; i<n; i++) 00539 r+=((v1[i]) ? 1 : 0) * ((v2[i]) ? 1 : 0); 00540 return r; 00541 } 00542 00544 static inline floatmax_t dot(const floatmax_t* v1, const floatmax_t* v2, int32_t n) 00545 { 00546 floatmax_t r=0; 00547 for (int32_t i=0; i<n; i++) 00548 r+=v1[i]*v2[i]; 00549 return r; 00550 } 00551 00553 static inline float64_t dot(float64_t* v1, float64_t* v2, int32_t n) 00554 { 00555 float64_t r=0; 00556 #ifdef HAVE_LAPACK 00557 int32_t skip=1; 00558 r = cblas_ddot(n, v1, skip, v2, skip); 00559 #else 00560 for (int32_t i=0; i<n; i++) 00561 r+=v1[i]*v2[i]; 00562 #endif 00563 return r; 00564 } 00565 00567 static inline float32_t dot( 00568 float32_t* v1, float32_t* v2, int32_t n) 00569 { 00570 float64_t r=0; 00571 #ifdef HAVE_LAPACK 00572 int32_t skip=1; 00573 r = cblas_sdot(n, v1, skip, v2, skip); 00574 #else 00575 for (int32_t i=0; i<n; i++) 00576 r+=v1[i]*v2[i]; 00577 #endif 00578 return r; 00579 } 00580 00582 static inline float64_t dot( 00583 const uint64_t* v1, const uint64_t* v2, int32_t n) 00584 { 00585 float64_t r=0; 00586 for (int32_t i=0; i<n; i++) 00587 r+=((float64_t) v1[i])*v2[i]; 00588 00589 return r; 00590 } 00592 static inline float64_t dot( 00593 const int64_t* v1, const int64_t* v2, int32_t n) 00594 { 00595 float64_t r=0; 00596 for (int32_t i=0; i<n; i++) 00597 r+=((float64_t) v1[i])*v2[i]; 00598 00599 return r; 00600 } 00601 00603 static inline float64_t dot( 00604 const int32_t* v1, const int32_t* v2, int32_t n) 00605 { 00606 float64_t r=0; 00607 for (int32_t i=0; i<n; i++) 00608 r+=((float64_t) v1[i])*v2[i]; 00609 00610 return r; 00611 } 00612 00614 static inline float64_t dot( 00615 const uint32_t* v1, const uint32_t* v2, int32_t n) 00616 { 00617 float64_t r=0; 00618 for (int32_t i=0; i<n; i++) 00619 r+=((float64_t) v1[i])*v2[i]; 00620 00621 return r; 00622 } 00623 00625 static inline float64_t dot( 00626 const uint16_t* v1, const uint16_t* v2, int32_t n) 00627 { 00628 float64_t r=0; 00629 for (int32_t i=0; i<n; i++) 00630 r+=((float64_t) v1[i])*v2[i]; 00631 00632 return r; 00633 } 00634 00636 static inline float64_t dot( 00637 const int16_t* v1, const int16_t* v2, int32_t n) 00638 { 00639 float64_t r=0; 00640 for (int32_t i=0; i<n; i++) 00641 r+=((float64_t) v1[i])*v2[i]; 00642 00643 return r; 00644 } 00645 00647 static inline float64_t dot( 00648 const char* v1, const char* v2, int32_t n) 00649 { 00650 float64_t r=0; 00651 for (int32_t i=0; i<n; i++) 00652 r+=((float64_t) v1[i])*v2[i]; 00653 00654 return r; 00655 } 00656 00658 static inline float64_t dot( 00659 const uint8_t* v1, const uint8_t* v2, int32_t n) 00660 { 00661 float64_t r=0; 00662 for (int32_t i=0; i<n; i++) 00663 r+=((float64_t) v1[i])*v2[i]; 00664 00665 return r; 00666 } 00667 00669 static inline float64_t dot( 00670 const float64_t* v1, const char* v2, int32_t n) 00671 { 00672 float64_t r=0; 00673 for (int32_t i=0; i<n; i++) 00674 r+=((float64_t) v1[i])*v2[i]; 00675 00676 return r; 00677 } 00678 00680 template <class T> 00681 static inline void add( 00682 T* target, T alpha, const T* v1, T beta, const T* v2, 00683 int32_t len) 00684 { 00685 for (int32_t i=0; i<len; i++) 00686 target[i]=alpha*v1[i]+beta*v2[i]; 00687 } 00688 00690 template <class T> 00691 static inline void add_scalar(T alpha, T* vec, int32_t len) 00692 { 00693 for (int32_t i=0; i<len; i++) 00694 vec[i]+=alpha; 00695 } 00696 00698 template <class T> 00699 static inline void scale_vector(T alpha, T* vec, int32_t len) 00700 { 00701 for (int32_t i=0; i<len; i++) 00702 vec[i]*=alpha; 00703 } 00704 00706 template <class T> 00707 static inline T sum(T* vec, int32_t len) 00708 { 00709 T result=0; 00710 for (int32_t i=0; i<len; i++) 00711 result+=vec[i]; 00712 00713 return result; 00714 } 00715 00717 template <class T> 00718 static inline T max(T* vec, int32_t len) 00719 { 00720 ASSERT(len>0); 00721 T maxv=vec[0]; 00722 00723 for (int32_t i=1; i<len; i++) 00724 maxv=CMath::max(vec[i], maxv); 00725 00726 return maxv; 00727 } 00728 00730 template <class T> 00731 static inline T sum_abs(T* vec, int32_t len) 00732 { 00733 T result=0; 00734 for (int32_t i=0; i<len; i++) 00735 result+=CMath::abs(vec[i]); 00736 00737 return result; 00738 } 00739 00741 template <class T> 00742 static inline bool fequal(T x, T y, float64_t precision=1e-6) 00743 { 00744 return CMath::abs(x-y)<precision; 00745 } 00746 00747 static inline float64_t mean(float64_t* vec, int32_t len) 00748 { 00749 ASSERT(vec); 00750 ASSERT(len>0); 00751 00752 float64_t mean=0; 00753 for (int32_t i=0; i<len; i++) 00754 mean+=vec[i]; 00755 return mean/len; 00756 } 00757 00758 static inline float64_t trace( 00759 float64_t* mat, int32_t cols, int32_t rows) 00760 { 00761 float64_t trace=0; 00762 for (int32_t i=0; i<rows; i++) 00763 trace+=mat[i*cols+i]; 00764 return trace; 00765 } 00766 00770 static void sort(int32_t *a, int32_t cols, int32_t sort_col=0); 00771 static void sort(float64_t *a, int32_t*idx, int32_t N); 00772 00773 /* 00774 * Inline function to extract the byte at position p (from left) 00775 * of an 64 bit integer. The function is somewhat identical to 00776 * accessing an array of characters via []. 00777 */ 00778 00780 template <class T> 00781 inline static void radix_sort(T* array, int32_t size) 00782 { 00783 radix_sort_helper(array,size,0); 00784 } 00785 00786 template <class T> 00787 static inline uint8_t byte(T word, uint16_t p) 00788 { 00789 return (word >> (sizeof(T)-p-1) * 8) & 0xff; 00790 } 00791 00792 template <class T> 00793 static void radix_sort_helper(T* array, int32_t size, uint16_t i) 00794 { 00795 static size_t count[256], nc, cmin; 00796 T *ak; 00797 uint8_t c=0; 00798 radix_stack_t<T> s[RADIX_STACK_SIZE], *sp, *olds, *bigs; 00799 T *an, *aj, *pile[256]; 00800 size_t *cp, cmax; 00801 00802 /* Push initial array to stack */ 00803 sp = s; 00804 radix_push(array, size, i); 00805 00806 /* Loop until all digits have been sorted */ 00807 while (sp>s) { 00808 radix_pop(array, size, i); 00809 an = array + size; 00810 00811 /* Make character histogram */ 00812 if (nc == 0) { 00813 cmin = 0xff; 00814 for (ak = array; ak < an; ak++) { 00815 c = byte(*ak, i); 00816 count[c]++; 00817 if (count[c] == 1) { 00818 /* Determine smallest character */ 00819 if (c < cmin) 00820 cmin = c; 00821 nc++; 00822 } 00823 } 00824 00825 /* Sort recursively if stack size too small */ 00826 if (sp + nc > s + RADIX_STACK_SIZE) { 00827 radix_sort_helper(array, size, i); 00828 continue; 00829 } 00830 } 00831 00832 /* 00833 * Set pile[]; push incompletely sorted bins onto stack. 00834 * pile[] = pointers to last out-of-place element in bins. 00835 * Before permuting: pile[c-1] + count[c] = pile[c]; 00836 * during deal: pile[c] counts down to pile[c-1]. 00837 */ 00838 olds = bigs = sp; 00839 cmax = 2; 00840 ak = array; 00841 pile[0xff] = an; 00842 for (cp = count + cmin; nc > 0; cp++) { 00843 /* Find next non-empty pile */ 00844 while (*cp == 0) 00845 cp++; 00846 /* Pile with several entries */ 00847 if (*cp > 1) { 00848 /* Determine biggest pile */ 00849 if (*cp > cmax) { 00850 cmax = *cp; 00851 bigs = sp; 00852 } 00853 00854 if (i < sizeof(T)-1) 00855 radix_push(ak, *cp, (uint16_t) (i + 1)); 00856 } 00857 pile[cp - count] = ak += *cp; 00858 nc--; 00859 } 00860 00861 /* Play it safe -- biggest bin last. */ 00862 swap(*olds, *bigs); 00863 00864 /* 00865 * Permute misplacements home. Already home: everything 00866 * before aj, and in pile[c], items from pile[c] on. 00867 * Inner loop: 00868 * r = next element to put in place; 00869 * ak = pile[r[i]] = location to put the next element. 00870 * aj = bottom of 1st disordered bin. 00871 * Outer loop: 00872 * Once the 1st disordered bin is done, ie. aj >= ak, 00873 * aj<-aj + count[c] connects the bins in array linked list; 00874 * reset count[c]. 00875 */ 00876 aj = array; 00877 while (aj<an) 00878 { 00879 T r; 00880 00881 for (r = *aj; aj < (ak = --pile[c = byte(r, i)]);) 00882 swap(*ak, r); 00883 00884 *aj = r; 00885 aj += count[c]; 00886 count[c] = 0; 00887 } 00888 } 00889 } 00890 00893 template <class T> 00894 static void insertion_sort(T* output, int32_t size) 00895 { 00896 for (int32_t i=0; i<size-1; i++) 00897 { 00898 int32_t j=i-1; 00899 T value=output[i]; 00900 while (j >= 0 && output[j] > value) 00901 { 00902 output[j+1] = output[j]; 00903 j--; 00904 } 00905 output[j+1]=value; 00906 } 00907 } 00908 00909 00912 template <class T> 00913 static void qsort(T* output, int32_t size) 00914 { 00915 if (size==2) 00916 { 00917 if (output[0] > output [1]) 00918 swap(output[0],output[1]); 00919 return; 00920 } 00921 //T split=output[random(0,size-1)]; 00922 T split=output[size/2]; 00923 00924 int32_t left=0; 00925 int32_t right=size-1; 00926 00927 while (left<=right) 00928 { 00929 while (output[left] < split) 00930 left++; 00931 while (output[right] > split) 00932 right--; 00933 00934 if (left<=right) 00935 { 00936 swap(output[left],output[right]); 00937 left++; 00938 right--; 00939 } 00940 } 00941 00942 if (right+1> 1) 00943 qsort(output,right+1); 00944 00945 if (size-left> 1) 00946 qsort(&output[left],size-left); 00947 } 00948 00950 template <class T> static void display_bits(T word, int32_t width=8*sizeof(T)) 00951 { 00952 ASSERT(width>=0); 00953 for (int i=0; i<width; i++) 00954 { 00955 T mask = ((T) 1)<<(sizeof(T)*8-1); 00956 while (mask) 00957 { 00958 if (mask & word) 00959 SG_SPRINT("1"); 00960 else 00961 SG_SPRINT("0"); 00962 00963 mask>>=1; 00964 } 00965 } 00966 } 00967 00969 template <class T> static void display_vector( 00970 const T* vector, int32_t n, const char* name="vector"); 00971 00973 template <class T> static void display_matrix( 00974 const T* matrix, int32_t rows, int32_t cols, const char* name="matrix"); 00975 00981 template <class T1,class T2> 00982 static void qsort_index(T1* output, T2* index, uint32_t size); 00983 00989 template <class T1,class T2> 00990 static void qsort_backward_index( 00991 T1* output, T2* index, int32_t size); 00992 01000 template <class T1,class T2> 01001 inline static void parallel_qsort_index(T1* output, T2* index, uint32_t size, int32_t n_threads, int32_t limit=262144) 01002 { 01003 int32_t n=0; 01004 thread_qsort<T1,T2> t; 01005 t.output=output; 01006 t.index=index; 01007 t.size=size; 01008 t.qsort_threads=&n; 01009 t.sort_limit=limit; 01010 t.num_threads=n_threads; 01011 parallel_qsort_index<T1,T2>(&t); 01012 } 01013 01014 01015 template <class T1,class T2> 01016 static void* parallel_qsort_index(void* p); 01017 01018 01019 /* finds the smallest element in output and puts that element as the 01020 first element */ 01021 template <class T> 01022 static void min(float64_t* output, T* index, int32_t size); 01023 01024 /* finds the n smallest elements in output and puts these elements as the 01025 first n elements */ 01026 template <class T> 01027 static void nmin( 01028 float64_t* output, T* index, int32_t size, int32_t n); 01029 01030 /* performs a inplace unique of a vector of type T using quicksort 01031 * returns the new number of elements */ 01032 template <class T> 01033 static int32_t unique(T* output, int32_t size) 01034 { 01035 qsort(output, size); 01036 int32_t i,j=0 ; 01037 for (i=0; i<size; i++) 01038 if (i==0 || output[i]!=output[i-1]) 01039 output[j++]=output[i]; 01040 return j ; 01041 } 01042 01043 /* finds an element in a sorted array via binary search 01044 * returns -1 if not found */ 01045 template <class T> 01046 static int32_t binary_search_helper(T* output, int32_t size, T elem) 01047 { 01048 int32_t start=0; 01049 int32_t end=size-1; 01050 01051 if (size<1) 01052 return 0; 01053 01054 while (start<end) 01055 { 01056 int32_t middle=(start+end)/2; 01057 01058 if (output[middle]>elem) 01059 end=middle-1; 01060 else if (output[middle]<elem) 01061 start=middle+1; 01062 else 01063 return middle; 01064 } 01065 01066 return start; 01067 } 01068 01069 /* finds an element in a sorted array via binary search 01070 * * returns -1 if not found */ 01071 template <class T> 01072 static inline int32_t binary_search(T* output, int32_t size, T elem) 01073 { 01074 int32_t ind = binary_search_helper(output, size, elem); 01075 if (ind >= 0 && output[ind] == elem) 01076 return ind; 01077 return -1; 01078 } 01079 01080 /* finds an element in a sorted array via binary search 01081 * if it exists, else the index the largest smaller element 01082 * is returned 01083 * note: a successor is not mandatory */ 01084 template <class T> 01085 static int32_t binary_search_max_lower_equal( 01086 T* output, int32_t size, T elem) 01087 { 01088 int32_t ind = binary_search_helper(output, size, elem); 01089 01090 if (output[ind]<=elem) 01091 return ind; 01092 if (ind>0 && output[ind-1] <= elem) 01093 return ind-1; 01094 return -1; 01095 } 01096 01099 static float64_t Align( 01100 char * seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost); 01101 01106 static int32_t calcroc( 01107 float64_t* fp, float64_t* tp, float64_t* output, int32_t* label, 01108 int32_t& size, int32_t& possize, int32_t& negsize, 01109 float64_t& tresh, FILE* rocfile); 01111 01114 static float64_t mutual_info(float64_t* p1, float64_t* p2, int32_t len); 01115 01118 static float64_t relative_entropy( 01119 float64_t* p, float64_t* q, int32_t len); 01120 01122 static float64_t entropy(float64_t* p, int32_t len); 01123 01125 inline static uint32_t get_seed() 01126 { 01127 return CMath::seed; 01128 } 01129 01131 inline static int is_finite(double f) 01132 { 01133 #if defined(isfinite) && !defined(SUNOS) 01134 return isfinite(f); 01135 #else 01136 return finite(f); 01137 #endif 01138 } 01139 01141 inline static int is_infinity(double f) 01142 { 01143 #ifdef SUNOS 01144 if (fpclass(f) == FP_NINF || fpclass(f) == FP_PINF) 01145 return 1; 01146 else 01147 return 0; 01148 #else 01149 return isinf(f); 01150 #endif 01151 } 01152 01154 inline static int is_nan(double f) 01155 { 01156 #ifdef SUNOS 01157 return isnand(f); 01158 #else 01159 return isnan(f); 01160 #endif 01161 } 01162 01163 01174 #ifdef USE_LOGCACHE 01175 static inline float64_t logarithmic_sum(float64_t p, float64_t q) 01176 { 01177 float64_t diff; 01178 01179 if (!CMath::finite(p)) 01180 return q; 01181 01182 if (!CMath::finite(q)) 01183 { 01184 SG_SWARNING("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q); 01185 return NAN; 01186 } 01187 diff = p - q; 01188 if (diff > 0) 01189 return diff > LOGRANGE? p : p + logtable[(int)(diff * LOGACCURACY)]; 01190 return -diff > LOGRANGE? q : q + logtable[(int)(-diff * LOGACCURACY)]; 01191 } 01192 01194 static void init_log_table(); 01195 01197 static int32_t determine_logrange(); 01198 01200 static int32_t determine_logaccuracy(int32_t range); 01201 #else 01202 static inline float64_t logarithmic_sum( 01203 float64_t p, float64_t q) 01204 { 01205 float64_t diff; 01206 01207 if (!CMath::is_finite(p)) 01208 return q; 01209 if (!CMath::is_finite(q)) 01210 return p; 01211 diff = p - q; 01212 if (diff > 0) 01213 return diff > LOGRANGE? p : p + log(1 + exp(-diff)); 01214 return -diff > LOGRANGE? q : q + log(1 + exp(diff)); 01215 } 01216 #endif 01217 #ifdef LOG_SUM_ARRAY 01218 01223 static inline float64_t logarithmic_sum_array( 01224 float64_t *p, int32_t len) 01225 { 01226 if (len<=2) 01227 { 01228 if (len==2) 01229 return logarithmic_sum(p[0],p[1]) ; 01230 if (len==1) 01231 return p[0]; 01232 return -INFTY ; 01233 } 01234 else 01235 { 01236 register float64_t *pp=p ; 01237 if (len%2==1) pp++ ; 01238 for (register int32_t j=0; j < len>>1; j++) 01239 pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ; 01240 } 01241 return logarithmic_sum_array(p,len%2+len>>1) ; 01242 } 01243 #endif 01244 01245 01247 inline virtual const char* get_name() const { return "Mathematics"; } 01248 public: 01251 01252 static const float64_t INFTY; 01253 static const float64_t ALMOST_INFTY; 01254 01256 static const float64_t ALMOST_NEG_INFTY; 01257 01259 static int32_t LOGRANGE; 01260 01262 static uint32_t seed; 01263 static char* rand_state; 01264 01265 #ifdef USE_LOGCACHE 01266 01268 static int32_t LOGACCURACY; 01270 protected: 01272 static float64_t* logtable; 01273 #endif 01274 }; 01275 01276 //implementations of template functions 01277 template <class T1,class T2> 01278 void* CMath::parallel_qsort_index(void* p) 01279 { 01280 struct thread_qsort<T1,T2>* ps=(thread_qsort<T1,T2>*) p; 01281 T1* output=ps->output; 01282 T2* index=ps->index; 01283 uint32_t size=ps->size; 01284 int32_t* qsort_threads=ps->qsort_threads; 01285 int32_t sort_limit=ps->sort_limit; 01286 int32_t num_threads=ps->num_threads; 01287 01288 if (size==2) 01289 { 01290 if (output[0] > output [1]) 01291 { 01292 swap(output[0], output[1]); 01293 swap(index[0], index[1]); 01294 } 01295 return NULL; 01296 } 01297 //T1 split=output[random(0,size-1)]; 01298 T1 split=output[size/2]; 01299 01300 int32_t left=0; 01301 int32_t right=size-1; 01302 01303 while (left<=right) 01304 { 01305 while (output[left] < split) 01306 left++; 01307 while (output[right] > split) 01308 right--; 01309 01310 if (left<=right) 01311 { 01312 swap(output[left], output[right]); 01313 swap(index[left], index[right]); 01314 left++; 01315 right--; 01316 } 01317 } 01318 bool lthread_start=false; 01319 bool rthread_start=false; 01320 pthread_t lthread; 01321 pthread_t rthread; 01322 struct thread_qsort<T1,T2> t1; 01323 struct thread_qsort<T1,T2> t2; 01324 01325 if (right+1> 1 && (right+1< sort_limit || *qsort_threads >= num_threads-1)) 01326 qsort_index(output,index,right+1); 01327 else if (right+1> 1) 01328 { 01329 (*qsort_threads)++; 01330 lthread_start=true; 01331 t1.output=output; 01332 t1.index=index; 01333 t1.size=right+1; 01334 t1.qsort_threads=qsort_threads; 01335 t1.sort_limit=sort_limit; 01336 t1.num_threads=num_threads; 01337 if (pthread_create(<hread, NULL, parallel_qsort_index<T1,T2>, &t1) != 0) 01338 { 01339 lthread_start=false; 01340 (*qsort_threads)--; 01341 qsort_index(output,index,right+1); 01342 } 01343 } 01344 01345 01346 if (size-left> 1 && (size-left< sort_limit || *qsort_threads >= num_threads-1)) 01347 qsort_index(&output[left],&index[left], size-left); 01348 else if (size-left> 1) 01349 { 01350 (*qsort_threads)++; 01351 rthread_start=true; 01352 t2.output=&output[left]; 01353 t2.index=&index[left]; 01354 t2.size=size-left; 01355 t2.qsort_threads=qsort_threads; 01356 t2.sort_limit=sort_limit; 01357 t2.num_threads=num_threads; 01358 if (pthread_create(&rthread, NULL, parallel_qsort_index<T1,T2>, &t2) != 0) 01359 { 01360 rthread_start=false; 01361 (*qsort_threads)--; 01362 qsort_index(&output[left],&index[left], size-left); 01363 } 01364 } 01365 01366 if (lthread_start) 01367 { 01368 pthread_join(lthread, NULL); 01369 (*qsort_threads)--; 01370 } 01371 01372 if (rthread_start) 01373 { 01374 pthread_join(rthread, NULL); 01375 (*qsort_threads)--; 01376 } 01377 01378 return NULL; 01379 } 01380 01381 template <class T1,class T2> 01382 void CMath::qsort_index(T1* output, T2* index, uint32_t size) 01383 { 01384 if (size==2) 01385 { 01386 if (output[0] > output [1]) 01387 { 01388 swap(output[0],output[1]); 01389 swap(index[0],index[1]); 01390 } 01391 return; 01392 } 01393 //T1 split=output[random(0,size-1)]; 01394 T1 split=output[size/2]; 01395 01396 int32_t left=0; 01397 int32_t right=size-1; 01398 01399 while (left<=right) 01400 { 01401 while (output[left] < split) 01402 left++; 01403 while (output[right] > split) 01404 right--; 01405 01406 if (left<=right) 01407 { 01408 swap(output[left],output[right]); 01409 swap(index[left],index[right]); 01410 left++; 01411 right--; 01412 } 01413 } 01414 01415 if (right+1> 1) 01416 qsort_index(output,index,right+1); 01417 01418 if (size-left> 1) 01419 qsort_index(&output[left],&index[left], size-left); 01420 } 01421 01422 template <class T1,class T2> 01423 void CMath::qsort_backward_index(T1* output, T2* index, int32_t size) 01424 { 01425 if (size==2) 01426 { 01427 if (output[0] < output [1]) 01428 { 01429 swap(output[0],output[1]); 01430 swap(index[0],index[1]); 01431 } 01432 return; 01433 } 01434 01435 //T1 split=output[random(0,size-1)]; 01436 T1 split=output[size/2]; 01437 01438 int32_t left=0; 01439 int32_t right=size-1; 01440 01441 while (left<=right) 01442 { 01443 while (output[left] > split) 01444 left++; 01445 while (output[right] < split) 01446 right--; 01447 01448 if (left<=right) 01449 { 01450 swap(output[left],output[right]); 01451 swap(index[left],index[right]); 01452 left++; 01453 right--; 01454 } 01455 } 01456 01457 if (right+1> 1) 01458 qsort_backward_index(output,index,right+1); 01459 01460 if (size-left> 1) 01461 qsort_backward_index(&output[left],&index[left], size-left); 01462 } 01463 01464 template <class T> 01465 void CMath::nmin(float64_t* output, T* index, int32_t size, int32_t n) 01466 { 01467 if (6*n*size<13*size*CMath::log(size)) 01468 for (int32_t i=0; i<n; i++) 01469 min(&output[i], &index[i], size-i) ; 01470 else 01471 qsort_index(output, index, size) ; 01472 } 01473 01474 /* move the smallest entry in the array to the beginning */ 01475 template <class T> 01476 void CMath::min(float64_t* output, T* index, int32_t size) 01477 { 01478 if (size<=1) 01479 return; 01480 float64_t min_elem=output[0]; 01481 int32_t min_index=0; 01482 for (int32_t i=1; i<size; i++) 01483 { 01484 if (output[i]<min_elem) 01485 { 01486 min_index=i; 01487 min_elem=output[i]; 01488 } 01489 } 01490 swap(output[0], output[min_index]); 01491 swap(index[0], index[min_index]); 01492 } 01493 } 01494 #endif