|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "lib/io.h" 00014 #include "lib/Signal.h" 00015 #include "lib/Trie.h" 00016 #include "base/Parallel.h" 00017 00018 #include "kernel/WeightedDegreePositionStringKernel.h" 00019 #include "kernel/SqrtDiagKernelNormalizer.h" 00020 #include "features/Features.h" 00021 #include "features/StringFeatures.h" 00022 00023 #include "classifier/svm/SVM.h" 00024 00025 #ifndef WIN32 00026 #include <pthread.h> 00027 #endif 00028 00029 using namespace shogun; 00030 00031 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X)) 00032 00033 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00034 template <class Trie> struct S_THREAD_PARAM 00035 { 00036 int32_t* vec; 00037 float64_t* result; 00038 float64_t* weights; 00039 CWeightedDegreePositionStringKernel* kernel; 00040 CTrie<Trie>* tries; 00041 float64_t factor; 00042 int32_t j; 00043 int32_t start; 00044 int32_t end; 00045 int32_t length; 00046 int32_t max_shift; 00047 int32_t* shift; 00048 int32_t* vec_idx; 00049 }; 00050 #endif // DOXYGEN_SHOULD_SKIP_THIS 00051 00052 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00053 int32_t size, int32_t d, int32_t mm, int32_t mkls) 00054 : CStringKernel<char>(size), weights(NULL), position_weights(NULL), 00055 position_weights_lhs(NULL), position_weights_rhs(NULL), 00056 weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0), 00057 max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0), 00058 num_block_weights_external(0), block_weights_external(NULL), 00059 block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d), 00060 tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL), 00061 m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0), 00062 alphabet(NULL) 00063 { 00064 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION; 00065 set_wd_weights(); 00066 ASSERT(weights); 00067 00068 set_normalizer(new CSqrtDiagKernelNormalizer()); 00069 } 00070 00071 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00072 int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl, 00073 int32_t mkls) 00074 : CStringKernel<char>(size), weights(NULL), position_weights(NULL), 00075 position_weights_lhs(NULL), position_weights_rhs(NULL), 00076 weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0), 00077 max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0), 00078 num_block_weights_external(0), block_weights_external(NULL), 00079 block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d), 00080 tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL), 00081 m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0), 00082 alphabet(NULL) 00083 { 00084 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION; 00085 00086 weights=new float64_t[d*(1+max_mismatch)]; 00087 for (int32_t i=0; i<d*(1+max_mismatch); i++) 00088 weights[i]=w[i]; 00089 00090 set_shifts(s, sl); 00091 00092 set_normalizer(new CSqrtDiagKernelNormalizer()); 00093 } 00094 00095 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00096 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d) 00097 : CStringKernel<char>(10), weights(NULL), position_weights(NULL), 00098 position_weights_lhs(NULL), position_weights_rhs(NULL), 00099 weights_buffer(NULL), mkl_stepsize(1), degree(d), length(0), 00100 max_mismatch(0), seq_length(0), shift(NULL), shift_len(0), 00101 num_block_weights_external(0), block_weights_external(NULL), 00102 block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d), 00103 tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL), 00104 m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0), 00105 alphabet(NULL) 00106 { 00107 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION; 00108 set_wd_weights(); 00109 ASSERT(weights); 00110 set_normalizer(new CSqrtDiagKernelNormalizer()); 00111 00112 init(l, r); 00113 } 00114 00115 00116 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel() 00117 { 00118 cleanup(); 00119 cleanup_POIM2(); 00120 00121 delete[] shift; 00122 shift=NULL; 00123 00124 delete[] weights; 00125 weights=NULL ; 00126 00127 delete[] block_weights; 00128 block_weights=NULL; 00129 00130 delete[] position_weights; 00131 position_weights=NULL; 00132 00133 delete[] position_weights_lhs; 00134 position_weights_lhs=NULL; 00135 00136 delete[] position_weights_rhs; 00137 position_weights_rhs=NULL; 00138 00139 delete[] weights_buffer; 00140 weights_buffer=NULL; 00141 } 00142 00143 void CWeightedDegreePositionStringKernel::remove_lhs() 00144 { 00145 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00146 delete_optimization(); 00147 00148 tries.destroy(); 00149 poim_tries.destroy(); 00150 00151 CKernel::remove_lhs(); 00152 } 00153 00154 void CWeightedDegreePositionStringKernel::create_empty_tries() 00155 { 00156 ASSERT(lhs); 00157 seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length(); 00158 00159 if (opt_type==SLOWBUTMEMEFFICIENT) 00160 { 00161 tries.create(seq_length, true); 00162 poim_tries.create(seq_length, true); 00163 } 00164 else if (opt_type==FASTBUTMEMHUNGRY) 00165 { 00166 tries.create(seq_length, false); // still buggy 00167 poim_tries.create(seq_length, false); // still buggy 00168 } 00169 else 00170 SG_ERROR( "unknown optimization type\n"); 00171 } 00172 00173 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r) 00174 { 00175 int32_t lhs_changed = (lhs!=l) ; 00176 int32_t rhs_changed = (rhs!=r) ; 00177 00178 CStringKernel<char>::init(l,r); 00179 00180 SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ; 00181 SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ; 00182 00183 CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l; 00184 CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r; 00185 00186 /* set shift */ 00187 if (shift_len==0) { 00188 shift_len=sf_l->get_vector_length(0); 00189 int32_t *shifts=new int32_t[shift_len]; 00190 for (int32_t i=0; i<shift_len; i++) { 00191 shifts[i]=1; 00192 } 00193 set_shifts(shifts, shift_len); 00194 delete[] shifts; 00195 } 00196 00197 00198 int32_t len=sf_l->get_max_vector_length(); 00199 if (lhs_changed && !sf_l->have_same_length(len)) 00200 SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n"); 00201 00202 if (rhs_changed && !sf_r->have_same_length(len)) 00203 SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n"); 00204 00205 SG_UNREF(alphabet); 00206 alphabet= sf_l->get_alphabet(); 00207 CAlphabet* ralphabet=sf_r->get_alphabet(); 00208 00209 if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA))) 00210 properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION); 00211 00212 ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet()); 00213 SG_UNREF(ralphabet); 00214 00215 //whenever init is called also init tries and block weights 00216 create_empty_tries(); 00217 init_block_weights(); 00218 00219 return init_normalizer(); 00220 } 00221 00222 void CWeightedDegreePositionStringKernel::cleanup() 00223 { 00224 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00225 delete_optimization(); 00226 00227 delete[] block_weights; 00228 block_weights=NULL; 00229 00230 tries.destroy(); 00231 poim_tries.destroy(); 00232 00233 seq_length = 0; 00234 tree_initialized = false; 00235 00236 SG_UNREF(alphabet); 00237 alphabet=NULL; 00238 00239 CKernel::cleanup(); 00240 } 00241 00242 bool CWeightedDegreePositionStringKernel::init_optimization( 00243 int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num, 00244 int32_t upto_tree) 00245 { 00246 ASSERT(position_weights_lhs==NULL); 00247 ASSERT(position_weights_rhs==NULL); 00248 00249 if (upto_tree<0) 00250 upto_tree=tree_num; 00251 00252 if (max_mismatch!=0) 00253 { 00254 SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n"); 00255 return false ; 00256 } 00257 00258 if (tree_num<0) 00259 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00260 00261 delete_optimization(); 00262 00263 if (tree_num<0) 00264 SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ; 00265 00266 for (int32_t i=0; i<p_count; i++) 00267 { 00268 if (tree_num<0) 00269 { 00270 if ( (i % (p_count/10+1)) == 0) 00271 SG_PROGRESS(i,0,p_count); 00272 add_example_to_tree(IDX[i], alphas[i]); 00273 } 00274 else 00275 { 00276 for (int32_t t=tree_num; t<=upto_tree; t++) 00277 add_example_to_single_tree(IDX[i], alphas[i], t); 00278 } 00279 } 00280 00281 if (tree_num<0) 00282 SG_DONE(); 00283 00284 set_is_initialized(true) ; 00285 return true ; 00286 } 00287 00288 bool CWeightedDegreePositionStringKernel::delete_optimization() 00289 { 00290 if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 00291 { 00292 tries.set_use_compact_terminal_nodes(false) ; 00293 SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ; 00294 } 00295 00296 if (get_is_initialized()) 00297 { 00298 if (opt_type==SLOWBUTMEMEFFICIENT) 00299 tries.delete_trees(true); 00300 else if (opt_type==FASTBUTMEMHUNGRY) 00301 tries.delete_trees(false); // still buggy 00302 else { 00303 SG_ERROR( "unknown optimization type\n"); 00304 } 00305 set_is_initialized(false); 00306 00307 return true; 00308 } 00309 00310 return false; 00311 } 00312 00313 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch( 00314 char* avec, int32_t alen, char* bvec, int32_t blen) 00315 { 00316 float64_t* max_shift_vec= new float64_t[max_shift]; 00317 float64_t sum0=0 ; 00318 for (int32_t i=0; i<max_shift; i++) 00319 max_shift_vec[i]=0 ; 00320 00321 // no shift 00322 for (int32_t i=0; i<alen; i++) 00323 { 00324 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00325 continue ; 00326 00327 int32_t mismatches=0; 00328 float64_t sumi = 0.0 ; 00329 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00330 { 00331 if (avec[i+j]!=bvec[i+j]) 00332 { 00333 mismatches++ ; 00334 if (mismatches>max_mismatch) 00335 break ; 00336 } ; 00337 sumi += weights[j+degree*mismatches]; 00338 } 00339 if (position_weights!=NULL) 00340 sum0 += position_weights[i]*sumi ; 00341 else 00342 sum0 += sumi ; 00343 } ; 00344 00345 for (int32_t i=0; i<alen; i++) 00346 { 00347 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00348 { 00349 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00350 continue ; 00351 00352 float64_t sumi1 = 0.0 ; 00353 // shift in sequence a 00354 int32_t mismatches=0; 00355 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00356 { 00357 if (avec[i+j+k]!=bvec[i+j]) 00358 { 00359 mismatches++ ; 00360 if (mismatches>max_mismatch) 00361 break ; 00362 } ; 00363 sumi1 += weights[j+degree*mismatches]; 00364 } 00365 float64_t sumi2 = 0.0 ; 00366 // shift in sequence b 00367 mismatches=0; 00368 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00369 { 00370 if (avec[i+j]!=bvec[i+j+k]) 00371 { 00372 mismatches++ ; 00373 if (mismatches>max_mismatch) 00374 break ; 00375 } ; 00376 sumi2 += weights[j+degree*mismatches]; 00377 } 00378 if (position_weights!=NULL) 00379 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00380 else 00381 max_shift_vec[k-1] += sumi1 + sumi2 ; 00382 } ; 00383 } 00384 00385 float64_t result = sum0 ; 00386 for (int32_t i=0; i<max_shift; i++) 00387 result += max_shift_vec[i]/(2*(i+1)) ; 00388 00389 delete[] max_shift_vec; 00390 return result ; 00391 } 00392 00393 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch( 00394 char* avec, int32_t alen, char* bvec, int32_t blen) 00395 { 00396 float64_t* max_shift_vec = new float64_t[max_shift]; 00397 float64_t sum0=0 ; 00398 for (int32_t i=0; i<max_shift; i++) 00399 max_shift_vec[i]=0 ; 00400 00401 // no shift 00402 for (int32_t i=0; i<alen; i++) 00403 { 00404 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00405 continue ; 00406 00407 float64_t sumi = 0.0 ; 00408 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00409 { 00410 if (avec[i+j]!=bvec[i+j]) 00411 break ; 00412 sumi += weights[j]; 00413 } 00414 if (position_weights!=NULL) 00415 sum0 += position_weights[i]*sumi ; 00416 else 00417 sum0 += sumi ; 00418 } ; 00419 00420 for (int32_t i=0; i<alen; i++) 00421 { 00422 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00423 { 00424 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00425 continue ; 00426 00427 float64_t sumi1 = 0.0 ; 00428 // shift in sequence a 00429 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00430 { 00431 if (avec[i+j+k]!=bvec[i+j]) 00432 break ; 00433 sumi1 += weights[j]; 00434 } 00435 float64_t sumi2 = 0.0 ; 00436 // shift in sequence b 00437 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00438 { 00439 if (avec[i+j]!=bvec[i+j+k]) 00440 break ; 00441 sumi2 += weights[j]; 00442 } 00443 if (position_weights!=NULL) 00444 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00445 else 00446 max_shift_vec[k-1] += sumi1 + sumi2 ; 00447 } ; 00448 } 00449 00450 float64_t result = sum0 ; 00451 for (int32_t i=0; i<max_shift; i++) 00452 result += max_shift_vec[i]/(2*(i+1)) ; 00453 00454 delete[] max_shift_vec; 00455 return result ; 00456 } 00457 00458 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix( 00459 char* avec, int32_t alen, char* bvec, int32_t blen) 00460 { 00461 float64_t* max_shift_vec = new float64_t[max_shift]; 00462 float64_t sum0=0 ; 00463 for (int32_t i=0; i<max_shift; i++) 00464 max_shift_vec[i]=0 ; 00465 00466 // no shift 00467 for (int32_t i=0; i<alen; i++) 00468 { 00469 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00470 continue ; 00471 float64_t sumi = 0.0 ; 00472 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00473 { 00474 if (avec[i+j]!=bvec[i+j]) 00475 break ; 00476 sumi += weights[i*degree+j]; 00477 } 00478 if (position_weights!=NULL) 00479 sum0 += position_weights[i]*sumi ; 00480 else 00481 sum0 += sumi ; 00482 } ; 00483 00484 for (int32_t i=0; i<alen; i++) 00485 { 00486 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00487 { 00488 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00489 continue ; 00490 00491 float64_t sumi1 = 0.0 ; 00492 // shift in sequence a 00493 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00494 { 00495 if (avec[i+j+k]!=bvec[i+j]) 00496 break ; 00497 sumi1 += weights[i*degree+j]; 00498 } 00499 float64_t sumi2 = 0.0 ; 00500 // shift in sequence b 00501 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00502 { 00503 if (avec[i+j]!=bvec[i+j+k]) 00504 break ; 00505 sumi2 += weights[i*degree+j]; 00506 } 00507 if (position_weights!=NULL) 00508 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00509 else 00510 max_shift_vec[k-1] += sumi1 + sumi2 ; 00511 } ; 00512 } 00513 00514 float64_t result = sum0 ; 00515 for (int32_t i=0; i<max_shift; i++) 00516 result += max_shift_vec[i]/(2*(i+1)) ; 00517 00518 delete[] max_shift_vec; 00519 return result ; 00520 } 00521 00522 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights( 00523 char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec, 00524 float64_t* pos_weights_rhs, int32_t blen) 00525 { 00526 00527 float64_t* max_shift_vec = new float64_t[max_shift]; 00528 float64_t sum0=0 ; 00529 for (int32_t i=0; i<max_shift; i++) 00530 max_shift_vec[i]=0 ; 00531 00532 // no shift 00533 for (int32_t i=0; i<alen; i++) 00534 { 00535 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00536 continue ; 00537 00538 float64_t sumi = 0.0 ; 00539 float64_t posweight_lhs = 0.0 ; 00540 float64_t posweight_rhs = 0.0 ; 00541 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00542 { 00543 posweight_lhs += pos_weights_lhs[i+j] ; 00544 posweight_rhs += pos_weights_rhs[i+j] ; 00545 00546 if (avec[i+j]!=bvec[i+j]) 00547 break ; 00548 sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00549 } 00550 if (position_weights!=NULL) 00551 sum0 += position_weights[i]*sumi ; 00552 else 00553 sum0 += sumi ; 00554 } ; 00555 00556 for (int32_t i=0; i<alen; i++) 00557 { 00558 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00559 { 00560 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00561 continue ; 00562 00563 // shift in sequence a 00564 float64_t sumi1 = 0.0 ; 00565 float64_t posweight_lhs = 0.0 ; 00566 float64_t posweight_rhs = 0.0 ; 00567 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00568 { 00569 posweight_lhs += pos_weights_lhs[i+j+k] ; 00570 posweight_rhs += pos_weights_rhs[i+j] ; 00571 if (avec[i+j+k]!=bvec[i+j]) 00572 break ; 00573 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00574 } 00575 // shift in sequence b 00576 float64_t sumi2 = 0.0 ; 00577 posweight_lhs = 0.0 ; 00578 posweight_rhs = 0.0 ; 00579 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00580 { 00581 posweight_lhs += pos_weights_lhs[i+j] ; 00582 posweight_rhs += pos_weights_rhs[i+j+k] ; 00583 if (avec[i+j]!=bvec[i+j+k]) 00584 break ; 00585 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00586 } 00587 if (position_weights!=NULL) 00588 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00589 else 00590 max_shift_vec[k-1] += sumi1 + sumi2 ; 00591 } ; 00592 } 00593 00594 float64_t result = sum0 ; 00595 for (int32_t i=0; i<max_shift; i++) 00596 result += max_shift_vec[i]/(2*(i+1)) ; 00597 00598 delete[] max_shift_vec; 00599 return result ; 00600 } 00601 00602 00603 float64_t CWeightedDegreePositionStringKernel::compute( 00604 int32_t idx_a, int32_t idx_b) 00605 { 00606 int32_t alen, blen; 00607 bool free_avec, free_bvec; 00608 00609 char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00610 char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00611 // can only deal with strings of same length 00612 ASSERT(alen==blen); 00613 ASSERT(shift_len==alen); 00614 00615 float64_t result = 0 ; 00616 if (position_weights_lhs!=NULL || position_weights_rhs!=NULL) 00617 { 00618 ASSERT(max_mismatch==0); 00619 float64_t* position_weights_rhs_ = position_weights_rhs ; 00620 if (lhs==rhs) 00621 { 00622 //fprintf(stdout, ".") ; 00623 position_weights_rhs_ = position_weights_lhs ; 00624 } 00625 else 00626 { 00627 //fprintf(stdout, "*") ; 00628 } 00629 00630 result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs_[idx_b*blen], blen) ; 00631 } 00632 else if (max_mismatch > 0) 00633 result = compute_with_mismatch(avec, alen, bvec, blen) ; 00634 else if (length==0) 00635 result = compute_without_mismatch(avec, alen, bvec, blen) ; 00636 else 00637 result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ; 00638 00639 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00640 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00641 00642 return result ; 00643 } 00644 00645 00646 void CWeightedDegreePositionStringKernel::add_example_to_tree( 00647 int32_t idx, float64_t alpha) 00648 { 00649 ASSERT(position_weights_lhs==NULL); 00650 ASSERT(position_weights_rhs==NULL); 00651 ASSERT(alphabet); 00652 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00653 00654 int32_t len=0; 00655 bool free_vec; 00656 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec); 00657 ASSERT(max_mismatch==0); 00658 int32_t *vec = new int32_t[len] ; 00659 00660 for (int32_t i=0; i<len; i++) 00661 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00662 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00663 00664 if (opt_type==FASTBUTMEMHUNGRY) 00665 { 00666 //TRIES(set_use_compact_terminal_nodes(false)) ; 00667 ASSERT(!TRIES(get_use_compact_terminal_nodes())); 00668 } 00669 00670 for (int32_t i=0; i<len; i++) 00671 { 00672 int32_t max_s=-1; 00673 00674 if (opt_type==SLOWBUTMEMEFFICIENT) 00675 max_s=0; 00676 else if (opt_type==FASTBUTMEMHUNGRY) 00677 max_s=shift[i]; 00678 else { 00679 SG_ERROR( "unknown optimization type\n"); 00680 } 00681 00682 for (int32_t s=max_s; s>=0; s--) 00683 { 00684 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00685 TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ; 00686 //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i, s, idx, alpha_pw) ; 00687 00688 if ((s==0) || (i+s>=len)) 00689 continue; 00690 00691 TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ; 00692 //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i+s, -s, idx, alpha_pw) ; 00693 } 00694 } 00695 00696 delete[] vec ; 00697 tree_initialized=true ; 00698 } 00699 00700 void CWeightedDegreePositionStringKernel::add_example_to_single_tree( 00701 int32_t idx, float64_t alpha, int32_t tree_num) 00702 { 00703 ASSERT(position_weights_lhs==NULL); 00704 ASSERT(position_weights_rhs==NULL); 00705 ASSERT(alphabet); 00706 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00707 00708 int32_t len=0; 00709 bool free_vec; 00710 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec); 00711 ASSERT(max_mismatch==0); 00712 int32_t *vec=new int32_t[len]; 00713 int32_t max_s=-1; 00714 00715 if (opt_type==SLOWBUTMEMEFFICIENT) 00716 max_s=0; 00717 else if (opt_type==FASTBUTMEMHUNGRY) 00718 { 00719 ASSERT(!tries.get_use_compact_terminal_nodes()); 00720 max_s=shift[tree_num]; 00721 } 00722 else { 00723 SG_ERROR( "unknown optimization type\n"); 00724 } 00725 for (int32_t i=CMath::max(0,tree_num-max_shift); 00726 i<CMath::min(len,tree_num+degree+max_shift); i++) 00727 { 00728 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00729 } 00730 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00731 00732 for (int32_t s=max_s; s>=0; s--) 00733 { 00734 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00735 tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ; 00736 //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, s, idx, alpha_pw) ; 00737 } 00738 00739 if (opt_type==FASTBUTMEMHUNGRY) 00740 { 00741 for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++) 00742 { 00743 int32_t s=tree_num-i; 00744 if ((i+s<len) && (s>=1) && (s<=shift[i])) 00745 { 00746 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00747 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 00748 //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, -s, idx, alpha_pw) ; 00749 } 00750 } 00751 } 00752 delete[] vec ; 00753 tree_initialized=true ; 00754 } 00755 00756 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx) 00757 { 00758 ASSERT(position_weights_lhs==NULL); 00759 ASSERT(position_weights_rhs==NULL); 00760 ASSERT(alphabet); 00761 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00762 00763 float64_t sum=0; 00764 int32_t len=0; 00765 bool free_vec; 00766 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec); 00767 ASSERT(max_mismatch==0); 00768 int32_t *vec=new int32_t[len]; 00769 00770 for (int32_t i=0; i<len; i++) 00771 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00772 00773 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00774 00775 for (int32_t i=0; i<len; i++) 00776 sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ; 00777 00778 if (opt_type==SLOWBUTMEMEFFICIENT) 00779 { 00780 for (int32_t i=0; i<len; i++) 00781 { 00782 for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++) 00783 { 00784 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ; 00785 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ; 00786 } 00787 } 00788 } 00789 00790 delete[] vec ; 00791 00792 return normalizer->normalize_rhs(sum, idx); 00793 } 00794 00795 void CWeightedDegreePositionStringKernel::compute_by_tree( 00796 int32_t idx, float64_t* LevelContrib) 00797 { 00798 ASSERT(position_weights_lhs==NULL); 00799 ASSERT(position_weights_rhs==NULL); 00800 ASSERT(alphabet); 00801 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00802 00803 int32_t len=0; 00804 bool free_vec; 00805 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec); 00806 ASSERT(max_mismatch==0); 00807 int32_t *vec=new int32_t[len]; 00808 00809 for (int32_t i=0; i<len; i++) 00810 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00811 00812 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00813 00814 for (int32_t i=0; i<len; i++) 00815 { 00816 tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib, 00817 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights, 00818 (length!=0)); 00819 } 00820 00821 if (opt_type==SLOWBUTMEMEFFICIENT) 00822 { 00823 for (int32_t i=0; i<len; i++) 00824 for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++) 00825 { 00826 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib, 00827 normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize, 00828 weights, (length!=0)) ; 00829 tries.compute_by_tree_helper(vec, len, i+k, i, i+k, 00830 LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx), 00831 mkl_stepsize, weights, (length!=0)) ; 00832 } 00833 } 00834 00835 delete[] vec ; 00836 } 00837 00838 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights( 00839 int32_t &len) 00840 { 00841 return tries.compute_abs_weights(len); 00842 } 00843 00844 bool CWeightedDegreePositionStringKernel::set_shifts( 00845 int32_t* shift_, int32_t shift_len_) 00846 { 00847 delete[] shift; 00848 00849 shift_len = shift_len_ ; 00850 shift = new int32_t[shift_len] ; 00851 00852 if (shift) 00853 { 00854 max_shift = 0 ; 00855 00856 for (int32_t i=0; i<shift_len; i++) 00857 { 00858 shift[i] = shift_[i] ; 00859 if (shift[i]>max_shift) 00860 max_shift = shift[i] ; 00861 } 00862 00863 ASSERT(max_shift>=0 && max_shift<=shift_len); 00864 } 00865 00866 return false; 00867 } 00868 00869 bool CWeightedDegreePositionStringKernel::set_wd_weights() 00870 { 00871 ASSERT(degree>0); 00872 00873 delete[] weights; 00874 weights=new float64_t[degree]; 00875 if (weights) 00876 { 00877 int32_t i; 00878 float64_t sum=0; 00879 for (i=0; i<degree; i++) 00880 { 00881 weights[i]=degree-i; 00882 sum+=weights[i]; 00883 } 00884 for (i=0; i<degree; i++) 00885 weights[i]/=sum; 00886 00887 for (i=0; i<degree; i++) 00888 { 00889 for (int32_t j=1; j<=max_mismatch; j++) 00890 { 00891 if (j<i+1) 00892 { 00893 int32_t nk=CMath::nchoosek(i+1, j); 00894 weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j)); 00895 } 00896 else 00897 weights[i+j*degree]= 0; 00898 } 00899 } 00900 00901 return true; 00902 } 00903 else 00904 return false; 00905 } 00906 00907 bool CWeightedDegreePositionStringKernel::set_weights( 00908 float64_t* ws, int32_t d, int32_t len) 00909 { 00910 SG_DEBUG("degree = %i d=%i\n", degree, d); 00911 degree=d; 00912 length=len; 00913 00914 if (len <= 0) 00915 len=1; 00916 00917 delete[] weights; 00918 weights=new float64_t[d*len]; 00919 00920 if (weights) 00921 { 00922 for (int32_t i=0; i<degree*len; i++) 00923 weights[i]=ws[i]; 00924 return true; 00925 } 00926 else 00927 return false; 00928 } 00929 00930 bool CWeightedDegreePositionStringKernel::set_position_weights( 00931 float64_t* pws, int32_t len) 00932 { 00933 if (seq_length==0) 00934 seq_length=len; 00935 00936 if (seq_length!=len) 00937 { 00938 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 00939 return false; 00940 } 00941 delete[] position_weights; 00942 position_weights=new float64_t[len]; 00943 tries.set_position_weights(position_weights); 00944 00945 if (position_weights) 00946 { 00947 for (int32_t i=0; i<len; i++) 00948 position_weights[i]=pws[i]; 00949 return true; 00950 } 00951 else 00952 return false; 00953 } 00954 00955 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num) 00956 { 00957 fprintf(stderr, "set_position_weights_lhs %i %i %i\n", len, num, seq_length); 00958 00959 if (position_weights_rhs==position_weights_lhs) 00960 position_weights_rhs=NULL; 00961 else 00962 delete_position_weights_rhs(); 00963 00964 if (len==0) 00965 { 00966 return delete_position_weights_lhs(); 00967 } 00968 00969 if (seq_length!=len) 00970 { 00971 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 00972 return false; 00973 } 00974 if (0) 00975 { 00976 00977 if (!lhs) 00978 { 00979 SG_ERROR("lhs=NULL\n"); 00980 return false ; 00981 } 00982 if (lhs->get_num_vectors()!=num) 00983 { 00984 SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num); 00985 return false; 00986 } 00987 } 00988 00989 delete[] position_weights_lhs; 00990 position_weights_lhs=new float64_t[len*num]; 00991 if (position_weights_lhs) 00992 { 00993 for (int32_t i=0; i<len*num; i++) 00994 position_weights_lhs[i]=pws[i]; 00995 return true; 00996 } 00997 else 00998 return false; 00999 } 01000 01001 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs( 01002 float64_t* pws, int32_t len, int32_t num) 01003 { 01004 fprintf(stderr, "set_position_weights_rhs %i %i %i\n", len, num, seq_length); 01005 if (len==0) 01006 { 01007 if (position_weights_rhs==position_weights_lhs) 01008 { 01009 position_weights_rhs=NULL; 01010 return true; 01011 } 01012 return delete_position_weights_rhs(); 01013 } 01014 01015 if (seq_length!=len) 01016 { 01017 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 01018 return false; 01019 } 01020 if (0) 01021 { 01022 01023 if (!rhs) 01024 { 01025 if (!lhs) 01026 { 01027 SG_ERROR("rhs=NULL and lhs=NULL\n"); 01028 return false; 01029 } 01030 if (lhs->get_num_vectors()!=num) 01031 { 01032 SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num); 01033 return false; 01034 } 01035 } else 01036 if (rhs->get_num_vectors()!=num) 01037 { 01038 SG_ERROR("rhs->get_num_vectors()=%i, num=%i\n", rhs->get_num_vectors(), num); 01039 return false; 01040 } 01041 } 01042 01043 delete[] position_weights_rhs; 01044 position_weights_rhs=new float64_t[len*num]; 01045 if (position_weights_rhs) 01046 { 01047 for (int32_t i=0; i<len*num; i++) 01048 position_weights_rhs[i]=pws[i]; 01049 return true; 01050 } 01051 else 01052 return false; 01053 } 01054 01055 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd() 01056 { 01057 delete[] block_weights; 01058 block_weights=new float64_t[CMath::max(seq_length,degree)]; 01059 01060 if (block_weights) 01061 { 01062 int32_t k; 01063 float64_t d=degree; // use float to evade rounding errors below 01064 01065 for (k=0; k<degree; k++) 01066 block_weights[k]= 01067 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1)); 01068 for (k=degree; k<seq_length; k++) 01069 block_weights[k]=(-d+3*k+4)/3; 01070 } 01071 01072 return (block_weights!=NULL); 01073 } 01074 01075 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external() 01076 { 01077 ASSERT(weights); 01078 delete[] block_weights; 01079 block_weights=new float64_t[CMath::max(seq_length,degree)]; 01080 01081 if (block_weights) 01082 { 01083 int32_t i=0; 01084 block_weights[0]=weights[0]; 01085 for (i=1; i<CMath::max(seq_length,degree); i++) 01086 block_weights[i]=0; 01087 01088 for (i=1; i<CMath::max(seq_length,degree); i++) 01089 { 01090 block_weights[i]=block_weights[i-1]; 01091 01092 float64_t contrib=0; 01093 for (int32_t j=0; j<CMath::min(degree,i+1); j++) 01094 contrib+=weights[j]; 01095 01096 block_weights[i]+=contrib; 01097 } 01098 } 01099 01100 return (block_weights!=NULL); 01101 } 01102 01103 bool CWeightedDegreePositionStringKernel::init_block_weights_const() 01104 { 01105 delete[] block_weights; 01106 block_weights=new float64_t[seq_length]; 01107 01108 if (block_weights) 01109 { 01110 for (int32_t i=1; i<seq_length+1 ; i++) 01111 block_weights[i-1]=1.0/seq_length; 01112 } 01113 01114 return (block_weights!=NULL); 01115 } 01116 01117 bool CWeightedDegreePositionStringKernel::init_block_weights_linear() 01118 { 01119 delete[] block_weights; 01120 block_weights=new float64_t[seq_length]; 01121 01122 if (block_weights) 01123 { 01124 for (int32_t i=1; i<seq_length+1 ; i++) 01125 block_weights[i-1]=degree*i; 01126 } 01127 01128 return (block_weights!=NULL); 01129 } 01130 01131 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly() 01132 { 01133 delete[] block_weights; 01134 block_weights=new float64_t[seq_length]; 01135 01136 if (block_weights) 01137 { 01138 for (int32_t i=1; i<degree+1 ; i++) 01139 block_weights[i-1]=((float64_t) i)*i; 01140 01141 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01142 block_weights[i-1]=i; 01143 } 01144 01145 return (block_weights!=NULL); 01146 } 01147 01148 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly() 01149 { 01150 delete[] block_weights; 01151 block_weights=new float64_t[seq_length]; 01152 01153 if (block_weights) 01154 { 01155 for (int32_t i=1; i<degree+1 ; i++) 01156 block_weights[i-1]=((float64_t) i)*i*i; 01157 01158 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01159 block_weights[i-1]=i; 01160 } 01161 01162 return (block_weights!=NULL); 01163 } 01164 01165 bool CWeightedDegreePositionStringKernel::init_block_weights_exp() 01166 { 01167 delete[] block_weights; 01168 block_weights=new float64_t[seq_length]; 01169 01170 if (block_weights) 01171 { 01172 for (int32_t i=1; i<degree+1 ; i++) 01173 block_weights[i-1]=exp(((float64_t) i/10.0)); 01174 01175 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01176 block_weights[i-1]=i; 01177 } 01178 01179 return (block_weights!=NULL); 01180 } 01181 01182 bool CWeightedDegreePositionStringKernel::init_block_weights_log() 01183 { 01184 delete[] block_weights; 01185 block_weights=new float64_t[seq_length]; 01186 01187 if (block_weights) 01188 { 01189 for (int32_t i=1; i<degree+1 ; i++) 01190 block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2); 01191 01192 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01193 block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2); 01194 } 01195 01196 return (block_weights!=NULL); 01197 } 01198 01199 bool CWeightedDegreePositionStringKernel::init_block_weights_external() 01200 { 01201 if (block_weights_external && (seq_length == num_block_weights_external) ) 01202 { 01203 delete[] block_weights; 01204 block_weights=new float64_t[seq_length]; 01205 01206 if (block_weights) 01207 { 01208 for (int32_t i=0; i<seq_length; i++) 01209 block_weights[i]=block_weights_external[i]; 01210 } 01211 } 01212 else { 01213 SG_ERROR( "sequence longer then weights (seqlen:%d, wlen:%d)\n", seq_length, block_weights_external); 01214 } 01215 return (block_weights!=NULL); 01216 } 01217 01218 bool CWeightedDegreePositionStringKernel::init_block_weights() 01219 { 01220 switch (type) 01221 { 01222 case E_WD: 01223 return init_block_weights_from_wd(); 01224 case E_EXTERNAL: 01225 return init_block_weights_from_wd_external(); 01226 case E_BLOCK_CONST: 01227 return init_block_weights_const(); 01228 case E_BLOCK_LINEAR: 01229 return init_block_weights_linear(); 01230 case E_BLOCK_SQPOLY: 01231 return init_block_weights_sqpoly(); 01232 case E_BLOCK_CUBICPOLY: 01233 return init_block_weights_cubicpoly(); 01234 case E_BLOCK_EXP: 01235 return init_block_weights_exp(); 01236 case E_BLOCK_LOG: 01237 return init_block_weights_log(); 01238 case E_BLOCK_EXTERNAL: 01239 return init_block_weights_external(); 01240 default: 01241 return false; 01242 }; 01243 } 01244 01245 01246 01247 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p) 01248 { 01249 S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p; 01250 int32_t j=params->j; 01251 CWeightedDegreePositionStringKernel* wd=params->kernel; 01252 CTrie<DNATrie>* tries=params->tries; 01253 float64_t* weights=params->weights; 01254 int32_t length=params->length; 01255 int32_t max_shift=params->max_shift; 01256 int32_t* vec=params->vec; 01257 float64_t* result=params->result; 01258 float64_t factor=params->factor; 01259 int32_t* shift=params->shift; 01260 int32_t* vec_idx=params->vec_idx; 01261 01262 for (int32_t i=params->start; i<params->end; i++) 01263 { 01264 int32_t len=0; 01265 CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs()); 01266 CAlphabet* alpha=wd->alphabet; 01267 01268 bool free_vec; 01269 char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec); 01270 for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++) 01271 vec[k]=alpha->remap_to_bin(char_vec[k]); 01272 rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec); 01273 01274 SG_UNREF(rhs_feat); 01275 01276 result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]); 01277 01278 if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT) 01279 { 01280 for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++) 01281 { 01282 int32_t s=j-q ; 01283 if ((s>=1) && (s<=shift[q]) && (q+s<len)) 01284 { 01285 result[i] += 01286 wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, 01287 len, q, q+s, q, weights, (length!=0)), 01288 vec_idx[i])/(2.0*s); 01289 } 01290 } 01291 01292 for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++) 01293 { 01294 result[i] += 01295 wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, 01296 len, j+s, j, j+s, weights, (length!=0)), 01297 vec_idx[i])/(2.0*s); 01298 } 01299 } 01300 } 01301 01302 return NULL; 01303 } 01304 01305 void CWeightedDegreePositionStringKernel::compute_batch( 01306 int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec, 01307 int32_t* IDX, float64_t* alphas, float64_t factor) 01308 { 01309 ASSERT(alphabet); 01310 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01311 ASSERT(position_weights_lhs==NULL); 01312 ASSERT(position_weights_rhs==NULL); 01313 ASSERT(rhs); 01314 ASSERT(num_vec<=rhs->get_num_vectors()); 01315 ASSERT(num_vec>0); 01316 ASSERT(vec_idx); 01317 ASSERT(result); 01318 create_empty_tries(); 01319 01320 int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01321 ASSERT(num_feat>0); 01322 int32_t num_threads=parallel->get_num_threads(); 01323 ASSERT(num_threads>0); 01324 int32_t* vec=new int32_t[num_threads*num_feat]; 01325 01326 if (num_threads < 2) 01327 { 01328 #ifdef WIN32 01329 for (int32_t j=0; j<num_feat; j++) 01330 #else 01331 CSignal::clear_cancel(); 01332 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++) 01333 #endif 01334 { 01335 init_optimization(num_suppvec, IDX, alphas, j); 01336 S_THREAD_PARAM<DNATrie> params; 01337 params.vec=vec; 01338 params.result=result; 01339 params.weights=weights; 01340 params.kernel=this; 01341 params.tries=&tries; 01342 params.factor=factor; 01343 params.j=j; 01344 params.start=0; 01345 params.end=num_vec; 01346 params.length=length; 01347 params.max_shift=max_shift; 01348 params.shift=shift; 01349 params.vec_idx=vec_idx; 01350 compute_batch_helper((void*) ¶ms); 01351 01352 SG_PROGRESS(j,0,num_feat); 01353 } 01354 } 01355 #ifndef WIN32 01356 else 01357 { 01358 01359 CSignal::clear_cancel(); 01360 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++) 01361 { 01362 init_optimization(num_suppvec, IDX, alphas, j); 01363 pthread_t* threads = new pthread_t[num_threads-1]; 01364 S_THREAD_PARAM<DNATrie>* params = new S_THREAD_PARAM<DNATrie>[num_threads]; 01365 int32_t step= num_vec/num_threads; 01366 int32_t t; 01367 01368 for (t=0; t<num_threads-1; t++) 01369 { 01370 params[t].vec=&vec[num_feat*t]; 01371 params[t].result=result; 01372 params[t].weights=weights; 01373 params[t].kernel=this; 01374 params[t].tries=&tries; 01375 params[t].factor=factor; 01376 params[t].j=j; 01377 params[t].start = t*step; 01378 params[t].end = (t+1)*step; 01379 params[t].length=length; 01380 params[t].max_shift=max_shift; 01381 params[t].shift=shift; 01382 params[t].vec_idx=vec_idx; 01383 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)¶ms[t]); 01384 } 01385 01386 params[t].vec=&vec[num_feat*t]; 01387 params[t].result=result; 01388 params[t].weights=weights; 01389 params[t].kernel=this; 01390 params[t].tries=&tries; 01391 params[t].factor=factor; 01392 params[t].j=j; 01393 params[t].start=t*step; 01394 params[t].end=num_vec; 01395 params[t].length=length; 01396 params[t].max_shift=max_shift; 01397 params[t].shift=shift; 01398 params[t].vec_idx=vec_idx; 01399 compute_batch_helper((void*) ¶ms[t]); 01400 01401 for (t=0; t<num_threads-1; t++) 01402 pthread_join(threads[t], NULL); 01403 SG_PROGRESS(j,0,num_feat); 01404 01405 delete[] params; 01406 delete[] threads; 01407 } 01408 } 01409 #endif 01410 01411 delete[] vec; 01412 01413 //really also free memory as this can be huge on testing especially when 01414 //using the combined kernel 01415 create_empty_tries(); 01416 } 01417 01418 float64_t* CWeightedDegreePositionStringKernel::compute_scoring( 01419 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result, 01420 int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01421 { 01422 ASSERT(position_weights_lhs==NULL); 01423 ASSERT(position_weights_rhs==NULL); 01424 01425 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01426 ASSERT(num_feat>0); 01427 ASSERT(alphabet); 01428 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01429 01430 num_sym=4; //for now works only w/ DNA 01431 01432 ASSERT(max_degree>0); 01433 01434 // === variables 01435 int32_t* nofsKmers=new int32_t[max_degree]; 01436 float64_t** C=new float64_t*[max_degree]; 01437 float64_t** L=new float64_t*[max_degree]; 01438 float64_t** R=new float64_t*[max_degree]; 01439 01440 int32_t i; 01441 int32_t k; 01442 01443 // --- return table 01444 int32_t bigtabSize=0; 01445 for (k=0; k<max_degree; ++k ) 01446 { 01447 nofsKmers[k]=(int32_t) CMath::pow(num_sym, k+1); 01448 const int32_t tabSize=nofsKmers[k]*num_feat; 01449 bigtabSize+=tabSize; 01450 } 01451 result=new float64_t[bigtabSize]; 01452 01453 // --- auxilliary tables 01454 int32_t tabOffs=0; 01455 for( k = 0; k < max_degree; ++k ) 01456 { 01457 const int32_t tabSize = nofsKmers[k] * num_feat; 01458 C[k] = &result[tabOffs]; 01459 L[k] = new float64_t[ tabSize ]; 01460 R[k] = new float64_t[ tabSize ]; 01461 tabOffs+=tabSize; 01462 for(i = 0; i < tabSize; i++ ) 01463 { 01464 C[k][i] = 0.0; 01465 L[k][i] = 0.0; 01466 R[k][i] = 0.0; 01467 } 01468 } 01469 01470 // --- tree parsing info 01471 float64_t* margFactors=new float64_t[degree]; 01472 01473 int32_t* x = new int32_t[ degree+1 ]; 01474 int32_t* substrs = new int32_t[ degree+1 ]; 01475 // - fill arrays 01476 margFactors[0] = 1.0; 01477 substrs[0] = 0; 01478 for( k=1; k < degree; ++k ) { 01479 margFactors[k] = 0.25 * margFactors[k-1]; 01480 substrs[k] = -1; 01481 } 01482 substrs[degree] = -1; 01483 // - fill struct 01484 struct TreeParseInfo info; 01485 info.num_sym = num_sym; 01486 info.num_feat = num_feat; 01487 info.p = -1; 01488 info.k = -1; 01489 info.nofsKmers = nofsKmers; 01490 info.margFactors = margFactors; 01491 info.x = x; 01492 info.substrs = substrs; 01493 info.y0 = 0; 01494 info.C_k = NULL; 01495 info.L_k = NULL; 01496 info.R_k = NULL; 01497 01498 // === main loop 01499 i = 0; // total progress 01500 for( k = 0; k < max_degree; ++k ) 01501 { 01502 const int32_t nofKmers = nofsKmers[ k ]; 01503 info.C_k = C[k]; 01504 info.L_k = L[k]; 01505 info.R_k = R[k]; 01506 01507 // --- run over all trees 01508 for(int32_t p = 0; p < num_feat; ++p ) 01509 { 01510 init_optimization( num_suppvec, IDX, alphas, p ); 01511 int32_t tree = p ; 01512 for(int32_t j = 0; j < degree+1; j++ ) { 01513 x[j] = -1; 01514 } 01515 tries.traverse( tree, p, info, 0, x, k ); 01516 SG_PROGRESS(i++,0,num_feat*max_degree); 01517 } 01518 01519 // --- add partial overlap scores 01520 if( k > 0 ) { 01521 const int32_t j = k - 1; 01522 const int32_t nofJmers = (int32_t) CMath::pow( num_sym, j+1 ); 01523 for(int32_t p = 0; p < num_feat; ++p ) { 01524 const int32_t offsetJ = nofJmers * p; 01525 const int32_t offsetJ1 = nofJmers * (p+1); 01526 const int32_t offsetK = nofKmers * p; 01527 int32_t y; 01528 int32_t sym; 01529 for( y = 0; y < nofJmers; ++y ) { 01530 for( sym = 0; sym < num_sym; ++sym ) { 01531 const int32_t y_sym = num_sym*y + sym; 01532 const int32_t sym_y = nofJmers*sym + y; 01533 ASSERT(0<=y_sym && y_sym<nofKmers); 01534 ASSERT(0<=sym_y && sym_y<nofKmers); 01535 C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ]; 01536 if( p < num_feat-1 ) { 01537 C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ]; 01538 } 01539 } 01540 } 01541 } 01542 } 01543 // if( k > 1 ) 01544 // j = k-1 01545 // for all positions p 01546 // for all j-mers y 01547 // for n in {A,C,G,T} 01548 // C_k[ p, [y,n] ] += L_j[ p, y ] 01549 // C_k[ p, [n,y] ] += R_j[ p+1, y ] 01550 // end; 01551 // end; 01552 // end; 01553 // end; 01554 } 01555 01556 // === return a vector 01557 num_feat=1; 01558 num_sym = bigtabSize; 01559 // --- clean up 01560 delete[] nofsKmers; 01561 delete[] margFactors; 01562 delete[] substrs; 01563 delete[] x; 01564 delete[] C; 01565 for( k = 0; k < max_degree; ++k ) { 01566 delete[] L[k]; 01567 delete[] R[k]; 01568 } 01569 delete[] L; 01570 delete[] R; 01571 return result; 01572 } 01573 01574 char* CWeightedDegreePositionStringKernel::compute_consensus( 01575 int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01576 { 01577 ASSERT(position_weights_lhs==NULL); 01578 ASSERT(position_weights_rhs==NULL); 01579 //only works for order <= 32 01580 ASSERT(degree<=32); 01581 ASSERT(!tries.get_use_compact_terminal_nodes()); 01582 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01583 ASSERT(num_feat>0); 01584 ASSERT(alphabet); 01585 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01586 01587 //consensus 01588 char* result=new char[num_feat]; 01589 01590 //backtracking and scoring table 01591 int32_t num_tables=CMath::max(1,num_feat-degree+1); 01592 CDynamicArray<ConsensusEntry>** table=new CDynamicArray<ConsensusEntry>*[num_tables]; 01593 01594 for (int32_t i=0; i<num_tables; i++) 01595 table[i]=new CDynamicArray<ConsensusEntry>(num_suppvec/10); 01596 01597 //compute consensus via dynamic programming 01598 for (int32_t i=0; i<num_tables; i++) 01599 { 01600 bool cumulative=false; 01601 01602 if (i<num_tables-1) 01603 init_optimization(num_suppvec, IDX, alphas, i); 01604 else 01605 { 01606 init_optimization(num_suppvec, IDX, alphas, i, num_feat-1); 01607 cumulative=true; 01608 } 01609 01610 if (i==0) 01611 tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights); 01612 else 01613 tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights); 01614 01615 SG_PROGRESS(i,0,num_feat); 01616 } 01617 01618 01619 //int32_t n=table[0]->get_num_elements(); 01620 01621 //for (int32_t i=0; i<n; i++) 01622 //{ 01623 // ConsensusEntry e= table[0]->get_element(i); 01624 // SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01625 //} 01626 01627 //n=table[num_tables-1]->get_num_elements(); 01628 //for (int32_t i=0; i<n; i++) 01629 //{ 01630 // ConsensusEntry e= table[num_tables-1]->get_element(i); 01631 // SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01632 //} 01633 //n=table[num_tables-2]->get_num_elements(); 01634 //for (int32_t i=0; i<n; i++) 01635 //{ 01636 // ConsensusEntry e= table[num_tables-2]->get_element(i); 01637 // SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01638 //} 01639 01640 const char* acgt="ACGT"; 01641 01642 //backtracking start 01643 int32_t max_idx=-1; 01644 float32_t max_score=0; 01645 int32_t num_elements=table[num_tables-1]->get_num_elements(); 01646 01647 for (int32_t i=0; i<num_elements; i++) 01648 { 01649 float64_t sc=table[num_tables-1]->get_element(i).score; 01650 if (sc>max_score || max_idx==-1) 01651 { 01652 max_idx=i; 01653 max_score=sc; 01654 } 01655 } 01656 uint64_t endstr=table[num_tables-1]->get_element(max_idx).string; 01657 01658 SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score); 01659 01660 for (int32_t i=0; i<degree; i++) 01661 result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3]; 01662 01663 if (num_tables>1) 01664 { 01665 for (int32_t i=num_tables-1; i>=0; i--) 01666 { 01667 //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i); 01668 result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3]; 01669 max_idx=table[i]->get_element(max_idx).bt; 01670 } 01671 } 01672 01673 //for (int32_t t=0; t<num_tables; t++) 01674 //{ 01675 // n=table[t]->get_num_elements(); 01676 // for (int32_t i=0; i<n; i++) 01677 // { 01678 // ConsensusEntry e= table[t]->get_element(i); 01679 // SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt); 01680 // } 01681 //} 01682 01683 for (int32_t i=0; i<num_tables; i++) 01684 delete table[i]; 01685 01686 delete[] table; 01687 return result; 01688 } 01689 01690 01691 float64_t* CWeightedDegreePositionStringKernel::extract_w( 01692 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 01693 float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01694 { 01695 delete_optimization(); 01696 use_poim_tries=true; 01697 poim_tries.delete_trees(false); 01698 01699 // === check 01700 ASSERT(position_weights_lhs==NULL); 01701 ASSERT(position_weights_rhs==NULL); 01702 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01703 ASSERT(num_feat>0); 01704 ASSERT(alphabet->get_alphabet()==DNA); 01705 ASSERT(max_degree>0); 01706 01707 // === general variables 01708 static const int32_t NUM_SYMS = poim_tries.NUM_SYMS; 01709 const int32_t seqLen = num_feat; 01710 float64_t** subs; 01711 int32_t i; 01712 int32_t k; 01713 //int32_t y; 01714 01715 // === init tables "subs" for substring scores / POIMs 01716 // --- compute table sizes 01717 int32_t* offsets; 01718 int32_t offset; 01719 offsets = new int32_t[ max_degree ]; 01720 offset = 0; 01721 for( k = 0; k < max_degree; ++k ) { 01722 offsets[k] = offset; 01723 const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 ); 01724 const int32_t tabSize = nofsKmers * seqLen; 01725 offset += tabSize; 01726 } 01727 // --- allocate memory 01728 const int32_t bigTabSize = offset; 01729 w_result=new float64_t[bigTabSize]; 01730 for (i=0; i<bigTabSize; ++i) 01731 w_result[i]=0; 01732 01733 // --- set pointers for tables 01734 subs = new float64_t*[ max_degree ]; 01735 ASSERT( subs != NULL ); 01736 for( k = 0; k < max_degree; ++k ) { 01737 subs[k] = &w_result[ offsets[k] ]; 01738 } 01739 delete[] offsets; 01740 01741 // === init trees; extract "w" 01742 init_optimization( num_suppvec, IDX, alphas, -1); 01743 poim_tries.POIMs_extract_W( subs, max_degree ); 01744 01745 // === clean; return "subs" as vector 01746 delete[] subs; 01747 num_feat = 1; 01748 num_sym = bigTabSize; 01749 use_poim_tries=false; 01750 poim_tries.delete_trees(false); 01751 return w_result; 01752 } 01753 01754 float64_t* CWeightedDegreePositionStringKernel::compute_POIM( 01755 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 01756 float64_t* poim_result, int32_t num_suppvec, int32_t* IDX, 01757 float64_t* alphas, float64_t* distrib ) 01758 { 01759 delete_optimization(); 01760 use_poim_tries=true; 01761 poim_tries.delete_trees(false); 01762 01763 // === check 01764 ASSERT(position_weights_lhs==NULL); 01765 ASSERT(position_weights_rhs==NULL); 01766 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01767 ASSERT(num_feat>0); 01768 ASSERT(alphabet->get_alphabet()==DNA); 01769 ASSERT(max_degree!=0); 01770 ASSERT(distrib); 01771 01772 // === general variables 01773 static const int32_t NUM_SYMS = poim_tries.NUM_SYMS; 01774 const int32_t seqLen = num_feat; 01775 float64_t** subs; 01776 int32_t i; 01777 int32_t k; 01778 01779 // === DEBUGGING mode 01780 // 01781 // Activated if "max_degree" < 0. 01782 // Allows to output selected partial score. 01783 // 01784 // |max_degree| mod 4 01785 // 0: substring 01786 // 1: superstring 01787 // 2: left overlap 01788 // 3: right overlap 01789 // 01790 const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0; 01791 if( debug ) { 01792 max_degree = abs(max_degree) / 4; 01793 switch( debug ) { 01794 case 1: { 01795 printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree ); 01796 break; 01797 } 01798 case 2: { 01799 printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree ); 01800 break; 01801 } 01802 case 3: { 01803 printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree ); 01804 break; 01805 } 01806 case 4: { 01807 printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree ); 01808 break; 01809 } 01810 default: { 01811 printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree ); 01812 ASSERT(0); 01813 break; 01814 } 01815 } 01816 } 01817 01818 // --- compute table sizes 01819 int32_t* offsets; 01820 int32_t offset; 01821 offsets = new int32_t[ max_degree ]; 01822 offset = 0; 01823 for( k = 0; k < max_degree; ++k ) { 01824 offsets[k] = offset; 01825 const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 ); 01826 const int32_t tabSize = nofsKmers * seqLen; 01827 offset += tabSize; 01828 } 01829 // --- allocate memory 01830 const int32_t bigTabSize=offset; 01831 poim_result=new float64_t[bigTabSize]; 01832 for (i=0; i<bigTabSize; ++i ) 01833 poim_result[i]=0; 01834 01835 // --- set pointers for tables 01836 subs=new float64_t*[max_degree]; 01837 for (k=0; k<max_degree; ++k) 01838 subs[k]=&poim_result[offsets[k]]; 01839 01840 delete[] offsets; 01841 01842 // === init trees; precalc S, L and R 01843 init_optimization( num_suppvec, IDX, alphas, -1); 01844 poim_tries.POIMs_precalc_SLR( distrib ); 01845 01846 // === compute substring scores 01847 if( debug==0 || debug==1 ) { 01848 poim_tries.POIMs_extract_W( subs, max_degree ); 01849 for( k = 1; k < max_degree; ++k ) { 01850 const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) CMath::pow(NUM_SYMS,k-1) : 0; 01851 const int32_t nofKmers1 = (int32_t) CMath::pow( NUM_SYMS, k ); 01852 const int32_t nofKmers0 = nofKmers1 * NUM_SYMS; 01853 for( i = 0; i < seqLen; ++i ) { 01854 float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL; 01855 float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL; 01856 float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ]; 01857 float64_t* const subs_k0i = & subs[ k-0 ][ i*nofKmers0 ]; 01858 int32_t y0; 01859 for( y0 = 0; y0 < nofKmers0; ++y0 ) { 01860 const int32_t y1l = y0 / NUM_SYMS; 01861 const int32_t y1r = y0 % nofKmers1; 01862 const int32_t y2 = y1r / NUM_SYMS; 01863 subs_k0i[ y0 ] += subs_k1i0[ y1l ]; 01864 if( i < seqLen-1 ) { 01865 subs_k0i[ y0 ] += subs_k1i1[ y1r ]; 01866 if( k > 1 ) { 01867 subs_k0i[ y0 ] -= subs_k2i1[ y2 ]; 01868 } 01869 } 01870 } 01871 } 01872 } 01873 } 01874 01875 // === compute POIMs 01876 poim_tries.POIMs_add_SLR( subs, max_degree, debug ); 01877 01878 // === clean; return "subs" as vector 01879 delete[] subs; 01880 num_feat = 1; 01881 num_sym = bigTabSize; 01882 01883 use_poim_tries=false; 01884 poim_tries.delete_trees(false); 01885 01886 return poim_result; 01887 } 01888 01889 01890 void CWeightedDegreePositionStringKernel::prepare_POIM2( 01891 float64_t* distrib, int32_t num_sym, int32_t num_feat) 01892 { 01893 free(m_poim_distrib); 01894 m_poim_distrib=(float64_t*)malloc(num_sym*num_feat*sizeof(float64_t)); 01895 ASSERT(m_poim_distrib); 01896 01897 memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t)); 01898 m_poim_num_sym=num_sym; 01899 m_poim_num_feat=num_feat; 01900 } 01901 01902 void CWeightedDegreePositionStringKernel::compute_POIM2( 01903 int32_t max_degree, CSVM* svm) 01904 { 01905 ASSERT(svm); 01906 int32_t num_suppvec=svm->get_num_support_vectors(); 01907 int32_t* sv_idx=new int32_t[num_suppvec]; 01908 float64_t* sv_weight=new float64_t[num_suppvec]; 01909 01910 for (int32_t i=0; i<num_suppvec; i++) 01911 { 01912 sv_idx[i]=svm->get_support_vector(i); 01913 sv_weight[i]=svm->get_alpha(i); 01914 } 01915 01916 if ((max_degree < 1) || (max_degree > 12)) 01917 { 01918 //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree); 01919 SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree); 01920 max_degree=1; 01921 } 01922 01923 int32_t num_feat = m_poim_num_feat; 01924 int32_t num_sym = m_poim_num_sym; 01925 free(m_poim); 01926 01927 m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL, num_suppvec, sv_idx, 01928 sv_weight, m_poim_distrib); 01929 01930 ASSERT(num_feat==1); 01931 m_poim_result_len=num_sym; 01932 01933 delete[] sv_weight; 01934 delete[] sv_idx; 01935 } 01936 01937 void CWeightedDegreePositionStringKernel::get_POIM2( 01938 float64_t** poim, int32_t* result_len) 01939 { 01940 *poim=(float64_t*) malloc(m_poim_result_len*sizeof(float64_t)); 01941 ASSERT(*poim); 01942 memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ; 01943 *result_len=m_poim_result_len ; 01944 } 01945 01946 void CWeightedDegreePositionStringKernel::cleanup_POIM2() 01947 { 01948 free(m_poim) ; 01949 m_poim=NULL ; 01950 free(m_poim_distrib) ; 01951 m_poim_distrib=NULL ; 01952 m_poim_num_sym=0 ; 01953 m_poim_num_sym=0 ; 01954 m_poim_result_len=0 ; 01955 } 01956