// SHOGUN v0.9.3
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___ 00013 #define _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___ 00014 00015 #include "lib/common.h" 00016 #include "kernel/StringKernel.h" 00017 #include "kernel/WeightedDegreeStringKernel.h" 00018 #include "lib/Trie.h" 00019 00020 namespace shogun 00021 { 00022 00023 class CSVM; 00024 00048 class CWeightedDegreePositionStringKernel: public CStringKernel<char> 00049 { 00050 public: 00058 CWeightedDegreePositionStringKernel( 00059 int32_t size, int32_t degree, 00060 int32_t max_mismatch=0, int32_t mkl_stepsize=1); 00061 00072 CWeightedDegreePositionStringKernel( 00073 int32_t size, float64_t* weights, int32_t degree, 00074 int32_t max_mismatch, int32_t* shift, int32_t shift_len, 00075 int32_t mkl_stepsize=1); 00076 00083 CWeightedDegreePositionStringKernel( 00084 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree); 00085 00086 virtual ~CWeightedDegreePositionStringKernel(); 00087 00094 virtual bool init(CFeatures* l, CFeatures* r); 00095 00097 virtual void cleanup(); 00098 00103 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREEPOS; } 00104 00109 virtual const char* get_name() const { return "WeightedDegreePos"; } 00110 00118 inline virtual bool init_optimization( 00119 int32_t p_count, int32_t *IDX, float64_t * alphas) 00120 { 00121 return init_optimization(p_count, IDX, alphas, -1); 00122 } 00123 00135 virtual bool init_optimization( 00136 int32_t count, int32_t *IDX, float64_t * alphas, int32_t tree_num, 00137 int32_t 
upto_tree=-1); 00138 00143 virtual bool delete_optimization(); 00144 00150 inline virtual float64_t compute_optimized(int32_t idx) 00151 { 00152 ASSERT(get_is_initialized()); 00153 ASSERT(alphabet); 00154 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00155 return compute_by_tree(idx); 00156 } 00157 00162 static void* compute_batch_helper(void* p); 00163 00174 virtual void compute_batch( 00175 int32_t num_vec, int32_t* vec_idx, float64_t* target, 00176 int32_t num_suppvec, int32_t* IDX, float64_t* alphas, 00177 float64_t factor=1.0); 00178 00182 inline virtual void clear_normal() 00183 { 00184 if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 00185 { 00186 tries.set_use_compact_terminal_nodes(false) ; 00187 SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ; 00188 } 00189 00190 if (get_is_initialized()) 00191 { 00192 if (opt_type==SLOWBUTMEMEFFICIENT) 00193 tries.delete_trees(true); 00194 else if (opt_type==FASTBUTMEMHUNGRY) 00195 tries.delete_trees(false); // still buggy 00196 else 00197 SG_ERROR( "unknown optimization type\n"); 00198 00199 set_is_initialized(false); 00200 } 00201 } 00202 00208 inline virtual void add_to_normal(int32_t idx, float64_t weight) 00209 { 00210 add_example_to_tree(idx, weight); 00211 set_is_initialized(true); 00212 } 00213 00218 inline virtual int32_t get_num_subkernels() 00219 { 00220 if (position_weights!=NULL) 00221 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ; 00222 if (length==0) 00223 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize); 00224 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ; 00225 } 00226 00232 inline void compute_by_subkernel( 00233 int32_t idx, float64_t * subkernel_contrib) 00234 { 00235 if (get_is_initialized()) 00236 { 00237 compute_by_tree(idx, subkernel_contrib); 00238 return ; 00239 } 00240 00241 SG_ERROR( "CWeightedDegreePositionStringKernel optimization not initialized\n") ; 00242 } 00243 00249 inline const 
float64_t* get_subkernel_weights(int32_t& num_weights) 00250 { 00251 num_weights = get_num_subkernels() ; 00252 00253 delete[] weights_buffer ; 00254 weights_buffer = new float64_t[num_weights] ; 00255 00256 if (position_weights!=NULL) 00257 for (int32_t i=0; i<num_weights; i++) 00258 weights_buffer[i] = position_weights[i*mkl_stepsize] ; 00259 else 00260 for (int32_t i=0; i<num_weights; i++) 00261 weights_buffer[i] = weights[i*mkl_stepsize] ; 00262 00263 return weights_buffer ; 00264 } 00265 00271 inline void set_subkernel_weights( 00272 float64_t* weights2, int32_t num_weights2) 00273 { 00274 int32_t num_weights = get_num_subkernels() ; 00275 if (num_weights!=num_weights2) 00276 SG_ERROR( "number of weights do not match\n") ; 00277 00278 if (position_weights!=NULL) 00279 for (int32_t i=0; i<num_weights; i++) 00280 for (int32_t j=0; j<mkl_stepsize; j++) 00281 { 00282 if (i*mkl_stepsize+j<seq_length) 00283 position_weights[i*mkl_stepsize+j] = weights2[i] ; 00284 } 00285 else if (length==0) 00286 { 00287 for (int32_t i=0; i<num_weights; i++) 00288 for (int32_t j=0; j<mkl_stepsize; j++) 00289 if (i*mkl_stepsize+j<get_degree()) 00290 weights[i*mkl_stepsize+j] = weights2[i] ; 00291 } 00292 else 00293 { 00294 for (int32_t i=0; i<num_weights; i++) 00295 for (int32_t j=0; j<mkl_stepsize; j++) 00296 if (i*mkl_stepsize+j<get_degree()*length) 00297 weights[i*mkl_stepsize+j] = weights2[i] ; 00298 } 00299 } 00300 00301 // other kernel tree operations 00307 float64_t* compute_abs_weights(int32_t & len); 00308 00313 bool is_tree_initialized() { return tree_initialized; } 00314 00319 inline int32_t get_max_mismatch() { return max_mismatch; } 00320 00325 inline int32_t get_degree() { return degree; } 00326 00332 inline float64_t *get_degree_weights(int32_t& d, int32_t& len) 00333 { 00334 d=degree; 00335 len=length; 00336 return weights; 00337 } 00338 00344 inline float64_t *get_weights(int32_t& num_weights) 00345 { 00346 if (position_weights!=NULL) 00347 { 00348 num_weights = 
seq_length ; 00349 return position_weights ; 00350 } 00351 if (length==0) 00352 num_weights = degree ; 00353 else 00354 num_weights = degree*length ; 00355 return weights; 00356 } 00357 00363 inline float64_t *get_position_weights(int32_t& len) 00364 { 00365 len=seq_length; 00366 return position_weights; 00367 } 00368 00374 bool set_shifts(int32_t* shifts, int32_t len); 00375 00382 virtual bool set_weights(float64_t* weights, int32_t d, int32_t len=0); 00383 00388 virtual bool set_wd_weights(); 00389 00396 virtual bool set_position_weights(float64_t* pws, int32_t len); 00397 00405 bool set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num); 00406 00414 bool set_position_weights_rhs(float64_t* pws, int32_t len, int32_t num); 00415 00420 bool init_block_weights(); 00421 00426 bool init_block_weights_from_wd(); 00427 00432 bool init_block_weights_from_wd_external(); 00433 00438 bool init_block_weights_const(); 00439 00444 bool init_block_weights_linear(); 00445 00450 bool init_block_weights_sqpoly(); 00451 00456 bool init_block_weights_cubicpoly(); 00457 00462 bool init_block_weights_exp(); 00463 00468 bool init_block_weights_log(); 00469 00474 bool init_block_weights_external(); 00475 00480 bool delete_position_weights() 00481 { 00482 delete[] position_weights; 00483 position_weights=NULL; 00484 return true; 00485 } 00486 00491 bool delete_position_weights_lhs() 00492 { 00493 delete[] position_weights_lhs; 00494 position_weights_lhs=NULL; 00495 return true; 00496 } 00497 00502 bool delete_position_weights_rhs() 00503 { 00504 delete[] position_weights_rhs; 00505 position_weights_rhs=NULL; 00506 return true; 00507 } 00508 00514 virtual float64_t compute_by_tree(int32_t idx); 00515 00521 virtual void compute_by_tree(int32_t idx, float64_t* LevelContrib); 00522 00535 float64_t* compute_scoring( 00536 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 00537 float64_t* target, int32_t num_suppvec, int32_t* IDX, 00538 float64_t* weights); 00539 00548 char* 
compute_consensus( 00549 int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, 00550 float64_t* alphas); 00551 00563 float64_t* extract_w( 00564 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 00565 float64_t* w_result, int32_t num_suppvec, int32_t* IDX, 00566 float64_t* alphas); 00567 00580 float64_t* compute_POIM( 00581 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 00582 float64_t* poim_result, int32_t num_suppvec, int32_t* IDX, 00583 float64_t* alphas, float64_t* distrib); 00584 00591 void prepare_POIM2( 00592 float64_t* distrib, int32_t num_sym, int32_t num_feat); 00593 00600 void compute_POIM2(int32_t max_degree, CSVM* svm); 00601 00607 void get_POIM2(float64_t** poim, int32_t* result_len); 00608 00610 void cleanup_POIM2(); 00611 00612 protected: 00614 void create_empty_tries(); 00615 00621 virtual void add_example_to_tree( 00622 int32_t idx, float64_t weight); 00623 00630 void add_example_to_single_tree( 00631 int32_t idx, float64_t weight, int32_t tree_num); 00632 00641 virtual float64_t compute(int32_t idx_a, int32_t idx_b); 00642 00651 float64_t compute_with_mismatch( 00652 char* avec, int32_t alen, char* bvec, int32_t blen); 00653 00662 float64_t compute_without_mismatch( 00663 char* avec, int32_t alen, char* bvec, int32_t blen); 00664 00673 float64_t compute_without_mismatch_matrix( 00674 char* avec, int32_t alen, char* bvec, int32_t blen); 00675 00686 float64_t compute_without_mismatch_position_weights( 00687 char* avec, float64_t *posweights_lhs, int32_t alen, 00688 char* bvec, float64_t *posweights_rhs, int32_t blen); 00689 00691 virtual void remove_lhs(); 00692 00693 protected: 00695 float64_t* weights; 00697 float64_t* position_weights; 00699 float64_t* position_weights_lhs; 00701 float64_t* position_weights_rhs; 00703 bool* position_mask; 00704 00706 float64_t* weights_buffer; 00708 int32_t mkl_stepsize; 00709 00711 int32_t degree; 00713 int32_t length; 00714 00716 int32_t max_mismatch; 00718 int32_t seq_length; 00719 00721 
int32_t *shift; 00723 int32_t shift_len; 00725 int32_t max_shift; 00726 00728 bool block_computation; 00729 00731 int32_t num_block_weights_external; 00733 float64_t* block_weights_external; 00734 00736 float64_t* block_weights; 00738 EWDKernType type; 00740 int32_t which_degree; 00741 00743 CTrie<DNATrie> tries; 00745 CTrie<POIMTrie> poim_tries; 00746 00748 bool tree_initialized; 00750 bool use_poim_tries; 00751 00753 float64_t* m_poim_distrib; 00755 float64_t* m_poim; 00756 00758 int32_t m_poim_num_sym; 00760 int32_t m_poim_num_feat; 00762 int32_t m_poim_result_len; 00763 00765 CAlphabet* alphabet; 00766 }; 00767 } 00768 #endif /* _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H__ */