|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2009 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _TRIE_H___ 00013 #define _TRIE_H___ 00014 00015 #include <string.h> 00016 #include "lib/common.h" 00017 #include "lib/io.h" 00018 #include "lib/DynamicArray.h" 00019 #include "lib/Mathematics.h" 00020 #include "base/SGObject.h" 00021 00022 namespace shogun 00023 { 00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00025 00026 // sentinel is 0xFFFFFFFC or float -2 00027 #define NO_CHILD ((int32_t)-1073741824) 00028 00029 #define WEIGHTS_IN_TRIE 00030 //#define TRIE_CHECK_EVERYTHING 00031 00032 #ifdef TRIE_CHECK_EVERYTHING 00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x) 00034 #else 00035 #define TRIE_ASSERT_EVERYTHING(x) 00036 #endif 00037 00038 //#define TRIE_ASSERT(x) ASSERT(x) 00039 #define TRIE_ASSERT(x) 00040 00041 #define TRIE_TERMINAL_CHARACTER 7 00042 00044 struct ConsensusEntry 00045 { 00047 uint64_t string; 00049 float32_t score; 00051 int32_t bt; 00052 }; 00053 00055 struct POIMTrie 00056 { 00058 float64_t weight; 00059 #ifdef TRIE_CHECK_EVERYTHING 00060 00061 bool has_seq; 00063 bool has_floats; 00064 #endif 00065 union 00066 { 00068 float32_t child_weights[4]; 00070 int32_t children[4]; 00072 uint8_t seq[16] ; 00073 }; 00074 00076 float64_t S; 00078 float64_t L; 00080 float64_t R; 00081 }; 00082 00084 struct DNATrie 00085 { 00087 float64_t weight; 00088 #ifdef TRIE_CHECK_EVERYTHING 00089 00090 bool has_seq; 00092 bool has_floats; 00093 #endif 00094 union 00095 { 00097 float32_t child_weights[4]; 00099 int32_t children[4]; 00101 uint8_t seq[16] ; 00102 }; 00103 }; 00104 00106 struct TreeParseInfo { 00108 int32_t num_sym; 00110 int32_t num_feat; 00112 int32_t p; 00114 int32_t k; 00116 int32_t* nofsKmers; 00118 float64_t* margFactors; 00120 int32_t* x; 00122 int32_t* substrs; 00124 int32_t y0; 00126 float64_t* C_k; 00128 float64_t* L_k; 00130 float64_t* R_k; 00131 }; 00132 00133 #endif // DOXYGEN_SHOULD_SKIP_THIS 00134 00135 template <class Trie> class CTrie; 00136 00155 template <class Trie> class CTrie : public CSGObject 00156 { 00157 public: 00164 CTrie(int32_t d, bool p_use_compact_terminal_nodes=true); 00165 00167 CTrie(const CTrie & to_copy); 00168 virtual ~CTrie(); 00169 00171 const CTrie & operator=(const CTrie & to_copy); 00172 00180 bool compare_traverse( 00181 int32_t node, const CTrie & other, int32_t other_node); 00182 00188 bool compare(const CTrie & other); 00189 00196 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const; 00197 00204 int32_t find_deepest_node( 00205 int32_t start_node, int32_t &deepest_node) const; 00206 00211 void display_node(int32_t node) const; 00212 00214 void destroy(); 00215 00220 void set_degree(int32_t d); 00221 00228 void create(int32_t len, bool p_use_compact_terminal_nodes=true); 00229 00235 void delete_trees(bool p_use_compact_terminal_nodes=true); 00236 00247 void add_to_trie( 00248 int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha, 00249 float64_t *weights, bool degree_times_position_weights); 00250 00257 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth); 00258 00264 float64_t* compute_abs_weights(int32_t &len); 00265 00278 float64_t compute_by_tree_helper( 00279 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00280 int32_t weight_pos, float64_t * weights, 00281 bool degree_times_position_weights) ; 00282 00297 void compute_by_tree_helper( 00298 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00299 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 00300 int32_t mkl_stepsize, float64_t * weights, 00301 bool degree_times_position_weights); 00302 00317 void compute_scoring_helper( 00318 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 00319 int32_t max_degree, int32_t num_feat, int32_t num_sym, 00320 int32_t sym_offset, int32_t offs, float64_t* result); 00321 00334 void add_example_to_tree_mismatch_recursion( 00335 int32_t tree, int32_t i, float64_t alpha, int32_t *vec, 00336 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec, 00337 int32_t max_mismatch, float64_t * weights); 00338 00348 void traverse( 00349 int32_t tree, const int32_t p, struct TreeParseInfo info, 00350 const int32_t depth, int32_t* const x, const int32_t k); 00351 00361 void count( 00362 const float64_t w, const int32_t depth, 00363 const struct TreeParseInfo info, const int32_t p, int32_t* x, 00364 const int32_t k); 00365 00372 int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights); 00373 00382 float64_t get_cumulative_score( 00383 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights); 00384 00394 void fill_backtracking_table_recursion( 00395 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 00396 CDynamicArray<ConsensusEntry>* table, float64_t* weights); 00397 00406 void fill_backtracking_table( 00407 int32_t pos, CDynamicArray<ConsensusEntry>* prev, 00408 CDynamicArray<ConsensusEntry>* cur, bool cumulative, 00409 float64_t* weights); 00410 00416 void POIMs_extract_W(float64_t* const* const W, const int32_t K); 00417 00422 void POIMs_precalc_SLR(const float64_t* const distrib); 00423 00433 void POIMs_get_SLR( 00434 const int32_t parentIdx, const int32_t sym, const int32_t depth, 00435 float64_t* S, float64_t* L, float64_t* R); 00436 00443 void POIMs_add_SLR( 00444 float64_t* const* const poims, const int32_t K, 00445 const int32_t debug); 00446 00451 inline bool get_use_compact_terminal_nodes() 00452 { 00453 return use_compact_terminal_nodes ; 00454 } 00455 00461 inline void set_use_compact_terminal_nodes( 00462 bool p_use_compact_terminal_nodes) 00463 { 00464 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 00465 } 00466 00471 inline int32_t get_num_used_nodes() 00472 { 00473 return TreeMemPtr; 00474 } 00475 00480 inline void set_position_weights(const float64_t * p_position_weights) 00481 { 00482 position_weights=p_position_weights; 00483 } 00484 00489 inline int32_t get_node(bool last_node=false) 00490 { 00491 int32_t ret = TreeMemPtr++; 00492 check_treemem() ; 00493 00494 if (last_node) 00495 { 00496 for (int32_t q=0; q<4; q++) 00497 TreeMem[ret].child_weights[q]=0.0; 00498 } 00499 else 00500 { 00501 for (int32_t q=0; q<4; q++) 00502 TreeMem[ret].children[q]=NO_CHILD; 00503 } 00504 #ifdef TRIE_CHECK_EVERYTHING 00505 TreeMem[ret].has_seq=false ; 00506 TreeMem[ret].has_floats=false ; 00507 #endif 00508 TreeMem[ret].weight=0.0; 00509 return ret ; 00510 } 00511 00513 inline void check_treemem() 00514 { 00515 if (TreeMemPtr+10 < TreeMemPtrMax) 00516 return; 00517 SG_DEBUG( "Extending TreeMem from %i to %i elements\n", 00518 TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2)); 00519 TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2); 00520 TreeMem = (Trie*) realloc(TreeMem, 00521 TreeMemPtrMax*sizeof(Trie)); 00522 if (!TreeMem) 00523 SG_ERROR( "out of memory\n"); 00524 } 00525 00530 inline void set_weights_in_tree(bool weights_in_tree_) 00531 { 00532 weights_in_tree = weights_in_tree_; 00533 } 00534 00539 inline bool get_weights_in_tree() 00540 { 00541 return weights_in_tree; 00542 } 00543 00553 void POIMs_extract_W_helper( 00554 const int32_t nodeIdx, const int32_t depth, const int32_t offset, 00555 const int32_t y0, float64_t* const* const W, const int32_t K); 00556 00569 void POIMs_calc_SLR_helper1( 00570 const float64_t* const distrib, const int32_t i, 00571 const int32_t nodeIdx, int32_t left_tries_idx[4], 00572 const int32_t depth, int32_t const lastSym, float64_t* S, 00573 float64_t* L, float64_t* R); 00574 00575 00586 void POIMs_calc_SLR_helper2( 00587 const float64_t* const distrib, const int32_t i, 00588 const int32_t nodeIdx, int32_t left_tries_idx[4], 00589 const int32_t depth, float64_t* S, float64_t* L, float64_t* R); 00590 00601 void POIMs_add_SLR_helper1( 00602 const int32_t nodeIdx, const int32_t depth,const int32_t i, 00603 const int32_t y0, float64_t* const* const poims, const int32_t K, 00604 const int32_t debug); 00605 00619 void POIMs_add_SLR_helper2( 00620 float64_t* const* const poims, const int32_t K, const int32_t k, 00621 const int32_t i, const int32_t y, const float64_t valW, 00622 const float64_t valS, const float64_t valL, const float64_t valR, 00623 const int32_t debug); 00624 00626 inline virtual const char* get_name() const { return "Trie"; } 00627 00628 public: 00630 int32_t NUM_SYMS; 00631 00632 protected: 00634 int32_t length; 00636 int32_t * trees; 00637 00639 int32_t degree; 00641 float64_t const * position_weights; 00642 00644 Trie* TreeMem; 00646 int32_t TreeMemPtr; 00648 int32_t TreeMemPtrMax; 00650 bool use_compact_terminal_nodes; 00651 00653 bool weights_in_tree; 00654 00656 int32_t* nofsKmers; 00657 }; 00658 00659 template <class Trie> 00660 CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes) 00661 : CSGObject(), degree(d), position_weights(NULL), 00662 use_compact_terminal_nodes(p_use_compact_terminal_nodes), 00663 weights_in_tree(true) 00664 { 00665 TreeMemPtrMax=1024*1024/sizeof(Trie); 00666 TreeMemPtr=0; 00667 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie)); 00668 00669 length=0; 00670 trees=NULL; 00671 00672 NUM_SYMS=4; 00673 } 00674 00675 template <class Trie> 00676 CTrie<Trie>::CTrie(const CTrie & to_copy) 00677 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL), 00678 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes) 00679 { 00680 if (to_copy.position_weights!=NULL) 00681 { 00682 position_weights = to_copy.position_weights; 00683 /*new float64_t[to_copy.length]; 00684 for (int32_t i=0; i<to_copy.length; i++) 00685 position_weights[i]=to_copy.position_weights[i]; */ 00686 } 00687 else 00688 position_weights=NULL; 00689 00690 TreeMemPtrMax=to_copy.TreeMemPtrMax; 00691 TreeMemPtr=to_copy.TreeMemPtr; 00692 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie)); 00693 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)); 00694 00695 length=to_copy.length; 00696 trees=new int32_t[length]; 00697 for (int32_t i=0; i<length; i++) 00698 trees[i]=to_copy.trees[i]; 00699 00700 NUM_SYMS=4; 00701 } 00702 00703 template <class Trie> 00704 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy) 00705 { 00706 degree=to_copy.degree ; 00707 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ; 00708 00709 delete[] position_weights ; 00710 position_weights=NULL ; 00711 if (to_copy.position_weights!=NULL) 00712 { 00713 position_weights=to_copy.position_weights ; 00714 /*position_weights = new float64_t[to_copy.length] ; 00715 for (int32_t i=0; i<to_copy.length; i++) 00716 position_weights[i]=to_copy.position_weights[i] ;*/ 00717 } 00718 else 00719 position_weights=NULL ; 00720 00721 TreeMemPtrMax=to_copy.TreeMemPtrMax ; 00722 TreeMemPtr=to_copy.TreeMemPtr ; 00723 free(TreeMem) ; 00724 TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ; 00725 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ; 00726 00727 length = to_copy.length ; 00728 if (trees) 00729 delete[] trees ; 00730 trees=new int32_t[length] ; 00731 for (int32_t i=0; i<length; i++) 00732 trees[i]=to_copy.trees[i] ; 00733 00734 return *this ; 00735 } 00736 00737 template <class Trie> 00738 int32_t CTrie<Trie>::find_deepest_node( 00739 int32_t start_node, int32_t& deepest_node) const 00740 { 00741 #ifdef TRIE_CHECK_EVERYTHING 00742 int32_t ret=0 ; 00743 SG_DEBUG("start_node=%i\n", start_node) ; 00744 00745 if (start_node==NO_CHILD) 00746 { 00747 for (int32_t i=0; i<length; i++) 00748 { 00749 int32_t my_deepest_node ; 00750 int32_t depth=find_deepest_node(i, my_deepest_node) ; 00751 SG_DEBUG("start_node %i depth=%i\n", i, depth) ; 00752 if (depth>ret) 00753 { 00754 deepest_node=my_deepest_node ; 00755 ret=depth ; 00756 } 00757 } 00758 return ret ; 00759 } 00760 00761 if (TreeMem[start_node].has_seq) 00762 { 00763 for (int32_t q=0; q<16; q++) 00764 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER) 00765 ret++ ; 00766 deepest_node=start_node ; 00767 return ret ; 00768 } 00769 if (TreeMem[start_node].has_floats) 00770 { 00771 deepest_node=start_node ; 00772 return 1 ; 00773 } 00774 00775 for (int32_t q=0; q<4; q++) 00776 { 00777 int32_t my_deepest_node ; 00778 if (TreeMem[start_node].children[q]==NO_CHILD) 00779 continue ; 00780 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ; 00781 if (depth>ret) 00782 { 00783 deepest_node=my_deepest_node ; 00784 ret=depth ; 00785 } 00786 } 00787 return ret ; 00788 #else 00789 SG_ERROR( "not implemented\n") ; 00790 return 0 ; 00791 #endif 00792 } 00793 00794 template <class Trie> 00795 int32_t CTrie<Trie>::compact_nodes( 00796 int32_t start_node, int32_t depth, float64_t * weights) 00797 { 00798 SG_ERROR( "code buggy\n") ; 00799 00800 int32_t ret=0 ; 00801 00802 if (start_node==NO_CHILD) 00803 { 00804 for (int32_t i=0; i<length; i++) 00805 compact_nodes(i,1, weights) ; 00806 return 0 ; 00807 } 00808 if (start_node<0) 00809 return -1 ; 00810 00811 if (depth==degree-1) 00812 { 00813 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ; 00814 int32_t num_used=0 ; 00815 for (int32_t q=0; q<4; q++) 00816 if (TreeMem[start_node].child_weights[q]!=0.0) 00817 num_used++ ; 00818 if (num_used>1) 00819 return -1 ; 00820 return 1 ; 00821 } 00822 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ; 00823 00824 int32_t num_used = 0 ; 00825 int32_t q_used=-1 ; 00826 00827 for (int32_t q=0; q<4; q++) 00828 { 00829 if (TreeMem[start_node].children[q]==NO_CHILD) 00830 continue ; 00831 num_used++ ; 00832 q_used=q ; 00833 } 00834 if (num_used>1) 00835 { 00836 if (depth>=degree-2) 00837 return -1 ; 00838 for (int32_t q=0; q<4; q++) 00839 { 00840 if (TreeMem[start_node].children[q]==NO_CHILD) 00841 continue ; 00842 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ; 00843 if (num<=2) 00844 continue ; 00845 int32_t node=get_node() ; 00846 00847 int32_t last_node=TreeMem[start_node].children[q] ; 00848 if (weights_in_tree) 00849 { 00850 ASSERT(weights[depth]!=0.0) ; 00851 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ; 00852 } 00853 else 00854 TreeMem[node].weight=TreeMem[last_node].weight ; 00855 00856 #ifdef TRIE_CHECK_EVERYTHING 00857 TreeMem[node].has_seq=true ; 00858 #endif 00859 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 00860 for (int32_t n=0; n<num; n++) 00861 { 00862 ASSERT(depth+n+1<=degree-1) ; 00863 ASSERT(last_node!=NO_CHILD) ; 00864 if (depth+n+1==degree-1) 00865 { 00866 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ; 00867 int32_t k ; 00868 for (k=0; k<4; k++) 00869 if (TreeMem[last_node].child_weights[k]!=0.0) 00870 break ; 00871 if (k==4) 00872 break ; 00873 TreeMem[node].seq[n]=k ; 00874 break ; 00875 } 00876 else 00877 { 00878 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ; 00879 int32_t k ; 00880 for (k=0; k<4; k++) 00881 if (TreeMem[last_node].children[k]!=NO_CHILD) 00882 break ; 00883 if (k==4) 00884 break ; 00885 TreeMem[node].seq[n]=k ; 00886 last_node=TreeMem[last_node].children[k] ; 00887 } 00888 } 00889 TreeMem[start_node].children[q]=-node ; 00890 } 00891 return -1 ; 00892 } 00893 if (num_used==0) 00894 return 0 ; 00895 00896 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ; 00897 if (ret<0) 00898 return ret ; 00899 return ret+1 ; 00900 } 00901 00902 00903 template <class Trie> 00904 bool CTrie<Trie>::compare_traverse( 00905 int32_t node, const CTrie<Trie> & other, int32_t other_node) 00906 { 00907 SG_DEBUG("checking nodes %i and %i\n", node, other_node) ; 00908 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5) 00909 { 00910 SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ; 00911 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00912 display_node(node) ; 00913 SG_DEBUG( "============================================================\n") ; 00914 other.display_node(other_node) ; 00915 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00916 return false ; 00917 } 00918 00919 #ifdef TRIE_CHECK_EVERYTHING 00920 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq) 00921 { 00922 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ; 00923 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00924 display_node(node) ; 00925 SG_DEBUG( "============================================================\n") ; 00926 other.display_node(other_node) ; 00927 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00928 return false ; 00929 } 00930 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats) 00931 { 00932 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ; 00933 return false ; 00934 } 00935 if (other.TreeMem[other_node].has_floats) 00936 { 00937 for (int32_t q=0; q<4; q++) 00938 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5) 00939 { 00940 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ; 00941 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00942 display_node(node) ; 00943 SG_DEBUG( "============================================================\n") ; 00944 other.display_node(other_node) ; 00945 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00946 return false ; 00947 } 00948 } 00949 if (other.TreeMem[other_node].has_seq) 00950 { 00951 for (int32_t q=0; q<16; q++) 00952 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4))) 00953 { 00954 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ; 00955 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00956 display_node(node) ; 00957 SG_DEBUG( "============================================================\n") ; 00958 other.display_node(other_node) ; 00959 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00960 return false ; 00961 } 00962 } 00963 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats) 00964 { 00965 for (int32_t q=0; q<4; q++) 00966 { 00967 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD)) 00968 continue ; 00969 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD)) 00970 { 00971 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ; 00972 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00973 display_node(node) ; 00974 SG_DEBUG( "============================================================\n") ; 00975 other.display_node(other_node) ; 00976 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00977 return false ; 00978 } 00979 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q]))) 00980 return false ; 00981 } 00982 } 00983 #else 00984 SG_ERROR( "not implemented\n") ; 00985 #endif 00986 00987 return true ; 00988 } 00989 00990 template <class Trie> 00991 bool CTrie<Trie>::compare(const CTrie<Trie> & other) 00992 { 00993 bool ret=true ; 00994 for (int32_t i=0; i<length; i++) 00995 if (!compare_traverse(trees[i], other, other.trees[i])) 00996 return false ; 00997 else 00998 SG_DEBUG("two tries at %i identical\n", i) ; 00999 01000 return ret ; 01001 } 01002 01003 template <class Trie> 01004 bool CTrie<Trie>::find_node( 01005 int32_t node, int32_t * trace, int32_t& trace_len) const 01006 { 01007 #ifdef TRIE_CHECK_EVERYTHING 01008 ASSERT(trace_len-1>=0) ; 01009 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax)) 01010 if (TreeMem[trace[trace_len-1]].has_seq) 01011 return false ; 01012 if (TreeMem[trace[trace_len-1]].has_floats) 01013 return false ; 01014 01015 for (int32_t q=0; q<4; q++) 01016 { 01017 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD) 01018 continue ; 01019 int32_t tl=trace_len+1 ; 01020 if (TreeMem[trace[trace_len-1]].children[q]>=0) 01021 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ; 01022 else 01023 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ; 01024 01025 if (trace[trace_len]==node) 01026 { 01027 trace_len=tl ; 01028 return true ; 01029 } 01030 if (find_node(node, trace, tl)) 01031 { 01032 trace_len=tl ; 01033 return true ; 01034 } 01035 } 01036 trace_len=0 ; 01037 return false ; 01038 #else 01039 SG_ERROR( "not implemented\n") ; 01040 return false ; 01041 #endif 01042 } 01043 01044 template <class Trie> 01045 void CTrie<Trie>::display_node(int32_t node) const 01046 { 01047 #ifdef TRIE_CHECK_EVERYTHING 01048 int32_t * trace=new int32_t[2*degree] ; 01049 int32_t trace_len=-1 ; 01050 bool found = false ; 01051 int32_t tree=-1 ; 01052 for (tree=0; tree<length; tree++) 01053 { 01054 trace[0]=trees[tree] ; 01055 trace_len=1 ; 01056 found=find_node(node, trace, trace_len) ; 01057 if (found) 01058 break ; 01059 } 01060 ASSERT(found) ; 01061 SG_PRINT( "position %i trace: ", tree) ; 01062 01063 for (int32_t i=0; i<trace_len-1; i++) 01064 { 01065 int32_t branch=-1 ; 01066 for (int32_t q=0; q<4; q++) 01067 if (abs(TreeMem[trace[i]].children[q])==trace[i+1]) 01068 { 01069 branch=q; 01070 break ; 01071 } 01072 ASSERT(branch!=-1) ; 01073 char acgt[5]="ACGT" ; 01074 SG_PRINT( "%c", acgt[branch]) ; 01075 } 01076 SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ; 01077 if (TreeMem[node].has_floats) 01078 { 01079 for (int32_t q=0; q<4; q++) 01080 SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ; 01081 } 01082 if (TreeMem[node].has_seq) 01083 { 01084 for (int32_t q=0; q<16; q++) 01085 SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ; 01086 } 01087 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats) 01088 { 01089 for (int32_t q=0; q<4; q++) 01090 { 01091 if (TreeMem[node].children[q]!=NO_CHILD) 01092 { 01093 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ; 01094 display_node(abs(TreeMem[node].children[q])) ; 01095 } 01096 else 01097 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ; 01098 } 01099 01100 } 01101 01102 delete[] trace ; 01103 #else 01104 SG_ERROR( "not implemented\n") ; 01105 #endif 01106 } 01107 01108 01109 template <class Trie> CTrie<Trie>::~CTrie() 01110 { 01111 destroy() ; 01112 01113 free(TreeMem) ; 01114 } 01115 01116 template <class Trie> void CTrie<Trie>::destroy() 01117 { 01118 if (trees!=NULL) 01119 { 01120 delete_trees(); 01121 for (int32_t i=0; i<length; i++) 01122 trees[i] = NO_CHILD; 01123 delete[] trees; 01124 01125 TreeMemPtr=0; 01126 length=0; 01127 trees=NULL; 01128 } 01129 } 01130 01131 template <class Trie> void CTrie<Trie>::set_degree(int32_t d) 01132 { 01133 delete_trees(get_use_compact_terminal_nodes()); 01134 degree=d; 01135 } 01136 01137 template <class Trie> void CTrie<Trie>::create( 01138 int32_t len, bool p_use_compact_terminal_nodes) 01139 { 01140 destroy(); 01141 01142 trees=new int32_t[len] ; 01143 TreeMemPtr=0 ; 01144 for (int32_t i=0; i<len; i++) 01145 trees[i]=get_node(degree==1); 01146 length = len ; 01147 01148 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01149 } 01150 01151 01152 template <class Trie> void CTrie<Trie>::delete_trees( 01153 bool p_use_compact_terminal_nodes) 01154 { 01155 if (trees==NULL) 01156 return; 01157 01158 TreeMemPtr=0 ; 01159 for (int32_t i=0; i<length; i++) 01160 trees[i]=get_node(degree==1); 01161 01162 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01163 } 01164 01165 template <class Trie> 01166 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth) 01167 { 01168 float64_t ret=0 ; 01169 01170 if (tree==NO_CHILD) 01171 return 0 ; 01172 TRIE_ASSERT(tree>=0) ; 01173 01174 if (depth==degree-2) 01175 { 01176 ret+=(TreeMem[tree].weight) ; 01177 01178 for (int32_t k=0; k<4; k++) 01179 ret+=(TreeMem[tree].child_weights[k]) ; 01180 01181 return ret ; 01182 } 01183 01184 ret+=(TreeMem[tree].weight) ; 01185 01186 for (int32_t i=0; i<4; i++) 01187 if (TreeMem[tree].children[i]!=NO_CHILD) 01188 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ; 01189 01190 return ret ; 01191 } 01192 01193 01194 template <class Trie> 01195 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len) 01196 { 01197 float64_t * sum=new float64_t[length*4] ; 01198 for (int32_t i=0; i<length*4; i++) 01199 sum[i]=0 ; 01200 len=length ; 01201 01202 for (int32_t i=0; i<length; i++) 01203 { 01204 TRIE_ASSERT(trees[i]!=NO_CHILD) ; 01205 for (int32_t k=0; k<4; k++) 01206 { 01207 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ; 01208 } 01209 } 01210 01211 return sum ; 01212 } 01213 01214 template <class Trie> 01215 void CTrie<Trie>::add_example_to_tree_mismatch_recursion( 01216 int32_t tree, int32_t i, float64_t alpha, 01217 int32_t *vec, int32_t len_rem, 01218 int32_t degree_rec, int32_t mismatch_rec, 01219 int32_t max_mismatch, float64_t * weights) 01220 { 01221 if (tree==NO_CHILD) 01222 tree=trees[i] ; 01223 TRIE_ASSERT(tree!=NO_CHILD) ; 01224 01225 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree)) 01226 return ; 01227 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ; 01228 01229 int32_t subtree = NO_CHILD ; 01230 01231 if (degree_rec==degree-1) 01232 { 01233 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01234 if (weights_in_tree) 01235 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]; 01236 else 01237 if (weights[degree_rec]!=0.0) 01238 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01239 if (mismatch_rec+1<=max_mismatch) 01240 for (int32_t o=0; o<3; o++) 01241 { 01242 if (weights_in_tree) 01243 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01244 else 01245 if (weights[degree_rec]!=0.0) 01246 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01247 } 01248 return ; 01249 } 01250 else 01251 { 01252 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01253 if (TreeMem[tree].children[vec[0]]!=NO_CHILD) 01254 { 01255 subtree=TreeMem[tree].children[vec[0]] ; 01256 if (weights_in_tree) 01257 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]; 01258 else 01259 if (weights[degree_rec]!=0.0) 01260 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01261 } 01262 else 01263 { 01264 int32_t tmp = get_node(degree_rec==degree-2); 01265 ASSERT(tmp>=0) ; 01266 TreeMem[tree].children[vec[0]]=tmp ; 01267 subtree=tmp ; 01268 #ifdef TRIE_CHECK_EVERYTHING 01269 if (degree_rec==degree-2) 01270 TreeMem[subtree].has_floats=true ; 01271 #endif 01272 if (weights_in_tree) 01273 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ; 01274 else 01275 if (weights[degree_rec]!=0.0) 01276 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ; 01277 else 01278 TreeMem[subtree].weight = 0.0 ; 01279 } 01280 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01281 &vec[1], len_rem-1, 01282 degree_rec+1, mismatch_rec, max_mismatch, weights) ; 01283 01284 if (mismatch_rec+1<=max_mismatch) 01285 { 01286 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01287 for (int32_t o=0; o<3; o++) 01288 { 01289 int32_t ot = other[vec[0]][o] ; 01290 if (TreeMem[tree].children[ot]!=NO_CHILD) 01291 { 01292 subtree=TreeMem[tree].children[ot] ; 01293 if (weights_in_tree) 01294 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01295 else 01296 if (weights[degree_rec]!=0.0) 01297 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01298 } 01299 else 01300 { 01301 int32_t tmp = get_node(degree_rec==degree-2); 01302 ASSERT(tmp>=0) ; 01303 TreeMem[tree].children[ot]=tmp ; 01304 subtree=tmp ; 01305 #ifdef TRIE_CHECK_EVERYTHING 01306 if (degree_rec==degree-2) 01307 TreeMem[subtree].has_floats=true ; 01308 #endif 01309 01310 if (weights_in_tree) 01311 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ; 01312 else 01313 if (weights[degree_rec]!=0.0) 01314 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ; 01315 else 01316 TreeMem[subtree].weight = 0.0 ; 01317 } 01318 01319 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01320 &vec[1], len_rem-1, 01321 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ; 01322 } 01323 } 01324 } 01325 } 01326 01327 template <class Trie> 01328 void CTrie<Trie>::compute_scoring_helper( 01329 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 01330 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset, 01331 int32_t offs, float64_t* result) 01332 { 01333 if (i+j<num_feat) 01334 { 01335 float64_t decay=1.0; //no decay by default 01336 //if (j>d) 01337 // decay=pow(0.5,j); //marginalize out lower order matches 01338 01339 if (j<degree-1) 01340 { 01341 for (int32_t k=0; k<num_sym; k++) 01342 { 01343 if (TreeMem[tree].children[k]!=NO_CHILD) 01344 { 01345 int32_t child=TreeMem[tree].children[k]; 01346 //continue recursion if not yet at max_degree, else add to result 01347 if (d<max_degree-1) 01348 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01349 else 01350 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight; 01351 01353 if (d==0) 01354 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result); 01355 } 01356 } 01357 } 01358 else if (j==degree-1) 01359 { 01360 for (int32_t k=0; k<num_sym; k++) 01361 { 01362 //continue recursion if not yet at max_degree, else add to result 01363 if (d<max_degree-1 && i<num_feat-1) 01364 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01365 else 01366 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k]; 01367 } 01368 } 01369 } 01370 } 01371 01372 template <class Trie> 01373 void CTrie<Trie>::traverse( 01374 int32_t tree, const int32_t p, struct TreeParseInfo info, 01375 const int32_t depth, int32_t* const x, const int32_t k) 01376 { 01377 const int32_t num_sym = info.num_sym; 01378 const int32_t y0 = info.y0; 01379 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] ); 01380 //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] ); 01381 //if( !( info.y0 == temp ) ) { 01382 // printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth ); 01383 //} 01384 //ASSERT( info.y0 == temp ); 01385 int32_t sym; 01386 ASSERT( depth < degree ); 01387 //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] ); 01388 if (depth<degree-1) 01389 { 01390 for( sym=0; sym<num_sym; ++sym ) { 01391 const int32_t childNum = TreeMem[tree].children[ sym ]; 01392 if( childNum != NO_CHILD ) { 01393 int32_t child = childNum ; 01394 x[depth] = sym; 01395 info.substrs[depth+1] = y0 + sym; 01396 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01397 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ); 01398 count( TreeMem[child].weight, depth, info, p, x, k ); 01399 traverse( child, p, info, depth+1, x, k ); 01400 x[depth] = -1; 01401 } 01402 } 01403 } 01404 else if( depth == degree-1 ) 01405 { 01406 for( sym=0; sym<num_sym; ++sym ) { 01407 const float64_t w = TreeMem[tree].child_weights[ sym ]; 01408 if( w != 0.0 ) { 01409 x[depth] = sym; 01410 info.substrs[depth+1] = y0 + sym; 01411 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01412 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ); 01413 count( w, depth, info, p, x, k ); 01414 x[depth] = -1; 01415 } 01416 } 01417 } 01418 //info.substrs[depth+1] = -1; 01419 //info.y0 = temp; 01420 } 01421 01422 template <class Trie> 01423 void CTrie<Trie>::count( 01424 const float64_t w, const int32_t depth, const struct TreeParseInfo info, 01425 const int32_t p, int32_t* x, const int32_t k) 01426 { 01427 ASSERT( fabs(w) < 1e10 ); 01428 ASSERT( x[depth] >= 0 ); 01429 ASSERT( x[depth+1] < 0 ); 01430 if ( depth < k ) { 01431 return; 01432 } 01433 //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) ); 01434 const int32_t nofKmers = info.nofsKmers[k]; 01435 const float64_t margWeight = w * info.margFactors[ depth-k ]; 01436 const int32_t m_a = depth - k + 1; 01437 const int32_t m_b = info.num_feat - p; 01438 const int32_t m = ( m_a < m_b ) ? m_a : m_b; 01439 // all proper k-substrings 01440 const int32_t offset0 = nofKmers * p; 01441 register int32_t i; 01442 register int32_t offset; 01443 offset = offset0; 01444 for( i = 0; i < m; ++i ) { 01445 const int32_t y = info.substrs[i+k+1]; 01446 info.C_k[ y + offset ] += margWeight; 01447 offset += nofKmers; 01448 } 01449 if( depth > k ) { 01450 // k-prefix 01451 const int32_t offsR = info.substrs[k+1] + offset0; 01452 info.R_k[offsR] += margWeight; 01453 // k-suffix 01454 if( p+depth-k < info.num_feat ) { 01455 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k); 01456 info.L_k[offsL] += margWeight; 01457 } 01458 } 01459 // # N.x = substring represented by N 01460 // # N.d = length of N.x 01461 // # N.s = starting position of N.x 01462 // # N.w = weight for feature represented by N 01463 // if( N.d >= k ) 01464 // margContrib = w / 4^(N.d-k) 01465 // for i = 1 to (N.d-k+1) 01466 // y = N.x[i:(i+k-1)] # overlapped k-mer 01467 // C_k[ N.s+i-1, y ] += margContrib 01468 // end; 01469 // if( N.d > k ) 01470 // L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib # j-suffix of N.x 01471 // R_k[ N.s, N.x[1:k] ] += margContrib # j-prefix of N.x 01472 // end; 01473 // end; 01474 } 01475 01476 template <class Trie> 01477 void CTrie<Trie>::add_to_trie( 01478 int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha, 01479 float64_t *weights, bool degree_times_position_weights) 01480 { 01481 int32_t tree = trees[i] ; 01482 //ASSERT(seq_offset==0) ; 01483 01484 int32_t max_depth = 0 ; 01485 float64_t* weights_column ; 01486 if (degree_times_position_weights) 01487 weights_column = &weights[(i+seq_offset)*degree] ; 01488 else 01489 weights_column = weights ; 01490 01491 if (weights_in_tree) 01492 { 01493 for (int32_t j=0; (j<degree) && (i+j<length); j++) 01494 if (CMath::abs(weights_column[j]*alpha)>0) 01495 max_depth = j+1 ; 01496 } 01497 else 01498 // don't use the weights 01499 max_depth=degree ; 01500 01501 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++) 01502 { 01503 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ; 01504 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD)) 01505 { 01506 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0) 01507 { 01508 // special treatment of the next nodes 01509 TRIE_ASSERT(j >= degree-16) ; 01510 // get the right element 01511 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01512 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01513 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ; 01514 01515 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ; 01516 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ; 01517 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ; 01518 01519 // check whether the same string is stored 01520 int32_t mismatch_pos = -1 ; 01521 { 01522 int32_t k ; 01523 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++) 01524 { 01525 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ; 01526 // ### 01527 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER)) 01528 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ; 01529 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ; 01530 TRIE_ASSERT(k<16) ; 01531 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k]) 01532 { 01533 mismatch_pos=k ; 01534 break ; 01535 } 01536 } 01537 } 01538 01539 // what happens when the .seq sequence is longer than vec? should we branch??? 01540 01541 if (mismatch_pos==-1) 01542 // if so, then just increase the weight by alpha and stop 01543 TreeMem[node].weight+=alpha ; 01544 else 01545 // otherwise 01546 // 1. replace current node with new node 01547 // 2. create new nodes until mismatching positon 01548 // 2. add a branch with old string (old node) and the new string (new node) 01549 { 01550 // replace old node 01551 int32_t last_node=tree ; 01552 01553 // create new nodes until mismatch 01554 int32_t k ; 01555 for (k=0; k<mismatch_pos; k++) 01556 { 01557 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ; 01558 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ; 01559 01560 int32_t tmp=get_node(); 01561 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ; 01562 last_node=tmp ; 01563 if (weights_in_tree) 01564 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ; 01565 else 01566 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ; 01567 TRIE_ASSERT(j+k!=degree-1) ; 01568 } 01569 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER)) 01570 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ; 01571 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ; 01572 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ; 01573 01574 if (j+k==degree-1) 01575 { 01576 // init child weights with zero if after dropping out 01577 // of the k<mismatch_pos loop we are one level below degree 01578 // (keep this even after get_node() change!) 01579 for (int32_t q=0; q<4; q++) 01580 TreeMem[last_node].child_weights[q]=0.0 ; 01581 if (weights_in_tree) 01582 { 01583 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01584 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ; 01585 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ; 01586 } 01587 else 01588 { 01589 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01590 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ; 01591 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ; 01592 } 01593 01594 #ifdef TRIE_CHECK_EVERYTHING 01595 TreeMem[last_node].has_floats=true ; 01596 #endif 01597 } 01598 else 01599 { 01600 // the branch for the existing string 01601 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01602 { 01603 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ; 01604 01605 // move string by mismatch_pos positions 01606 for (int32_t q=0; q<16; q++) 01607 { 01608 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length)) 01609 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ; 01610 else 01611 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ; 01612 } 01613 #ifdef TRIE_CHECK_EVERYTHING 01614 TreeMem[node].has_seq=true ; 01615 #endif 01616 } 01617 01618 // the new branch 01619 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ; 01620 int32_t tmp = get_node() ; 01621 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ; 01622 last_node=tmp ; 01623 01624 TreeMem[last_node].weight = alpha ; 01625 #ifdef TRIE_CHECK_EVERYTHING 01626 TreeMem[last_node].has_seq = true ; 01627 #endif 01628 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01629 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++) 01630 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ; 01631 } 01632 } 01633 break ; 01634 } 01635 else 01636 { 01637 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ; 01638 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ; 01639 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01640 if (weights_in_tree) 01641 TreeMem[tree].weight += alpha*weights_column[j]; 01642 else 01643 TreeMem[tree].weight += alpha ; 01644 } 01645 } 01646 else if (j==degree-1) 01647 { 01648 // special treatment of the last node 01649 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01650 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01651 if (weights_in_tree) 01652 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ; 01653 else 01654 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha; 01655 01656 break; 01657 } 01658 else 01659 { 01660 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ; 01661 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01662 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01663 01664 int32_t tmp = get_node((j==degree-2) && (!use_seq)); 01665 if (use_seq) 01666 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ; 01667 else 01668 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ; 01669 tree=tmp ; 01670 01671 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ; 01672 #ifdef TRIE_CHECK_EVERYTHING 01673 TreeMem[tree].has_seq = use_seq ; 01674 #endif 01675 if (use_seq) 01676 { 01677 TreeMem[tree].weight = alpha ; 01678 // important to have the terminal characters (see ###) 01679 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01680 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++) 01681 { 01682 TRIE_ASSERT(q<16) ; 01683 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ; 01684 } 01685 break ; 01686 } 01687 else 01688 { 01689 if (weights_in_tree) 01690 TreeMem[tree].weight = alpha*weights_column[j] ; 01691 else 01692 TreeMem[tree].weight = alpha ; 01693 #ifdef TRIE_CHECK_EVERYTHING 01694 if (j==degree-2) 01695 TreeMem[tree].has_floats = true ; 01696 #endif 01697 } 01698 } 01699 } 01700 } 01701 01702 template <class Trie> 01703 float64_t CTrie<Trie>::compute_by_tree_helper( 01704 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01705 int32_t weight_pos, float64_t* weights, 01706 bool degree_times_position_weights) 01707 { 01708 int32_t tree = trees[tree_pos] ; 01709 01710 if ((position_weights!=NULL) && (position_weights[weight_pos]==0)) 01711 return 0.0; 01712 01713 float64_t *weights_column=NULL ; 01714 if (degree_times_position_weights) 01715 weights_column=&weights[weight_pos*degree] ; 01716 else // weights is a vector (1 x degree) 01717 weights_column=weights ; 01718 01719 float64_t sum=0 ; 01720 for (int32_t j=0; seq_pos+j < len; j++) 01721 { 01722 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ; 01723 01724 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01725 { 01726 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01727 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01728 { 01729 tree = - TreeMem[tree].children[vec[seq_pos+j]]; 01730 TRIE_ASSERT(tree>=0) ; 01731 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01732 float64_t this_weight=0.0 ; 01733 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01734 { 01735 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ; 01736 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01737 break ; 01738 this_weight += weights_column[j+k] ; 01739 } 01740 sum += TreeMem[tree].weight * this_weight ; 01741 break ; 01742 } 01743 else 01744 { 01745 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01746 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01747 if (weights_in_tree) 01748 sum += TreeMem[tree].weight ; 01749 else 01750 sum += TreeMem[tree].weight * weights_column[j] ; 01751 } ; 01752 } 01753 else 01754 { 01755 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01756 if (j==degree-1) 01757 { 01758 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01759 if (weights_in_tree) 01760 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01761 else 01762 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ; 01763 } 01764 else 01765 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01766 01767 break; 01768 } 01769 } 01770 01771 if (position_weights!=NULL) 01772 return sum*position_weights[weight_pos] ; 01773 else 01774 return sum ; 01775 } 01776 01777 template <class Trie> 01778 void CTrie<Trie>::compute_by_tree_helper( 01779 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01780 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 01781 int32_t mkl_stepsize, float64_t * weights, 01782 bool degree_times_position_weights) 01783 { 01784 int32_t tree = trees[tree_pos] ; 01785 if (factor==0) 01786 return ; 01787 01788 if (position_weights!=NULL) 01789 { 01790 factor *= position_weights[weight_pos] ; 01791 if (factor==0) 01792 return ; 01793 if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree) 01794 { 01795 for (int32_t j=0; seq_pos+j<len; j++) 01796 { 01797 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01798 { 01799 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01800 { 01801 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01802 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01803 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01804 { 01805 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01806 break ; 01807 if (weights_in_tree) 01808 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01809 else 01810 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01811 } 01812 break ; 01813 } 01814 else 01815 { 01816 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01817 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01818 if (weights_in_tree) 01819 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01820 else 01821 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01822 } 01823 } 01824 else 01825 { 01826 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01827 if (j==degree-1) 01828 { 01829 if (weights_in_tree) 01830 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01831 else 01832 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01833 } 01834 } 01835 } 01836 } 01837 else // with position_weigths, weights is a matrix (len x degree) 01838 { 01839 for (int32_t j=0; seq_pos+j<len; j++) 01840 { 01841 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01842 { 01843 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01844 { 01845 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01846 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01847 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01848 { 01849 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01850 break ; 01851 if (weights_in_tree) 01852 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01853 else 01854 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01855 } 01856 break ; 01857 } 01858 else 01859 { 01860 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01861 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01862 if (weights_in_tree) 01863 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01864 else 01865 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ; 01866 } 01867 } 01868 else 01869 { 01870 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01871 if (j==degree-1) 01872 { 01873 if (weights_in_tree) 01874 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01875 else 01876 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ; 01877 } 01878 break ; 01879 } 01880 } 01881 } 01882 } 01883 else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree) 01884 { 01885 for (int32_t j=0; seq_pos+j<len; j++) 01886 { 01887 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01888 { 01889 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01890 { 01891 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01892 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01893 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01894 { 01895 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01896 break ; 01897 if (weights_in_tree) 01898 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01899 else 01900 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01901 } 01902 break ; 01903 } 01904 else 01905 { 01906 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01907 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01908 if (weights_in_tree) 01909 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ; 01910 else 01911 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01912 } 01913 } 01914 else 01915 { 01916 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01917 if (j==degree-1) 01918 { 01919 if (weights_in_tree) 01920 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01921 else 01922 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01923 } 01924 break ; 01925 } 01926 } 01927 } 01928 else // no position_weigths, weights is a matrix (len x degree) 01929 { 01930 /*if (!position_mask) 01931 { 01932 position_mask = new bool[len] ; 01933 for (int32_t i=0; i<len; i++) 01934 { 01935 position_mask[i]=false ; 01936 01937 for (int32_t j=0; j<degree; j++) 01938 if (weights[i*degree+j]!=0.0) 01939 { 01940 position_mask[i]=true ; 01941 break ; 01942 } 01943 } 01944 } 01945 if (position_mask[weight_pos]==0) 01946 return ;*/ 01947 01948 for (int32_t j=0; seq_pos+j<len; j++) 01949 { 01950 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01951 { 01952 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01953 { 01954 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01955 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01956 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01957 { 01958 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01959 break ; 01960 if (weights_in_tree) 01961 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01962 else 01963 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01964 } 01965 break ; 01966 } 01967 else 01968 { 01969 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01970 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01971 if (weights_in_tree) 01972 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ; 01973 else 01974 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ; 01975 } 01976 } 01977 else 01978 { 01979 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01980 if (j==degree-1) 01981 { 01982 if (weights_in_tree) 01983 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01984 else 01985 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ; 01986 } 01987 break ; 01988 } 01989 } 01990 } 01991 } 01992 01993 template <class Trie> 01994 void CTrie<Trie>::fill_backtracking_table_recursion( 01995 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 01996 CDynamicArray<ConsensusEntry>* table, float64_t* weights) 01997 { 01998 float64_t w=1.0; 01999 02000 if (weights_in_tree || depth==0) 02001 value+=tree->weight; 02002 else 02003 { 02004 w=weights[degree-1]; 02005 value+=weights[depth-1]*tree->weight; 02006 } 02007 02008 if (degree-1==depth) 02009 { 02010 for (int32_t sym=0; sym<4; sym++) 02011 { 02012 float64_t v=w*tree->child_weights[sym]; 02013 if (v!=0.0) 02014 { 02015 ConsensusEntry entry; 02016 entry.bt=-1; 02017 entry.score=value+v; 02018 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02019 02020 table->append_element(entry); 02021 } 02022 } 02023 } 02024 else 02025 { 02026 for (int32_t sym=0; sym<4; sym++) 02027 { 02028 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02029 if (tree->children[sym] != NO_CHILD) 02030 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights); 02031 } 02032 } 02033 } 02034 02035 template <class Trie> 02036 float64_t CTrie<Trie>::get_cumulative_score( 02037 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights) 02038 { 02039 float64_t result=0.0; 02040 02041 //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq); 02042 02043 for (int32_t i=pos; i<pos+deg && i<length; i++) 02044 { 02045 //SG_PRINT("loop %d\n", i); 02046 Trie* tree = &TreeMem[trees[i]]; 02047 02048 for (int32_t d=0; d<deg-i+pos; d++) 02049 { 02050 //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos))); 02051 ASSERT(d-1<degree); 02052 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3); 02053 02054 float64_t w=1.0; 02055 if (!weights_in_tree) 02056 w=weights[d]; 02057 02058 ASSERT(tree->children[sym] != NO_CHILD); 02059 tree=&TreeMem[tree->children[sym]]; 02060 result+=w*tree->weight; 02061 } 02062 } 02063 //SG_PRINT("cum: %f\n", result); 02064 return result; 02065 } 02066 02067 template <class Trie> 02068 void CTrie<Trie>::fill_backtracking_table( 02069 int32_t pos, CDynamicArray<ConsensusEntry>* prev, 02070 CDynamicArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights) 02071 { 02072 ASSERT(pos>=0 && pos<length); 02073 ASSERT(!use_compact_terminal_nodes); 02074 02075 Trie* t = &TreeMem[trees[pos]]; 02076 02077 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights); 02078 02079 02080 if (cumulative) 02081 { 02082 int32_t num_cur=cur->get_num_elements(); 02083 for (int32_t i=0; i<num_cur; i++) 02084 { 02085 ConsensusEntry entry=cur->get_element(i); 02086 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights); 02087 cur->set_element(entry,i); 02088 //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt); 02089 } 02090 } 02091 02092 //if previous tree exists find maximum scoring path 02093 //for each element in cur and update bt table 02094 if (prev) 02095 { 02096 int32_t num_cur=cur->get_num_elements(); 02097 int32_t num_prev=prev->get_num_elements(); 02098 02099 for (int32_t i=0; i<num_cur; i++) 02100 { 02101 //uint64_t str_cur_old= cur->get_element(i).string; 02102 uint64_t str_cur= cur->get_element(i).string >> 2; 02103 //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur); 02104 02105 int32_t bt=-1; 02106 float64_t max_score=0.0; 02107 02108 for (int32_t j=0; j<num_prev; j++) 02109 { 02110 //uint64_t str_prev_old= prev->get_element(j).string; 02111 uint64_t mask= 02112 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1)))); 02113 uint64_t str_prev= mask & prev->get_element(j).string; 02114 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask); 02115 02116 if (str_cur == str_prev) 02117 { 02118 float64_t sc=prev->get_element(j).score+cur->get_element(i).score; 02119 if (bt==-1 || sc>max_score) 02120 { 02121 bt=j; 02122 max_score=sc; 02123 02124 //SG_PRINT("new max[%i,%i] = %f\n", j,i, max_score); 02125 } 02126 } 02127 } 02128 02129 ASSERT(bt!=-1); 02130 ConsensusEntry entry; 02131 entry.bt=bt; 02132 entry.score=max_score; 02133 entry.string=cur->get_element(i).string; 02134 cur->set_element(entry, i); 02135 //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt); 02136 } 02137 } 02138 } 02139 } 02140 #endif // _TRIE_H___