|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Sebastian J. Schultheiss and Soeren Sonnenburg 00008 * Copyright (C) 2009 Max-Planck-Society 00009 */ 00010 00011 #include "lib/common.h" 00012 #include "kernel/RegulatoryModulesStringKernel.h" 00013 #include "features/Features.h" 00014 #include "features/SimpleFeatures.h" 00015 #include "lib/io.h" 00016 00017 using namespace shogun; 00018 00019 CRegulatoryModulesStringKernel::CRegulatoryModulesStringKernel( 00020 int32_t size, float64_t w, int32_t d, int32_t s, int32_t wl) 00021 : CStringKernel<char>(size), width(w), degree(d), shift(s), window(wl), 00022 motif_positions_lhs(NULL), motif_positions_rhs(NULL), position_weights(NULL), weights(NULL) 00023 { 00024 } 00025 00026 CRegulatoryModulesStringKernel::CRegulatoryModulesStringKernel(CStringFeatures<char>* lstr, CStringFeatures<char>* rstr, 00027 CSimpleFeatures<uint16_t>* lpos, CSimpleFeatures<uint16_t>* rpos, 00028 float64_t w, int32_t d, int32_t s, int32_t wl, int32_t size) 00029 : CStringKernel<char>(size), width(w), degree(d), shift(s), window(wl), 00030 motif_positions_lhs(NULL), motif_positions_rhs(NULL), position_weights(NULL), weights(NULL) 00031 { 00032 set_motif_positions(lpos, rpos); 00033 init(lstr,rstr); 00034 } 00035 00036 CRegulatoryModulesStringKernel::~CRegulatoryModulesStringKernel() 00037 { 00038 SG_UNREF(motif_positions_lhs); 00039 SG_UNREF(motif_positions_rhs); 00040 } 00041 00042 bool CRegulatoryModulesStringKernel::init(CFeatures* l, CFeatures* r) 00043 { 00044 ASSERT(motif_positions_lhs); 00045 ASSERT(motif_positions_rhs); 00046 00047 if (l->get_num_vectors() != motif_positions_lhs->get_num_vectors()) 00048 SG_ERROR("Number of vectors does not agree (LHS: %d, Motif LHS: %d).\n", 00049 l->get_num_vectors(), motif_positions_lhs->get_num_vectors()); 00050 if (r->get_num_vectors() != motif_positions_rhs->get_num_vectors()) 00051 SG_ERROR("Number of vectors does not agree (RHS: %d, Motif RHS: %d).\n", 00052 r->get_num_vectors(), motif_positions_rhs->get_num_vectors()); 00053 00054 set_wd_weights(); 00055 CStringKernel<char>::init(l, r); 00056 return init_normalizer(); 00057 } 00058 00059 void CRegulatoryModulesStringKernel::set_motif_positions( 00060 CSimpleFeatures<uint16_t>* positions_lhs, CSimpleFeatures<uint16_t>* positions_rhs) 00061 { 00062 ASSERT(positions_lhs); 00063 ASSERT(positions_rhs); 00064 SG_UNREF(motif_positions_lhs); 00065 SG_UNREF(motif_positions_rhs); 00066 if (positions_lhs->get_num_features() != positions_rhs->get_num_features()) 00067 SG_ERROR("Number of dimensions does not agree.\n"); 00068 00069 motif_positions_lhs=positions_lhs; 00070 motif_positions_rhs=positions_rhs; 00071 SG_REF(positions_lhs); 00072 SG_REF(positions_rhs); 00073 } 00074 00075 float64_t CRegulatoryModulesStringKernel::compute(int32_t idx_a, int32_t idx_b) 00076 { 00077 ASSERT(motif_positions_lhs); 00078 ASSERT(motif_positions_rhs); 00079 00080 int32_t alen, blen; 00081 bool free_avec, free_bvec; 00082 char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00083 char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00084 00085 int32_t alen_pos, blen_pos; 00086 bool afree_pos, bfree_pos; 00087 uint16_t* positions_a = motif_positions_lhs->get_feature_vector(idx_a, alen_pos, afree_pos); 00088 uint16_t* positions_b = motif_positions_rhs->get_feature_vector(idx_b, blen_pos, bfree_pos); 00089 ASSERT(alen_pos==blen_pos); 00090 int32_t num_pos=alen_pos; 00091 00092 00093 float64_t result_rbf=0; 00094 float64_t result_wds=0; 00095 00096 for (int32_t p=0; p<num_pos; p++) 00097 { 00098 result_rbf+=CMath::sq(positions_a[p]-positions_b[p]); 00099 00100 for (int32_t p2=0; p2<num_pos; p2++) //p+1 and below * 2 00101 result_rbf+=CMath::sq( (positions_a[p]-positions_a[p2]) - (positions_b[p]-positions_b[p2]) ); 00102 00103 int32_t limit = window; 00104 if (window + positions_a[p] > alen) 00105 limit = alen - positions_a[p]; 00106 00107 if (window + positions_b[p] > blen) 00108 limit = CMath::min(limit, blen - positions_b[p]); 00109 00110 result_wds+=compute_wds(&avec[positions_a[p]], &bvec[positions_b[p]], 00111 limit); 00112 } 00113 00114 float64_t result=exp(-result_rbf/width)+result_wds; 00115 00116 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00117 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00118 ((CSimpleFeatures<uint16_t>*) lhs)->free_feature_vector(positions_a, idx_a, afree_pos); 00119 ((CSimpleFeatures<uint16_t>*) rhs)->free_feature_vector(positions_b, idx_b, bfree_pos); 00120 00121 return result; 00122 } 00123 00124 float64_t CRegulatoryModulesStringKernel::compute_wds( 00125 char* avec, char* bvec, int32_t len) 00126 { 00127 float64_t* max_shift_vec = new float64_t[shift]; 00128 float64_t sum0=0 ; 00129 for (int32_t i=0; i<shift; i++) 00130 max_shift_vec[i]=0 ; 00131 00132 // no shift 00133 for (int32_t i=0; i<len; i++) 00134 { 00135 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00136 continue ; 00137 00138 float64_t sumi = 0.0 ; 00139 for (int32_t j=0; (j<degree) && (i+j<len); j++) 00140 { 00141 if (avec[i+j]!=bvec[i+j]) 00142 break ; 00143 sumi += weights[j]; 00144 } 00145 if (position_weights!=NULL) 00146 sum0 += position_weights[i]*sumi ; 00147 else 00148 sum0 += sumi ; 00149 } ; 00150 00151 for (int32_t i=0; i<len; i++) 00152 { 00153 for (int32_t k=1; (k<=shift) && (i+k<len); k++) 00154 { 00155 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00156 continue ; 00157 00158 float64_t sumi1 = 0.0 ; 00159 // shift in sequence a 00160 for (int32_t j=0; (j<degree) && (i+j+k<len); j++) 00161 { 00162 if (avec[i+j+k]!=bvec[i+j]) 00163 break ; 00164 sumi1 += weights[j]; 00165 } 00166 float64_t sumi2 = 0.0 ; 00167 // shift in sequence b 00168 for (int32_t j=0; (j<degree) && (i+j+k<len); j++) 00169 { 00170 if (avec[i+j]!=bvec[i+j+k]) 00171 break ; 00172 sumi2 += weights[j]; 00173 } 00174 if (position_weights!=NULL) 00175 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00176 else 00177 max_shift_vec[k-1] += sumi1 + sumi2 ; 00178 } ; 00179 } 00180 00181 float64_t result = sum0 ; 00182 for (int32_t i=0; i<shift; i++) 00183 result += max_shift_vec[i]/(2*(i+1)) ; 00184 00185 delete[] max_shift_vec; 00186 return result ; 00187 } 00188 00189 void CRegulatoryModulesStringKernel::set_wd_weights() 00190 { 00191 ASSERT(degree>0); 00192 00193 delete[] weights; 00194 weights=new float64_t[degree]; 00195 00196 int32_t i; 00197 float64_t sum=0; 00198 for (i=0; i<degree; i++) 00199 { 00200 weights[i]=degree-i; 00201 sum+=weights[i]; 00202 } 00203 00204 for (i=0; i<degree; i++) 00205 weights[i]/=sum; 00206 }