|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "lib/common.h" 00012 #include "lib/io.h" 00013 #include "kernel/SimpleLocalityImprovedStringKernel.h" 00014 #include "features/Features.h" 00015 #include "features/StringFeatures.h" 00016 00017 using namespace shogun; 00018 00019 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel( 00020 int32_t size, int32_t l, int32_t id, int32_t od) 00021 : CStringKernel<char>(size), length(l), inner_degree(id), outer_degree(od), 00022 pyramid_weights(NULL) 00023 { 00024 } 00025 00026 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel( 00027 CStringFeatures<char>* l, CStringFeatures<char>* r, 00028 int32_t len, int32_t id, int32_t od) 00029 : CStringKernel<char>(10), length(len), inner_degree(id), outer_degree(od), 00030 pyramid_weights(NULL) 00031 { 00032 init(l, r); 00033 } 00034 00035 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel() 00036 { 00037 cleanup(); 00038 } 00039 00040 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r) 00041 { 00042 bool result = CStringKernel<char>::init(l,r); 00043 00044 if (!result) 00045 return false; 00046 int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length(); 00047 delete[] pyramid_weights; 00048 pyramid_weights = new float64_t[num_features]; 00049 00050 SG_INFO("initializing pyramid weights: size=%ld length=%i\n", 00051 num_features, length); 00052 00053 const int32_t PYRAL = 2 * length - 1; // total window length 00054 float64_t PYRAL_pot; 00055 int32_t DEGREE1_1 = (inner_degree & 0x1)==0; 00056 int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0; 00057 int32_t DEGREE1_2 = (inner_degree & 0x2)!=0; 00058 int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0; 00059 int32_t DEGREE1_4 = (inner_degree & 0x4)!=0; 00060 { 00061 float64_t PYRAL_ = PYRAL; 00062 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_; 00063 if (DEGREE1_1n) 00064 { 00065 PYRAL_ *= PYRAL_; 00066 if (DEGREE1_2) 00067 PYRAL_pot *= PYRAL_; 00068 if (DEGREE1_3) 00069 { 00070 PYRAL_ *= PYRAL_; 00071 if (DEGREE1_4) 00072 PYRAL_pot *= PYRAL_; 00073 } 00074 } 00075 } 00076 00077 int32_t pyra_len = num_features-PYRAL+1; 00078 int32_t pyra_len2 = (int32_t) pyra_len/2; 00079 { 00080 int32_t j; 00081 for (j = 0; j < pyra_len; j++) 00082 pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len); 00083 for (j = 0; j < pyra_len; j++) 00084 pyramid_weights[j] /= PYRAL_pot; 00085 } 00086 00087 return init_normalizer(); 00088 } 00089 00090 void CSimpleLocalityImprovedStringKernel::cleanup() 00091 { 00092 delete[] pyramid_weights; 00093 pyramid_weights = NULL; 00094 00095 CKernel::cleanup(); 00096 } 00097 00098 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1, 00099 const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH, 00100 const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra) 00101 { 00102 const int32_t PYRAL = 2*NTWIDTH-1; // total window length 00103 int32_t pyra_len, pyra_len2; 00104 float64_t pot, PYRAL_pot; 00105 float64_t sum; 00106 int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0; 00107 int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0; 00108 int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0; 00109 int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0; 00110 int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0; 00111 { 00112 float64_t PYRAL_ = PYRAL; 00113 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_; 00114 if (DEGREE1_1n) 00115 { 00116 PYRAL_ *= PYRAL_; 00117 if (DEGREE1_2) PYRAL_pot *= PYRAL_; 00118 if (DEGREE1_3) 00119 { 00120 PYRAL_ *= PYRAL_; 00121 if (DEGREE1_4) PYRAL_pot *= PYRAL_; 00122 } 00123 } 00124 } 00125 00126 ASSERT((DEGREE1 & ~0x7) == 0); 00127 ASSERT((DEGREE2 & ~0x7) == 0); 00128 00129 pyra_len = NOF_NTS-PYRAL+1; 00130 pyra_len2 = (int32_t) pyra_len/2; 00131 { 00132 int32_t j; 00133 for (j = 0; j < pyra_len; j++) 00134 pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len); 00135 for (j = 0; j < pyra_len; j++) 00136 pyra[j] /= PYRAL_pot; 00137 } 00138 00139 register int32_t conv; 00140 register int32_t i; 00141 register int32_t j; 00142 00143 sum = 0.0; 00144 conv = 0; 00145 for (j = 0; j < PYRAL; j++) 00146 conv += (x1[j] == x2[j]) ? 1 : 0; 00147 00148 for (i = 0; i < NOF_NTS-PYRAL+1; i++) 00149 { 00150 register float64_t pot2; 00151 if (i>0) 00152 conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) - 00153 ((x1[i-1] == x2[i-1]) ? 1 : 0); 00154 { /* potencing of conv -- float64_t is faster*/ 00155 register float64_t conv2 = conv; 00156 pot2 = (DEGREE1_1) ? 1.0 : conv2; 00157 if (DEGREE1_1n) 00158 { 00159 conv2 *= conv2; 00160 if (DEGREE1_2) 00161 pot2 *= conv2; 00162 if (DEGREE1_3 && DEGREE1_4) 00163 pot2 *= conv2*conv2; 00164 } 00165 } 00166 sum += pot2*pyra[i]; 00167 } 00168 00169 pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum; 00170 if ((DEGREE2 & ~0x1) != 0) 00171 { 00172 sum *= sum; 00173 if ((DEGREE2 & 0x2) != 0) 00174 pot *= sum; 00175 if ((DEGREE2 & ~0x3) != 0) 00176 { 00177 sum *= sum; 00178 if ((DEGREE2 & 0x4) != 0) 00179 pot *= sum; 00180 } 00181 } 00182 return pot; 00183 } 00184 00185 float64_t CSimpleLocalityImprovedStringKernel::compute( 00186 int32_t idx_a, int32_t idx_b) 00187 { 00188 int32_t alen, blen; 00189 bool free_avec, free_bvec; 00190 00191 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00192 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00193 00194 // can only deal with strings of same length 00195 ASSERT(alen==blen); 00196 00197 float64_t dpt; 00198 00199 dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights); 00200 dpt = dpt / pow((float64_t)alen, (float64_t)outer_degree); 00201 00202 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00203 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00204 return (float64_t) dpt; 00205 }