|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 1999-2008 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "lib/io.h" 00014 #include "kernel/LocalityImprovedStringKernel.h" 00015 #include "features/StringFeatures.h" 00016 00017 using namespace shogun; 00018 00019 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel( 00020 int32_t size, int32_t l, int32_t id, int32_t od) 00021 : CStringKernel<char>(size), length(l), inner_degree(id), outer_degree(od) 00022 { 00023 SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", l, id, od); 00024 } 00025 00026 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel( 00027 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t len, 00028 int32_t id, int32_t od) 00029 : CStringKernel<char>(10), length(len), inner_degree(id), outer_degree(od) 00030 { 00031 SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", len, id, od); 00032 00033 init(l, r); 00034 } 00035 00036 CLocalityImprovedStringKernel::~CLocalityImprovedStringKernel() 00037 { 00038 cleanup(); 00039 } 00040 00041 bool CLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r) 00042 { 00043 CStringKernel<char>::init(l,r); 00044 return init_normalizer(); 00045 } 00046 00047 float64_t CLocalityImprovedStringKernel::compute(int32_t idx_a, int32_t idx_b) 00048 { 00049 int32_t alen, blen; 00050 bool free_avec, free_bvec; 00051 00052 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00053 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00054 // can only deal with strings of same length 00055 ASSERT(alen==blen && alen>0); 00056 00057 int32_t i,t; 00058 float64_t* match=new float64_t[alen]; 00059 00060 // initialize match table 1 -> match; 0 -> no match 00061 for (i = 0; i<alen; i++) 00062 match[i] = (avec[i] == bvec[i])? 1 : 0; 00063 00064 float64_t outer_sum = 0; 00065 00066 for (t = 0; t<alen-length; t++) 00067 { 00068 float64_t sum = 0; 00069 for (i = 0; i<length && t+i+length+1<alen; i++) 00070 sum += (i+1)*match[t+i]+(length-i)*match[t+i+length+1]; 00071 //add middle element + normalize with sum_i=0^2l+1 i = (2l+1)(l+1) 00072 float64_t inner_sum = (sum + (length+1)*match[t+length]) / ((2*length+1)*(length+1)); 00073 inner_sum = pow(inner_sum, inner_degree + 1); 00074 outer_sum += inner_sum; 00075 } 00076 delete[] match; 00077 00078 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00079 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00080 return pow(outer_sum, outer_degree + 1); 00081 }