|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) Christian Gehl 00008 * Written (W) 1999-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "distance/CanberraWordDistance.h" 00014 #include "features/Features.h" 00015 #include "features/StringFeatures.h" 00016 #include "lib/io.h" 00017 00018 using namespace shogun; 00019 00020 CCanberraWordDistance::CCanberraWordDistance() 00021 : CStringDistance<uint16_t>() 00022 { 00023 SG_DEBUG("CCanberraWordDistance created"); 00024 dictionary_size= 1<<(sizeof(uint16_t)*8); 00025 dictionary_weights = new float64_t[dictionary_size]; 00026 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size); 00027 } 00028 00029 CCanberraWordDistance::CCanberraWordDistance( 00030 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r) 00031 : CStringDistance<uint16_t>() 00032 { 00033 SG_DEBUG("CCanberraWordDistance created"); 00034 dictionary_size= 1<<(sizeof(uint16_t)*8); 00035 dictionary_weights = new float64_t[dictionary_size]; 00036 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size); 00037 00038 init(l, r); 00039 } 00040 00041 CCanberraWordDistance::~CCanberraWordDistance() 00042 { 00043 cleanup(); 00044 00045 delete[] dictionary_weights; 00046 } 00047 00048 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r) 00049 { 00050 return CStringDistance<uint16_t>::init(l,r); 00051 } 00052 00053 void CCanberraWordDistance::cleanup() 00054 { 00055 } 00056 00057 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b) 00058 { 00059 int32_t alen, blen; 00060 bool free_avec, free_bvec; 00061 00062 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)-> 00063 get_feature_vector(idx_a, alen, free_avec); 00064 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)-> 00065 get_feature_vector(idx_b, blen, free_bvec); 00066 00067 float64_t result=0; 00068 00069 int32_t left_idx=0; 00070 int32_t right_idx=0; 00071 00072 while (left_idx < alen && right_idx < blen) 00073 { 00074 uint16_t sym=avec[left_idx]; 00075 if (avec[left_idx]==bvec[right_idx]) 00076 { 00077 int32_t old_left_idx=left_idx; 00078 int32_t old_right_idx=right_idx; 00079 00080 while (left_idx< alen && avec[left_idx]==sym) 00081 left_idx++; 00082 00083 while (right_idx< blen && bvec[right_idx]==sym) 00084 right_idx++; 00085 00086 result += 00087 CMath::abs((float64_t) 00088 ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/ 00089 ((float64_t) 00090 ((left_idx-old_left_idx) + (right_idx-old_right_idx))); 00091 } 00092 else if (avec[left_idx]<bvec[right_idx]) 00093 { 00094 result++; 00095 00096 while (left_idx< alen && avec[left_idx]==sym) 00097 left_idx++; 00098 } 00099 else 00100 { 00101 sym=bvec[right_idx]; 00102 result++; 00103 00104 while (right_idx< blen && bvec[right_idx]==sym) 00105 right_idx++; 00106 } 00107 } 00108 00109 while (left_idx < alen) 00110 { 00111 uint16_t sym=avec[left_idx]; 00112 result++; 00113 00114 while (left_idx< alen && avec[left_idx]==sym) 00115 left_idx++; 00116 } 00117 00118 while (right_idx < blen) 00119 { 00120 uint16_t sym=bvec[right_idx]; 00121 result++; 00122 00123 while (right_idx< blen && bvec[right_idx]==sym) 00124 right_idx++; 00125 } 00126 ((CStringFeatures<uint16_t>*) lhs)-> 00127 free_feature_vector(avec, idx_a, free_avec); 00128 ((CStringFeatures<uint16_t>*) rhs)-> 00129 free_feature_vector(bvec, idx_b, free_bvec); 00130 00131 return result; 00132 }