|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2009 Christian Gehl 00008 * Written (W) 1999-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "distance/HammingWordDistance.h" 00014 #include "features/Features.h" 00015 #include "features/StringFeatures.h" 00016 #include "lib/io.h" 00017 00018 using namespace shogun; 00019 00020 CHammingWordDistance::CHammingWordDistance(bool sign) 00021 : CStringDistance<uint16_t>(), use_sign(sign) 00022 { 00023 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0); 00024 dictionary_size= 1<<(sizeof(uint16_t)*8); 00025 dictionary_weights = new float64_t[dictionary_size]; 00026 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size); 00027 } 00028 00029 CHammingWordDistance::CHammingWordDistance( 00030 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r, bool sign) 00031 : CStringDistance<uint16_t>(), use_sign(sign) 00032 { 00033 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0); 00034 dictionary_size= 1<<(sizeof(uint16_t)*8); 00035 dictionary_weights = new float64_t[dictionary_size]; 00036 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size); 00037 00038 init(l, r); 00039 } 00040 00041 CHammingWordDistance::~CHammingWordDistance() 00042 { 00043 cleanup(); 00044 00045 delete[] dictionary_weights; 00046 } 00047 00048 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r) 00049 { 00050 bool result=CStringDistance<uint16_t>::init(l,r); 00051 return result; 00052 } 00053 00054 void CHammingWordDistance::cleanup() 00055 { 00056 } 00057 00058 float64_t CHammingWordDistance::compute(int32_t idx_a, int32_t idx_b) 00059 { 00060 int32_t alen, blen; 00061 bool free_avec, free_bvec; 00062 00063 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)-> 00064 get_feature_vector(idx_a, alen, free_avec); 00065 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)-> 00066 get_feature_vector(idx_b, blen, free_bvec); 00067 00068 int32_t result=0; 00069 00070 int32_t left_idx=0; 00071 int32_t right_idx=0; 00072 00073 if (use_sign) 00074 { 00075 // hamming of: if words appear in both vectors 00076 while (left_idx < alen && right_idx < blen) 00077 { 00078 uint16_t sym=avec[left_idx]; 00079 if (avec[left_idx]==bvec[right_idx]) 00080 { 00081 while (left_idx< alen && avec[left_idx]==sym) 00082 left_idx++; 00083 00084 while (right_idx< blen && bvec[right_idx]==sym) 00085 right_idx++; 00086 } 00087 else if (avec[left_idx]<bvec[right_idx]) 00088 { 00089 result++; 00090 00091 while (left_idx< alen && avec[left_idx]==sym) 00092 left_idx++; 00093 } 00094 else 00095 { 00096 sym=bvec[right_idx]; 00097 result++; 00098 00099 while (right_idx< blen && bvec[right_idx]==sym) 00100 right_idx++; 00101 } 00102 } 00103 } 00104 else 00105 { 00106 //hamming of: if words appear in both vectors _the same number_ of times 00107 while (left_idx < alen && right_idx < blen) 00108 { 00109 uint16_t sym=avec[left_idx]; 00110 if (avec[left_idx]==bvec[right_idx]) 00111 { 00112 int32_t old_left_idx=left_idx; 00113 int32_t old_right_idx=right_idx; 00114 00115 while (left_idx< alen && avec[left_idx]==sym) 00116 left_idx++; 00117 00118 while (right_idx< blen && bvec[right_idx]==sym) 00119 right_idx++; 00120 00121 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx)) 00122 result++; 00123 } 00124 else if (avec[left_idx]<bvec[right_idx]) 00125 { 00126 result++; 00127 00128 while (left_idx< alen && avec[left_idx]==sym) 00129 left_idx++; 00130 } 00131 else 00132 { 00133 sym=bvec[right_idx]; 00134 result++; 00135 00136 while (right_idx< blen && bvec[right_idx]==sym) 00137 right_idx++; 00138 } 00139 } 00140 } 00141 00142 while (left_idx < alen) 00143 { 00144 uint16_t sym=avec[left_idx]; 00145 result++; 00146 00147 while (left_idx< alen && avec[left_idx]==sym) 00148 left_idx++; 00149 } 00150 00151 while (right_idx < blen) 00152 { 00153 uint16_t sym=bvec[right_idx]; 00154 result++; 00155 00156 while (right_idx< blen && bvec[right_idx]==sym) 00157 right_idx++; 00158 } 00159 00160 ((CStringFeatures<uint16_t>*) lhs)-> 00161 free_feature_vector(avec, idx_a, free_avec); 00162 ((CStringFeatures<uint16_t>*) rhs)-> 00163 free_feature_vector(bvec, idx_b, free_bvec); 00164 00165 return result; 00166 }