|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2009 Christian Gehl 00008 * Written (W) 1999-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "distance/ManhattanWordDistance.h" 00014 #include "features/Features.h" 00015 #include "features/StringFeatures.h" 00016 #include "lib/io.h" 00017 00018 using namespace shogun; 00019 00020 CManhattanWordDistance::CManhattanWordDistance() 00021 : CStringDistance<uint16_t>() 00022 { 00023 SG_DEBUG("CManhattanWordDistance created"); 00024 dictionary_size= 1<<(sizeof(uint16_t)*8); 00025 dictionary_weights = new float64_t[dictionary_size]; 00026 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size); 00027 } 00028 00029 CManhattanWordDistance::CManhattanWordDistance( 00030 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r) 00031 : CStringDistance<uint16_t>() 00032 { 00033 SG_DEBUG("CManhattanWordDistance created"); 00034 dictionary_size= 1<<(sizeof(uint16_t)*8); 00035 dictionary_weights = new float64_t[dictionary_size]; 00036 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size); 00037 00038 init(l, r); 00039 } 00040 00041 CManhattanWordDistance::~CManhattanWordDistance() 00042 { 00043 cleanup(); 00044 00045 delete[] dictionary_weights; 00046 } 00047 00048 bool CManhattanWordDistance::init(CFeatures* l, CFeatures* r) 00049 { 00050 bool result=CStringDistance<uint16_t>::init(l,r); 00051 return result; 00052 } 00053 00054 void CManhattanWordDistance::cleanup() 00055 { 00056 } 00057 00058 float64_t CManhattanWordDistance::compute(int32_t idx_a, int32_t idx_b) 00059 { 00060 int32_t alen, blen; 00061 bool free_avec, free_bvec; 00062 00063 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)-> 00064 get_feature_vector(idx_a, alen, free_avec); 00065 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)-> 00066 get_feature_vector(idx_b, blen, free_bvec); 00067 00068 int32_t result=0; 00069 00070 int32_t left_idx=0; 00071 int32_t right_idx=0; 00072 00073 while (left_idx < alen && right_idx < blen) 00074 { 00075 uint16_t sym=avec[left_idx]; 00076 if (avec[left_idx]==bvec[right_idx]) 00077 { 00078 int32_t old_left_idx=left_idx; 00079 int32_t old_right_idx=right_idx; 00080 00081 while (left_idx< alen && avec[left_idx]==sym) 00082 left_idx++; 00083 00084 while (right_idx< blen && bvec[right_idx]==sym) 00085 right_idx++; 00086 00087 result += CMath::abs( (left_idx-old_left_idx) - (right_idx-old_right_idx) ); 00088 } 00089 else if (avec[left_idx]<bvec[right_idx]) 00090 { 00091 00092 while (left_idx< alen && avec[left_idx]==sym) 00093 { 00094 result++; 00095 left_idx++; 00096 } 00097 } 00098 else 00099 { 00100 sym=bvec[right_idx]; 00101 00102 while (right_idx< blen && bvec[right_idx]==sym) 00103 { 00104 result++; 00105 right_idx++; 00106 } 00107 } 00108 } 00109 00110 result+=blen-right_idx + alen-left_idx; 00111 00112 ((CStringFeatures<uint16_t>*) lhs)-> 00113 free_feature_vector(avec, idx_a, free_avec); 00114 ((CStringFeatures<uint16_t>*) rhs)-> 00115 free_feature_vector(bvec, idx_b, free_bvec); 00116 00117 return result; 00118 } 00119