|
SHOGUN v0.9.3
|
00001 /* 00002 * this program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2006-2009 Christian Gehl 00008 * Written (W) 2006-2009 Soeren Sonnenburg 00009 * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/config.h" 00013 00014 #include "lib/common.h" 00015 #include "lib/io.h" 00016 #include "lib/File.h" 00017 #include "lib/Time.h" 00018 #include "base/Parallel.h" 00019 00020 #include "distance/Distance.h" 00021 #include "features/Features.h" 00022 00023 #include <string.h> 00024 #include <unistd.h> 00025 00026 #ifndef WIN32 00027 #include <pthread.h> 00028 #endif 00029 00030 using namespace shogun; 00031 00032 CDistance::CDistance() 00033 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false), 00034 lhs(NULL), rhs(NULL) 00035 { 00036 } 00037 00038 00039 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs) 00040 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false), 00041 lhs(NULL), rhs(NULL) 00042 { 00043 init(p_lhs, p_rhs); 00044 } 00045 00046 CDistance::~CDistance() 00047 { 00048 delete[] precomputed_matrix; 00049 precomputed_matrix=NULL; 00050 00051 remove_lhs_and_rhs(); 00052 } 00053 00054 bool CDistance::init(CFeatures* l, CFeatures* r) 00055 { 00056 //make sure features were indeed supplied 00057 ASSERT(l); 00058 ASSERT(r); 00059 00060 //make sure features are compatible 00061 ASSERT(l->get_feature_class()==r->get_feature_class()); 00062 ASSERT(l->get_feature_type()==r->get_feature_type()); 00063 00064 //remove references to previous features 00065 remove_lhs_and_rhs(); 00066 00067 //increase reference counts 00068 SG_REF(l); 00069 if (l!=r) 00070 SG_REF(r); 00071 00072 lhs=l; 00073 rhs=r; 00074 00075 delete[] precomputed_matrix ; 00076 precomputed_matrix=NULL ; 00077 00078 return true; 00079 } 00080 00081 void CDistance::load(CFile* loader) 00082 { 00083 } 00084 00085 void CDistance::save(CFile* writer) 00086 { 00087 } 00088 00089 void CDistance::remove_lhs_and_rhs() 00090 { 00091 if (rhs!=lhs) 00092 SG_UNREF(rhs); 00093 rhs = NULL; 00094 00095 SG_UNREF(lhs); 00096 lhs = NULL; 00097 } 00098 00099 void CDistance::remove_lhs() 00100 { 00101 SG_UNREF(lhs); 00102 lhs = NULL; 00103 } 00104 00106 void CDistance::remove_rhs() 00107 { 00108 SG_UNREF(rhs); 00109 rhs = NULL; 00110 } 00111 00112 CFeatures* CDistance::replace_rhs(CFeatures* r) 00113 { 00114 //make sure features were indeed supplied 00115 ASSERT(r); 00116 00117 //make sure features are compatible 00118 ASSERT(lhs->get_feature_class()==r->get_feature_class()); 00119 ASSERT(lhs->get_feature_type()==r->get_feature_type()); 00120 00121 //remove references to previous rhs features 00122 CFeatures* tmp=rhs; 00123 00124 rhs=r; 00125 00126 delete[] precomputed_matrix ; 00127 precomputed_matrix=NULL ; 00128 00129 return tmp; 00130 } 00131 00132 void CDistance::do_precompute_matrix() 00133 { 00134 int32_t num_left=lhs->get_num_vectors(); 00135 int32_t num_right=rhs->get_num_vectors(); 00136 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ; 00137 00138 ASSERT(num_left==num_right); 00139 ASSERT(lhs==rhs); 00140 int32_t num=num_left; 00141 00142 delete[] precomputed_matrix; 00143 precomputed_matrix=new float32_t[num*(num+1)/2]; 00144 00145 for (int32_t i=0; i<num; i++) 00146 { 00147 SG_PROGRESS(i*i,0,num*num); 00148 for (int32_t j=0; j<=i; j++) 00149 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ; 00150 } 00151 00152 SG_PROGRESS(num*num,0,num*num); 00153 SG_DONE(); 00154 } 00155 00156 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n) 00157 { 00158 ASSERT(dst && m && n); 00159 00160 float64_t* result = NULL; 00161 CFeatures* f1 = lhs; 00162 CFeatures* f2 = rhs; 00163 00164 if (f1 && f2) 00165 { 00166 int32_t num_vec1=f1->get_num_vectors(); 00167 int32_t num_vec2=f2->get_num_vectors(); 00168 *m=num_vec1; 00169 *n=num_vec2; 00170 00171 int64_t total_num=num_vec1*num_vec2; 00172 int32_t num_done=0; 00173 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2); 00174 00175 result=(float64_t*) malloc(total_num*sizeof(float64_t)); 00176 ASSERT(result); 00177 00178 if ( (f1 == f2) && (num_vec1 == num_vec2) ) 00179 { 00180 for (int32_t i=0; i<num_vec1; i++) 00181 { 00182 for (int32_t j=i; j<num_vec1; j++) 00183 { 00184 float64_t v=distance(i,j); 00185 00186 result[i+j*num_vec1]=v; 00187 result[j+i*num_vec1]=v; 00188 00189 if (num_done%100000) 00190 SG_PROGRESS(num_done, 0, total_num-1); 00191 00192 if (i!=j) 00193 num_done+=2; 00194 else 00195 num_done+=1; 00196 } 00197 } 00198 } 00199 else 00200 { 00201 for (int32_t i=0; i<num_vec1; i++) 00202 { 00203 for (int32_t j=0; j<num_vec2; j++) 00204 { 00205 result[i+j*num_vec1]=distance(i,j) ; 00206 00207 if (num_done%100000) 00208 SG_PROGRESS(num_done, 0, total_num-1); 00209 00210 num_done++; 00211 } 00212 } 00213 } 00214 00215 SG_DONE(); 00216 } 00217 else 00218 SG_ERROR( "no features assigned to distance\n"); 00219 00220 *dst=result; 00221 } 00222 00223 float32_t* CDistance::get_distance_matrix_shortreal( 00224 int32_t &num_vec1, int32_t &num_vec2, float32_t* target) 00225 { 00226 float32_t* result = NULL; 00227 CFeatures* f1 = lhs; 00228 CFeatures* f2 = rhs; 00229 00230 if (f1 && f2) 00231 { 00232 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors())) 00233 SG_ERROR("distance matrix does not fit into target\n"); 00234 00235 num_vec1=f1->get_num_vectors(); 00236 num_vec2=f2->get_num_vectors(); 00237 int64_t total_num=num_vec1*num_vec2; 00238 int32_t num_done=0; 00239 00240 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2); 00241 00242 if (target) 00243 result=target; 00244 else 00245 result=new float32_t[total_num]; 00246 00247 if ( (f1 == f2) && (num_vec1 == num_vec2) ) 00248 { 00249 for (int32_t i=0; i<num_vec1; i++) 00250 { 00251 for (int32_t j=i; j<num_vec1; j++) 00252 { 00253 float64_t v=distance(i,j); 00254 00255 result[i+j*num_vec1]=v; 00256 result[j+i*num_vec1]=v; 00257 00258 if (num_done%100000) 00259 SG_PROGRESS(num_done, 0, total_num-1); 00260 00261 if (i!=j) 00262 num_done+=2; 00263 else 00264 num_done+=1; 00265 } 00266 } 00267 } 00268 else 00269 { 00270 for (int32_t i=0; i<num_vec1; i++) 00271 { 00272 for (int32_t j=0; j<num_vec2; j++) 00273 { 00274 result[i+j*num_vec1]=distance(i,j) ; 00275 00276 if (num_done%100000) 00277 SG_PROGRESS(num_done, 0, total_num-1); 00278 00279 num_done++; 00280 } 00281 } 00282 } 00283 00284 SG_DONE(); 00285 } 00286 else 00287 SG_ERROR( "no features assigned to distance\n"); 00288 00289 return result; 00290 } 00291 00292 float64_t* CDistance::get_distance_matrix_real( 00293 int32_t &num_vec1, int32_t &num_vec2, float64_t* target) 00294 { 00295 float64_t* result = NULL; 00296 CFeatures* f1 = lhs; 00297 CFeatures* f2 = rhs; 00298 00299 if (f1 && f2) 00300 { 00301 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors())) 00302 SG_ERROR("distance matrix does not fit into target\n"); 00303 00304 num_vec1=f1->get_num_vectors(); 00305 num_vec2=f2->get_num_vectors(); 00306 int64_t total_num=num_vec1*num_vec2; 00307 int32_t num_done=0; 00308 00309 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2); 00310 00311 if (target) 00312 result=target; 00313 else 00314 result=new float64_t[total_num]; 00315 00316 if ( (f1 == f2) && (num_vec1 == num_vec2) ) 00317 { 00318 for (int32_t i=0; i<num_vec1; i++) 00319 { 00320 for (int32_t j=i; j<num_vec1; j++) 00321 { 00322 float64_t v=distance(i,j); 00323 00324 result[i+j*num_vec1]=v; 00325 result[j+i*num_vec1]=v; 00326 00327 if (num_done%100000) 00328 SG_PROGRESS(num_done, 0, total_num-1); 00329 00330 if (i!=j) 00331 num_done+=2; 00332 else 00333 num_done+=1; 00334 } 00335 } 00336 } 00337 else 00338 { 00339 for (int32_t i=0; i<num_vec1; i++) 00340 { 00341 for (int32_t j=0; j<num_vec2; j++) 00342 { 00343 result[i+j*num_vec1]=distance(i,j) ; 00344 00345 if (num_done%100000) 00346 SG_PROGRESS(num_done, 0, total_num-1); 00347 00348 num_done++; 00349 } 00350 } 00351 } 00352 00353 SG_DONE(); 00354 } 00355 else 00356 SG_ERROR( "no features assigned to distance\n"); 00357 00358 return result; 00359 }