|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "features/Labels.h" 00013 #include "lib/common.h" 00014 #include "lib/File.h" 00015 #include "lib/io.h" 00016 #include "lib/Mathematics.h" 00017 00018 #ifdef HAVE_BOOST_SERIALIZATION 00019 #include <boost/serialization/export.hpp> 00020 BOOST_CLASS_EXPORT(shogun::CLabels); 00021 #endif //HAVE_BOOST_SERIALIZATION 00022 00023 using namespace shogun; 00024 00025 CLabels::CLabels() 00026 : CSGObject() 00027 { 00028 labels = NULL; 00029 num_labels = 0; 00030 m_confidences=NULL; 00031 m_num_classes=0; 00032 } 00033 00034 CLabels::CLabels(int32_t num_lab) 00035 : CSGObject(), num_labels(num_lab) 00036 { 00037 labels=new float64_t[num_lab]; 00038 for (int32_t i=0; i<num_lab; i++) 00039 labels[i]=0; 00040 00041 m_num_classes=0; 00042 m_confidences=NULL; 00043 } 00044 00045 CLabels::CLabels(float64_t* p_labels, int32_t len) 00046 : CSGObject() 00047 { 00048 labels = NULL; 00049 num_labels = 0; 00050 00051 set_labels(p_labels, len); 00052 00053 // We don't allocate the confidences matrix, unless it is necessary. 00054 // For problems with many classes and samples it might get really big. 00055 m_num_classes=get_num_classes(); 00056 m_confidences=NULL; 00057 } 00058 00059 CLabels::CLabels(float64_t* in_confidences, int32_t in_num_labels, 00060 int32_t in_num_classes) 00061 : CSGObject() 00062 { 00063 labels=new float64_t[in_num_labels]; 00064 for (int32_t i=0; i<in_num_labels; i++) 00065 labels[i]=0; 00066 00067 m_num_classes=in_num_classes; 00068 m_confidences=in_confidences; 00069 find_labels(); 00070 } 00071 00072 CLabels::CLabels(CFile* loader) 00073 : CSGObject() 00074 { 00075 num_labels=0; 00076 labels=NULL; 00077 m_num_classes=0; 00078 m_confidences=NULL; 00079 00080 load(loader); 00081 } 00082 00083 CLabels::~CLabels() 00084 { 00085 delete[] labels; 00086 delete[] m_confidences; 00087 num_labels=0; 00088 m_num_classes=0; 00089 labels=NULL; 00090 m_confidences=NULL; 00091 } 00092 00093 void CLabels::set_labels(float64_t* p_labels, int32_t len) 00094 { 00095 ASSERT(len>0); 00096 num_labels=len; 00097 00098 delete[] labels; 00099 labels=CMath::clone_vector(p_labels, len); 00100 } 00101 00102 void CLabels::set_confidences(float64_t* in_confidences, int32_t in_num_labels, 00103 int32_t in_num_classes) 00104 { 00105 if (num_labels && (num_labels != in_num_labels)) 00106 { 00107 SG_ERROR("Shape of confidence matrix mismatch (number of " 00108 "labels = %d does not match %d\n", num_labels, in_num_labels); 00109 } 00110 00111 if (m_num_classes && (m_num_classes != in_num_classes)) 00112 { 00113 SG_ERROR("Shape of confidence matrix mismatch (number of " 00114 "num_classes = %d does not match %d\n", m_num_classes, in_num_classes); 00115 } 00116 00117 delete[] m_confidences; 00118 00119 num_labels=in_num_labels; 00120 m_num_classes=in_num_classes; 00121 m_confidences=in_confidences; 00122 find_labels(); 00123 } 00124 00125 float64_t* CLabels::get_confidences(int32_t& out_num_labels, int32_t& out_num_classes) 00126 { 00127 out_num_labels=num_labels; 00128 out_num_classes=m_num_classes; 00129 00130 if (!num_labels || !m_num_classes || !m_confidences) 00131 SG_ERROR("No labels / confidences set\n"); 00132 00133 float64_t* out_conf=new float64_t[num_labels*m_num_classes]; 00134 memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t)); 00135 return out_conf; 00136 } 00137 00138 void CLabels::get_confidences(float64_t** dst, int32_t* out_num_labels, int32_t* out_num_classes) 00139 { 00140 ASSERT(dst && out_num_labels && out_num_classes); 00141 00142 if (num_labels<=0 || m_num_classes<=0 || !m_confidences) 00143 SG_ERROR("No labels / confidences set\n"); 00144 00145 *dst=NULL; 00146 *out_num_labels=num_labels; 00147 *out_num_classes=m_num_classes; 00148 00149 float64_t* out_conf= (float64_t*) malloc((size_t) sizeof(float64_t)*num_labels*m_num_classes); 00150 memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t)); 00151 *dst=out_conf; 00152 } 00153 00154 float64_t* CLabels::get_sample_confidences(const int32_t& in_sample_index, 00155 int32_t& out_num_classes) 00156 { 00157 out_num_classes=m_num_classes; 00158 00159 if (!(in_sample_index>=0 && in_sample_index<num_labels && 00160 m_num_classes && m_confidences)) 00161 { 00162 SG_ERROR("No labels / confidences set\n"); 00163 } 00164 00165 float64_t* out_conf=new float64_t[m_num_classes]; 00166 for (int32_t n_class=0; n_class<m_num_classes; n_class++) 00167 { 00168 out_conf[n_class]=m_confidences[n_class+in_sample_index*m_num_classes]; 00169 } 00170 return out_conf; 00171 } 00172 00173 void CLabels::find_labels() 00174 { 00175 ASSERT(m_confidences); 00176 ASSERT(labels); 00177 00178 float64_t max_conf; 00179 int32_t index; 00180 for (int32_t n_samp=0; n_samp<num_labels; n_samp++) 00181 { 00182 max_conf=m_confidences[n_samp]; 00183 labels[n_samp]=0; 00184 for (int32_t n_class=1; n_class<m_num_classes; n_class++) 00185 { 00186 index=n_samp+n_class*m_num_classes; 00187 if (m_confidences[index]>max_conf) 00188 { 00189 max_conf=m_confidences[index]; 00190 labels[n_samp]=n_class; 00191 } 00192 } 00193 } 00194 } 00195 00196 bool CLabels::is_two_class_labeling() 00197 { 00198 ASSERT(labels); 00199 bool found_plus_one=false; 00200 bool found_minus_one=false; 00201 00202 for (int32_t i=0; i<num_labels; i++) 00203 { 00204 if (labels[i]==+1.0) 00205 found_plus_one=true; 00206 else if (labels[i]==-1.0) 00207 found_minus_one=true; 00208 else 00209 SG_ERROR("Not a two class labeling label[%d]=%f (only +1/-1 allowed)\n", i, labels[i]); 00210 } 00211 00212 if (!found_plus_one) 00213 SG_ERROR("Not a two class labeling - no positively labeled examples found\n"); 00214 if (!found_minus_one) 00215 SG_ERROR("Not a two class labeling - no negatively labeled examples found\n"); 00216 00217 return true; 00218 } 00219 00220 int32_t CLabels::get_num_classes() 00221 { 00222 int32_t n=-1; 00223 int32_t* lab=get_int_labels(n); 00224 00225 int32_t num_classes=0; 00226 for (int32_t i=0; i<n; i++) 00227 num_classes=CMath::max(num_classes,lab[i]); 00228 00229 delete[] lab; 00230 00231 return num_classes+1; 00232 } 00233 00234 float64_t* CLabels::get_labels(int32_t &len) 00235 { 00236 len=num_labels; 00237 00238 if (num_labels>0) 00239 { 00240 float64_t* _labels=new float64_t[num_labels] ; 00241 for (int32_t i=0; i<len; i++) 00242 _labels[i]=get_label(i) ; 00243 return _labels ; 00244 } 00245 else 00246 return NULL; 00247 } 00248 00249 void CLabels::get_labels(float64_t** p_labels, int32_t* len) 00250 { 00251 ASSERT(p_labels && len); 00252 *p_labels=NULL; 00253 *len=num_labels; 00254 00255 if (num_labels>0) 00256 { 00257 *p_labels=(float64_t*) malloc(sizeof(float64_t)*num_labels); 00258 00259 for (int32_t i=0; i<num_labels; i++) 00260 (*p_labels)[i]=get_label(i); 00261 } 00262 } 00263 00264 int32_t* CLabels::get_int_labels(int32_t &len) 00265 { 00266 len=num_labels; 00267 00268 if (num_labels>0) 00269 { 00270 int32_t* _labels=new int32_t[num_labels] ; 00271 for (int32_t i=0; i<len; i++) 00272 _labels[i]= (int32_t) get_label(i) ; 00273 return _labels ; 00274 } 00275 else 00276 return NULL; 00277 } 00278 00279 void CLabels::set_int_labels(int32_t * mylabels, int32_t len) 00280 { 00281 num_labels = len ; 00282 delete[] labels ; 00283 00284 labels = new float64_t[num_labels] ; 00285 for (int32_t i=0; i<num_labels; i++) 00286 set_int_label(i, mylabels[i]) ; 00287 } 00288 00289 void CLabels::load(CFile* loader) 00290 { 00291 delete[] labels; 00292 delete[] m_confidences; 00293 m_confidences = NULL; 00294 num_labels=0; 00295 ASSERT(loader); 00296 loader->get_real_vector(labels, num_labels); 00297 m_num_classes=get_num_classes(); 00298 } 00299 00300 void CLabels::save(CFile* writer) 00301 { 00302 ASSERT(writer); 00303 ASSERT(labels && labels>0); 00304 writer->set_real_vector(labels, num_labels); 00305 }