|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "lib/common.h" 00012 #include "lib/io.h" 00013 #include "features/StringFeatures.h" 00014 #include "features/Labels.h" 00015 #include "distributions/LinearHMM.h" 00016 #include "classifier/PluginEstimate.h" 00017 00018 using namespace shogun; 00019 00020 CPluginEstimate::CPluginEstimate(float64_t pos_pseudo, float64_t neg_pseudo) 00021 : CClassifier(), m_pos_pseudo(1e-10), m_neg_pseudo(1e-10), 00022 pos_model(NULL), neg_model(NULL), features(NULL) 00023 { 00024 } 00025 00026 CPluginEstimate::~CPluginEstimate() 00027 { 00028 delete pos_model; 00029 delete neg_model; 00030 00031 SG_UNREF(features); 00032 } 00033 00034 bool CPluginEstimate::train(CFeatures* data) 00035 { 00036 ASSERT(labels); 00037 if (data) 00038 { 00039 if (data->get_feature_class() != C_STRING || 00040 data->get_feature_type() != F_WORD) 00041 { 00042 SG_ERROR("Features not of class string type word\n"); 00043 } 00044 00045 set_features((CStringFeatures<uint16_t>*) data); 00046 } 00047 ASSERT(features); 00048 00049 delete pos_model; 00050 delete neg_model; 00051 00052 pos_model=new CLinearHMM(features); 00053 neg_model=new CLinearHMM(features); 00054 00055 int32_t* pos_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()]; 00056 int32_t* neg_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()]; 00057 00058 ASSERT(labels->get_num_labels()==features->get_num_vectors()); 00059 00060 int32_t pos_idx=0; 00061 int32_t neg_idx=0; 00062 00063 for (int32_t i=0; i<labels->get_num_labels(); i++) 00064 { 00065 if (labels->get_label(i) > 0) 00066 pos_indizes[pos_idx++]=i; 00067 else 00068 neg_indizes[neg_idx++]=i; 00069 } 00070 00071 SG_INFO( "training using pseudos %f and %f\n", m_pos_pseudo, m_neg_pseudo); 00072 pos_model->train(pos_indizes, pos_idx, m_pos_pseudo); 00073 neg_model->train(neg_indizes, neg_idx, m_neg_pseudo); 00074 00075 delete[] pos_indizes; 00076 delete[] neg_indizes; 00077 00078 return true; 00079 } 00080 00081 CLabels* CPluginEstimate::classify() 00082 { 00083 ASSERT(features); 00084 CLabels* result=new CLabels(features->get_num_vectors()); 00085 ASSERT(result->get_num_labels()==features->get_num_vectors()); 00086 00087 for (int32_t vec=0; vec<features->get_num_vectors(); vec++) 00088 result->set_label(vec, classify_example(vec)); 00089 00090 return result; 00091 } 00092 00093 CLabels* CPluginEstimate::classify(CFeatures* data) 00094 { 00095 if (!data) 00096 SG_ERROR("No features specified\n"); 00097 00098 if (data->get_feature_class() != C_STRING || 00099 data->get_feature_type() != F_WORD) 00100 { 00101 SG_ERROR("Features not of class string type word\n"); 00102 } 00103 00104 set_features((CStringFeatures<uint16_t>*) data); 00105 return classify(); 00106 } 00107 00108 float64_t CPluginEstimate::classify_example(int32_t vec_idx) 00109 { 00110 ASSERT(features); 00111 00112 int32_t len; 00113 bool free_vec; 00114 uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec); 00115 00116 if ((!pos_model) || (!neg_model)) 00117 SG_ERROR( "model(s) not assigned\n"); 00118 00119 float64_t result=pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len); 00120 features->free_feature_vector(vector, vec_idx, free_vec); 00121 return result; 00122 }