// SHOGUN v0.9.3
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009-2010 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * Copyright (C) 2010 Berlin Institute of Technology 00010 */ 00011 00012 #ifndef _IMPLICITSPECFEATURES_H___ 00013 #define _IMPLICITSPECFEATURES_H___ 00014 00015 #include "lib/common.h" 00016 #include "lib/io.h" 00017 #include "features/DotFeatures.h" 00018 #include "features/StringFeatures.h" 00019 00020 namespace shogun 00021 { 00022 00023 template <class ST> class CStringFeatures; 00024 00030 class CImplicitWeightedSpecFeatures : public CDotFeatures 00031 { 00032 public: 00033 00039 CImplicitWeightedSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize=true); 00040 00042 CImplicitWeightedSpecFeatures(const CImplicitWeightedSpecFeatures & orig); 00043 00044 virtual ~CImplicitWeightedSpecFeatures(); 00045 00050 virtual CFeatures* duplicate() const; 00051 00059 inline virtual int32_t get_dim_feature_space() 00060 { 00061 return spec_size; 00062 } 00063 00070 virtual float64_t dot(int32_t vec_idx1, int32_t vec_idx2); 00071 00078 virtual float64_t dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len); 00079 00088 virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val=false); 00089 00095 virtual inline int32_t get_nnz_features_for_vector(int32_t num) 00096 { 00097 int32_t vlen=-1; 00098 bool free_vec; 00099 uint16_t* vec1=strings->get_feature_vector(num, vlen, free_vec); 00100 strings->free_feature_vector(vec1, num, free_vec); 00101 int32_t nnz=0; 00102 for (int32_t i=1; i<=degree; i++) 00103 nnz+=CMath::min(CMath::pow(alphabet_size,i), vlen); 00104 return nnz; 00105 } 00106 00111 
inline virtual EFeatureType get_feature_type() 00112 { 00113 return F_UNKNOWN; 00114 } 00115 00120 inline virtual EFeatureClass get_feature_class() 00121 { 00122 return C_WEIGHTEDSPEC; 00123 } 00124 00129 inline virtual int32_t get_num_vectors() 00130 { 00131 return num_strings; 00132 } 00133 00138 inline virtual int32_t get_size() 00139 { 00140 return sizeof(float64_t); 00141 } 00142 00147 bool set_wd_weights(); 00148 00155 bool set_weights(float64_t* w, int32_t d); 00156 00158 struct wspec_feature_iterator 00159 { 00161 uint16_t* vec; 00163 int32_t vidx; 00165 int32_t vlen; 00167 bool vfree; 00168 00173 int32_t offs; 00174 int32_t d; 00175 int32_t j; 00176 uint8_t mask; 00177 float64_t alpha; 00179 }; 00180 00190 virtual void* get_feature_iterator(int32_t vector_index); 00191 00202 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator); 00203 00209 virtual void free_feature_iterator(void* iterator); 00210 00212 inline virtual const char* get_name() const { return "ImplicitWeightedSpecFeatures"; } 00213 00214 protected: 00219 void compute_normalization_const(); 00220 00221 protected: 00223 CStringFeatures<uint16_t>* strings; 00224 00226 float64_t* normalization_factors; 00228 int32_t num_strings; 00230 int32_t alphabet_size; 00231 00233 int32_t degree; 00235 int32_t spec_size; 00236 00238 float64_t* spec_weights; 00239 }; 00240 } 00241 #endif // _IMPLICITSPECFEATURES_H___