|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 00011 #ifndef _HASHEDWDFEATURESTRANSPOSED_H___ 00012 #define _HASHEDWDFEATURESTRANSPOSED_H___ 00013 00014 #include "lib/common.h" 00015 #include "features/DotFeatures.h" 00016 #include "features/StringFeatures.h" 00017 #include "lib/Hash.h" 00018 00019 namespace shogun 00020 { 00021 template <class ST> class CStringFeatures; 00022 00028 class CHashedWDFeaturesTransposed : public CDotFeatures 00029 { 00030 public: 00031 00040 CHashedWDFeaturesTransposed(CStringFeatures<uint8_t>* str, int32_t start_order, 00041 int32_t order, int32_t from_order, int32_t hash_bits=12); 00042 00044 CHashedWDFeaturesTransposed(const CHashedWDFeaturesTransposed & orig); 00045 00047 virtual ~CHashedWDFeaturesTransposed(); 00048 00056 inline virtual int32_t get_dim_feature_space() 00057 { 00058 return w_dim; 00059 } 00060 00067 virtual float64_t dot(int32_t vec_idx1, int32_t vec_idx2); 00068 00075 virtual float64_t dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len); 00076 00088 virtual void dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b); 00089 00101 virtual void dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b); 00102 00103 00112 virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val=false); 00113 00119 virtual inline int32_t get_nnz_features_for_vector(int32_t num) 00120 { 00121 return w_dim/alphabet_size; 00122 } 00123 00128 virtual CFeatures* duplicate() const; 00129 00134 inline virtual EFeatureType get_feature_type() 00135 { 00136 return F_UNKNOWN; 00137 } 00138 00143 inline virtual EFeatureClass get_feature_class() 00144 { 00145 return C_WD; 00146 } 00147 00148 inline virtual int32_t get_num_vectors() 00149 { 00150 return num_strings; 00151 } 00152 00153 inline virtual int32_t get_size() 00154 { 00155 return sizeof(float64_t); 00156 } 00157 00160 void set_normalization_const(float64_t n=0); 00161 00163 inline float64_t get_normalization_const() 00164 { 00165 return normalization_const; 00166 } 00167 00169 struct hashed_wd_transposed_feature_iterator 00170 { 00172 uint16_t* vec; 00174 int32_t vidx; 00176 int32_t vlen; 00178 bool vfree; 00179 00181 int32_t index; 00182 00183 }; 00184 00194 virtual void* get_feature_iterator(int32_t vector_index) 00195 { 00196 SG_NOTIMPLEMENTED; 00197 return NULL; 00198 } 00199 00210 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator) 00211 { 00212 SG_NOTIMPLEMENTED; 00213 return NULL; 00214 } 00215 00221 virtual void free_feature_iterator(void* iterator) 00222 { 00223 SG_NOTIMPLEMENTED; 00224 } 00225 00227 inline virtual const char* get_name() const { return "HashedWDFeaturesTransposed"; } 00228 00229 protected: 00230 00232 void set_wd_weights(); 00233 static void* dense_dot_range_helper(void* p); 00234 00235 protected: 00237 CStringFeatures<uint8_t>* strings; 00238 00240 T_STRING<uint8_t>* transposed_strings; 00241 00243 int32_t degree; 00245 int32_t start_degree; 00247 int32_t from_degree; 00249 int32_t string_length; 00251 int32_t num_strings; 00253 int32_t alphabet_size; 00255 int32_t w_dim; 00257 int32_t partial_w_dim; 00259 float64_t* wd_weights; 00261 uint32_t mask; 00263 int32_t m_hash_bits; 00264 00266 float64_t normalization_const; 00267 }; 00268 } 00269 #endif // _HASHEDWDFEATURESTRANSPOSED_H___