|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 #include "features/SparsePolyFeatures.h" 00011 #include "lib/Hash.h" 00012 00013 using namespace shogun; 00014 00015 CSparsePolyFeatures::CSparsePolyFeatures(CSparseFeatures<float64_t>* feat, int32_t degree, bool normalize, int32_t hash_bits) 00016 : CDotFeatures(), m_normalization_values(NULL) 00017 { 00018 ASSERT(feat); 00019 00020 m_feat = feat; 00021 SG_REF(m_feat); 00022 m_degree=degree; 00023 m_normalize=normalize; 00024 m_hash_bits=hash_bits; 00025 mask=(uint32_t) (((uint64_t) 1)<<m_hash_bits)-1; 00026 m_output_dimensions=1<<m_hash_bits; 00027 m_input_dimensions=feat->get_num_features(); 00028 00029 if (m_normalize) 00030 store_normalization_values(); 00031 } 00032 00033 CSparsePolyFeatures::~CSparsePolyFeatures() 00034 { 00035 delete[] m_normalization_values; 00036 SG_UNREF(m_feat); 00037 } 00038 00039 float64_t CSparsePolyFeatures::dot(int32_t vec_idx1, int32_t vec_idx2) 00040 { 00041 00042 int32_t len1, len2; 00043 bool do_free1, do_free2; 00044 TSparseEntry<float64_t>* vec1 = m_feat->get_sparse_feature_vector(vec_idx1, len1, do_free1); 00045 TSparseEntry<float64_t>* vec2 = m_feat->get_sparse_feature_vector(vec_idx2, len2, do_free2); 00046 00047 float64_t result=CSparseFeatures<float64_t>::sparse_dot(1, vec1, len1, vec2, len2); 00048 result=CMath::pow(result, m_degree); 00049 00050 m_feat->free_feature_vector(vec1, len1, do_free1); 00051 m_feat->free_feature_vector(vec2, len2, do_free2); 00052 00053 return result; 00054 } 00055 00056 float64_t CSparsePolyFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00057 { 00058 if (vec2_len != m_output_dimensions) 00059 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00060 00061 int32_t vlen; 00062 bool do_free; 00063 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free); 00064 00065 float64_t result=0; 00066 00067 if (vec) 00068 { 00069 if (m_degree==2) 00070 { 00071 /* (a+b)^2 = a^2 + 2ab +b^2 */ 00072 for (int32_t i=0; i<vlen; i++) 00073 { 00074 float64_t v1=vec[i].entry; 00075 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF); 00076 00077 for (int32_t j=i; j<vlen; j++) 00078 { 00079 float64_t v2=vec[j].entry; 00080 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask; 00081 float64_t v; 00082 00083 if (i==j) 00084 v=v1*v1; 00085 else 00086 v=CMath::sqrt(2.0)*v1*v2; 00087 00088 result+=v*vec2[h]; 00089 } 00090 } 00091 } 00092 else if (m_degree==3) 00093 SG_NOTIMPLEMENTED; 00094 } 00095 00096 if (m_normalize) 00097 result/=m_normalization_values[vec_idx1]; 00098 00099 m_feat->free_feature_vector(vec, vlen, do_free); 00100 return result; 00101 } 00102 00103 void CSparsePolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00104 { 00105 if (vec2_len != m_output_dimensions) 00106 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00107 00108 int32_t vlen; 00109 bool do_free; 00110 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free); 00111 00112 float64_t norm_val=1.0; 00113 if (m_normalize) 00114 norm_val = m_normalization_values[vec_idx1]; 00115 alpha/=norm_val; 00116 00117 if (m_degree==2) 00118 { 00119 /* (a+b)^2 = a^2 + 2ab +b^2 */ 00120 for (int32_t i=0; i<vlen; i++) 00121 { 00122 float64_t v1=vec[i].entry; 00123 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF); 00124 00125 for (int32_t j=i; j<vlen; j++) 00126 { 00127 float64_t v2=vec[j].entry; 00128 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask; 00129 float64_t v; 00130 00131 if (i==j) 00132 v=alpha*v1*v1; 00133 else 00134 v=alpha*CMath::sqrt(2.0)*v1*v2; 00135 00136 if (abs_val) 00137 vec2[h]+=CMath::abs(v); 00138 else 00139 vec2[h]+=v; 00140 } 00141 } 00142 } 00143 else if (m_degree==3) 00144 SG_NOTIMPLEMENTED; 00145 00146 m_feat->free_feature_vector(vec, vlen, do_free); 00147 } 00148 00149 void CSparsePolyFeatures::store_normalization_values() 00150 { 00151 delete[] m_normalization_values; 00152 00153 int32_t num_vec = this->get_num_vectors(); 00154 00155 m_normalization_values=new float64_t[num_vec]; 00156 for (int i=0; i<num_vec; i++) 00157 { 00158 float64_t val = CMath::sqrt(dot(i,i)); 00159 if (val==0) 00160 // trap division by zero 00161 m_normalization_values[i]=1.0; 00162 else 00163 m_normalization_values[i]=val; 00164 } 00165 00166 } 00167 00168 CFeatures* CSparsePolyFeatures::duplicate() const 00169 { 00170 return new CSparsePolyFeatures(*this); 00171 }