|
SHOGUN v0.9.3
|
00001 #include "features/PolyFeatures.h" 00002 00003 using namespace shogun; 00004 00005 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize) 00006 : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL), 00007 m_normalization_values(NULL) 00008 { 00009 ASSERT(feat); 00010 00011 m_feat = feat; 00012 SG_REF(m_feat); 00013 m_degree=degree; 00014 m_normalize=normalize; 00015 m_input_dimensions=feat->get_num_features(); 00016 m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree); 00017 00018 store_multi_index(); 00019 store_multinomial_coefficients(); 00020 if (m_normalize) 00021 store_normalization_values(); 00022 } 00023 00024 CPolyFeatures::~CPolyFeatures() 00025 { 00026 delete[] m_multi_index; 00027 delete[] m_multinomial_coefficients; 00028 delete[] m_normalization_values; 00029 SG_UNREF(m_feat); 00030 } 00031 00032 float64_t CPolyFeatures::dot(int32_t vec_idx1, int32_t vec_idx2) 00033 { 00034 int32_t len1; 00035 bool do_free1; 00036 float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1); 00037 00038 int32_t len2; 00039 bool do_free2; 00040 float64_t* vec2 = m_feat->get_feature_vector(vec_idx2, len2, do_free2); 00041 00042 float64_t sum=0; 00043 int cnt=0; 00044 for (int j=0; j<m_output_dimensions; j++) 00045 { 00046 float64_t out1=m_multinomial_coefficients[j]; 00047 float64_t out2=m_multinomial_coefficients[j]; 00048 for (int k=0; k<m_degree; k++) 00049 { 00050 out1*=vec1[m_multi_index[cnt]]; 00051 out2*=vec2[m_multi_index[cnt]]; 00052 cnt++; 00053 } 00054 sum+=out1*out2; 00055 } 00056 m_feat->free_feature_vector(vec1, len1, do_free1); 00057 m_feat->free_feature_vector(vec2, len2, do_free2); 00058 00059 return sum; 00060 } 00061 00062 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00063 { 00064 if (vec2_len != m_output_dimensions) 00065 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00066 00067 int32_t len; 00068 bool do_free; 00069 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free); 00070 00071 00072 int cnt=0; 00073 float64_t sum=0; 00074 for (int j=0; j<vec2_len; j++) 00075 { 00076 float64_t output=m_multinomial_coefficients[j]; 00077 for (int k=0; k<m_degree; k++) 00078 { 00079 output*=vec[m_multi_index[cnt]]; 00080 cnt++; 00081 } 00082 sum+=output*vec2[j]; 00083 } 00084 if (m_normalize) 00085 sum = sum/m_normalization_values[vec_idx1]; 00086 00087 m_feat->free_feature_vector(vec, len, do_free); 00088 return sum; 00089 } 00090 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00091 { 00092 if (vec2_len != m_output_dimensions) 00093 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions); 00094 00095 int32_t len; 00096 bool do_free; 00097 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free); 00098 00099 00100 int cnt=0; 00101 float32_t norm_val=1; 00102 if (m_normalize) 00103 norm_val = m_normalization_values[vec_idx1]; 00104 alpha/=norm_val; 00105 for (int j=0; j<vec2_len; j++) 00106 { 00107 float64_t output=m_multinomial_coefficients[j]; 00108 for (int k=0; k<m_degree; k++) 00109 { 00110 output*=vec[m_multi_index[cnt]]; 00111 cnt++; 00112 } 00113 if (abs_val) 00114 output=CMath::abs(output); 00115 00116 vec2[j]+=alpha*output; 00117 } 00118 m_feat->free_feature_vector(vec, len, do_free); 00119 } 00120 void CPolyFeatures::store_normalization_values() 00121 { 00122 delete[] m_normalization_values; 00123 00124 int32_t num_vec = this->get_num_vectors(); 00125 00126 m_normalization_values=new float32_t[num_vec]; 00127 for (int i=0; i<num_vec; i++) 00128 { 00129 float64_t tmp = CMath::sqrt(dot(i,i)); 00130 if (tmp==0) 00131 // trap division by zero 00132 m_normalization_values[i]=1; 00133 else 00134 m_normalization_values[i]=tmp; 00135 } 00136 00137 } 00138 00139 void CPolyFeatures::store_multi_index() 00140 { 00141 delete[] m_multi_index; 00142 00143 m_multi_index=new uint16_t[m_output_dimensions*m_degree]; 00144 00145 uint16_t* exponents = new uint16_t[m_input_dimensions]; 00146 if (!exponents) 00147 SG_ERROR( "Error allocating mem \n"); 00148 /*copy adress: otherwise it will be overwritten in recursion*/ 00149 uint16_t* index = m_multi_index; 00150 enumerate_multi_index(0, &index, exponents, m_degree); 00151 00152 delete[] exponents; 00153 } 00154 00155 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree) 00156 { 00157 if (feat_idx==m_input_dimensions-1 || degree==0) 00158 { 00159 if (feat_idx==m_input_dimensions-1) 00160 exponents[feat_idx] = degree; 00161 if (degree==0) 00162 exponents[feat_idx] = 0; 00163 int32_t i, j; 00164 for (j=0; j<feat_idx+1; j++) 00165 for (i=0; i<exponents[j]; i++) 00166 { 00167 **index = j; 00168 (*index)++; 00169 } 00170 exponents[feat_idx] = 0; 00171 return; 00172 } 00173 int32_t k; 00174 for (k=0; k<=degree; k++) 00175 { 00176 exponents[feat_idx] = k; 00177 enumerate_multi_index(feat_idx+1, index, exponents, degree-k); 00178 } 00179 return; 00180 00181 } 00182 00183 void CPolyFeatures::store_multinomial_coefficients() 00184 { 00185 delete[] m_multinomial_coefficients; 00186 00187 m_multinomial_coefficients = new float64_t[m_output_dimensions]; 00188 int32_t* exponents = new int32_t[m_input_dimensions]; 00189 if (!exponents) 00190 SG_ERROR( "Error allocating mem \n"); 00191 int32_t j=0; 00192 for (j=0; j<m_input_dimensions; j++) 00193 exponents[j] = 0; 00194 int32_t k, cnt=0; 00195 for (j=0; j<m_output_dimensions; j++) 00196 { 00197 for (k=0; k<m_degree; k++) 00198 { 00199 exponents[m_multi_index[cnt]] ++; 00200 cnt++; 00201 } 00202 m_multinomial_coefficients[j] = sqrt((double) multinomialcoef(exponents, m_input_dimensions)); 00203 for (k=0; k<m_input_dimensions; k++) 00204 { 00205 exponents[k]=0; 00206 } 00207 } 00208 delete[] exponents; 00209 } 00210 00211 int32_t CPolyFeatures::bico2(int32_t n, int32_t k) 00212 { 00213 00214 /* for this problem k is usually small (<=degree), 00215 * thus it is efficient to 00216 * to use recursion and prune end recursions*/ 00217 if (n<k) 00218 return 0; 00219 if (k>n/2) 00220 k = n-k; 00221 if (k<0) 00222 return 0; 00223 if (k==0) 00224 return 1; 00225 if (k==1) 00226 return n; 00227 if (k<4) 00228 return bico2(n-1, k-1)+bico2(n-1, k); 00229 00230 /* call function as implemented in numerical recipies: 00231 * much more efficient for large binomial coefficients*/ 00232 return bico(n, k); 00233 00234 } 00235 00236 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D) 00237 { 00238 if (N==1) 00239 return 1; 00240 if (D==0) 00241 return 1; 00242 int32_t d; 00243 int32_t ret = 0; 00244 for (d=0; d<=D; d++) 00245 ret += calc_feature_space_dimensions(N-1, d); 00246 00247 return ret; 00248 } 00249 00250 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len) 00251 { 00252 int32_t ret = 1, i; 00253 int32_t n = 0; 00254 for (i=0; i<len; i++) 00255 { 00256 n += exps[i]; 00257 ret *= bico2(n, exps[i]); 00258 } 00259 return ret; 00260 } 00261 00262 /* gammln as implemented in the 00263 * second edition of Numerical Recipes in C */ 00264 float64_t CPolyFeatures::gammln(float64_t xx) 00265 { 00266 float64_t x,y,tmp,ser; 00267 static float64_t cof[6]={76.18009172947146, -86.50532032941677, 00268 24.01409824083091, -1.231739572450155, 00269 0.1208650973866179e-2,-0.5395239384953e-5}; 00270 int32_t j; 00271 00272 y=x=xx; 00273 tmp=x+5.5; 00274 tmp -= (x+0.5)*log(tmp); 00275 ser=1.000000000190015; 00276 for (j=0;j<=5;j++) ser += cof[j]/++y; 00277 return -tmp+log(2.5066282746310005*ser/x); 00278 } 00279 00280 float64_t CPolyFeatures::factln(int32_t n) 00281 { 00282 static float64_t a[101]; 00283 00284 if (n < 0) SG_ERROR("Negative factorial in routine factln\n"); 00285 if (n <= 1) return 0.0; 00286 if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0)); 00287 else return gammln(n+1.0); 00288 } 00289 00290 int32_t CPolyFeatures::bico(int32_t n, int32_t k) 00291 { 00292 /* use floor to clean roundoff errors*/ 00293 return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k))); 00294 } 00295 CFeatures* CPolyFeatures::duplicate() const 00296 { 00297 return new CPolyFeatures(*this); 00298 }