|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "features/ExplicitSpecFeatures.h" 00012 #include "lib/io.h" 00013 00014 using namespace shogun; 00015 00016 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures() 00017 { 00018 ASSERT(str); 00019 00020 use_normalization=normalize; 00021 num_strings = str->get_num_vectors(); 00022 spec_size = str->get_num_symbols(); 00023 00024 obtain_kmer_spectrum(str); 00025 00026 SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings); 00027 } 00028 00029 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig), 00030 num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size) 00031 { 00032 k_spectrum= new float64_t*[num_strings]; 00033 for (int32_t i=0; i<num_strings; i++) 00034 k_spectrum[i]=CMath::clone_vector(k_spectrum[i], spec_size); 00035 } 00036 00037 CExplicitSpecFeatures::~CExplicitSpecFeatures() 00038 { 00039 delete_kmer_spectrum(); 00040 } 00041 00042 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, int32_t vec_idx2) 00043 { 00044 ASSERT(vec_idx1 < num_strings); 00045 ASSERT(vec_idx2 < num_strings); 00046 float64_t* vec1=k_spectrum[vec_idx1]; 00047 float64_t* vec2=k_spectrum[vec_idx2]; 00048 00049 return CMath::dot(vec1, vec2, spec_size); 00050 } 00051 00052 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00053 { 00054 ASSERT(vec2_len == spec_size); 00055 ASSERT(vec_idx1 < num_strings); 00056 float64_t* vec1=k_spectrum[vec_idx1]; 00057 float64_t result=0; 00058 00059 for (int32_t i=0; i<spec_size; i++) 00060 result+=vec1[i]*vec2[i]; 00061 00062 return result; 00063 } 00064 00065 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00066 { 00067 ASSERT(vec2_len == spec_size); 00068 ASSERT(vec_idx1 < num_strings); 00069 float64_t* vec1=k_spectrum[vec_idx1]; 00070 00071 if (abs_val) 00072 { 00073 for (int32_t i=0; i<spec_size; i++) 00074 vec2[i]+=alpha*CMath::abs(vec1[i]); 00075 } 00076 else 00077 { 00078 for (int32_t i=0; i<spec_size; i++) 00079 vec2[i]+=alpha*vec1[i]; 00080 } 00081 } 00082 00083 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str) 00084 { 00085 k_spectrum= new float64_t*[num_strings]; 00086 00087 for (int32_t i=0; i<num_strings; i++) 00088 { 00089 k_spectrum[i]=new float64_t[spec_size]; 00090 memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size); 00091 00092 int32_t len=0; 00093 bool free_fv; 00094 uint16_t* fv=str->get_feature_vector(i, len, free_fv); 00095 00096 for (int32_t j=0; j<len; j++) 00097 k_spectrum[i][fv[j]]++; 00098 00099 str->free_feature_vector(fv, i, free_fv); 00100 00101 if (use_normalization) 00102 { 00103 float64_t n=0; 00104 for (int32_t j=0; j<spec_size; j++) 00105 n+=CMath::sq(k_spectrum[i][j]); 00106 00107 n=CMath::sqrt(n); 00108 00109 for (int32_t j=0; j<spec_size; j++) 00110 k_spectrum[i][j]/=n; 00111 } 00112 } 00113 } 00114 00115 void CExplicitSpecFeatures::delete_kmer_spectrum() 00116 { 00117 for (int32_t i=0; i<num_strings; i++) 00118 delete[] k_spectrum[i]; 00119 00120 delete[] k_spectrum; 00121 k_spectrum=NULL; 00122 } 00123 00124 CFeatures* CExplicitSpecFeatures::duplicate() const 00125 { 00126 return new CExplicitSpecFeatures(*this); 00127 }