|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include "features/RealFileFeatures.h" 00012 #include "features/Features.h" 00013 #include "lib/io.h" 00014 00015 #include <stdio.h> 00016 #include <string.h> 00017 00018 using namespace shogun; 00019 00020 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname) 00021 : CSimpleFeatures<float64_t>(size) 00022 { 00023 working_file=fopen(fname, "r"); 00024 working_filename=strdup(fname); 00025 ASSERT(working_file); 00026 intlen=0; 00027 doublelen=0; 00028 endian=0; 00029 fourcc=0; 00030 preprocd=0; 00031 labels=NULL; 00032 status=load_base_data(); 00033 } 00034 00035 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file) 00036 : CSimpleFeatures<float64_t>(size), working_file(file), working_filename(NULL) 00037 { 00038 ASSERT(working_file); 00039 intlen=0; 00040 doublelen=0; 00041 endian=0; 00042 fourcc=0; 00043 preprocd=0; 00044 labels=NULL; 00045 status=load_base_data(); 00046 } 00047 00048 CRealFileFeatures::~CRealFileFeatures() 00049 { 00050 delete[] feature_matrix; 00051 delete[] working_filename; 00052 delete[] labels; 00053 } 00054 00055 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig) 00056 : CSimpleFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status) 00057 { 00058 if (orig.working_filename) 00059 working_filename=strdup(orig.working_filename); 00060 if (orig.labels && get_num_vectors()) 00061 { 00062 labels=new int32_t[get_num_vectors()]; 00063 memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors()); 00064 } 00065 } 00066 00067 float64_t* CRealFileFeatures::compute_feature_vector( 00068 int32_t num, int32_t &len, float64_t* target) 00069 { 00070 ASSERT(num<num_vectors); 00071 len=num_features; 00072 float64_t* featurevector=target; 00073 if (!featurevector) 00074 featurevector=new float64_t[num_features]; 00075 ASSERT(working_file); 00076 fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET); 00077 ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features); 00078 return featurevector; 00079 } 00080 00081 float64_t* CRealFileFeatures::load_feature_matrix() 00082 { 00083 ASSERT(working_file); 00084 fseek(working_file, filepos, SEEK_SET); 00085 delete[] feature_matrix; 00086 00087 SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0); 00088 free_feature_matrix(); 00089 feature_matrix=new float64_t[num_features*num_vectors]; 00090 00091 SG_INFO( "loading... be patient.\n"); 00092 00093 for (int32_t i=0; i<(int32_t) num_vectors; i++) 00094 { 00095 if (!(i % (num_vectors/10+1))) 00096 SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors)); 00097 else if (!(i % (num_vectors/200+1))) 00098 SG_PRINT( "."); 00099 00100 ASSERT(fread(&feature_matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features); 00101 } 00102 SG_DONE(); 00103 00104 return feature_matrix; 00105 } 00106 00107 int32_t CRealFileFeatures::get_label(int32_t idx) 00108 { 00109 ASSERT(idx<num_vectors); 00110 if (labels) 00111 return labels[idx]; 00112 return 0; 00113 } 00114 00115 bool CRealFileFeatures::load_base_data() 00116 { 00117 ASSERT(working_file); 00118 uint32_t num_vec=0; 00119 uint32_t num_feat=0; 00120 00121 ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1); 00122 ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1); 00123 ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1); 00124 ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1); 00125 ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1); 00126 ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1); 00127 ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1); 00128 SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd); 00129 filepos=ftell(working_file); 00130 set_num_vectors(num_vec); 00131 set_num_features(num_feat); 00132 fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET); 00133 delete[] labels; 00134 labels=new int[num_vec]; 00135 ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec); 00136 return true; 00137 }