// SHOGUN v0.9.3
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 00011 #include "features/SparseFeatures.h" 00012 #include "lib/File.h" 00013 #include "lib/AsciiFile.h" 00014 #include "lib/Mathematics.h" 00015 00016 using namespace shogun; 00017 00018 CAsciiFile::CAsciiFile(FILE* f, const char* name) : CFile(f, name) 00019 { 00020 } 00021 00022 CAsciiFile::CAsciiFile(char* fname, char rw, const char* name) : CFile(fname, rw, name) 00023 { 00024 } 00025 00026 CAsciiFile::~CAsciiFile() 00027 { 00028 } 00029 00030 #define GET_VECTOR(fname, mfname, sg_type) \ 00031 void CAsciiFile::fname(sg_type*& vec, int32_t& len) \ 00032 { \ 00033 vec=NULL; \ 00034 len=0; \ 00035 int32_t num_feat=0; \ 00036 int32_t num_vec=0; \ 00037 mfname(vec, num_feat, num_vec); \ 00038 if ((num_feat==1) || (num_vec==1)) \ 00039 { \ 00040 if (num_feat==1) \ 00041 len=num_vec; \ 00042 else \ 00043 len=num_feat; \ 00044 } \ 00045 else \ 00046 { \ 00047 delete[] vec; \ 00048 vec=NULL; \ 00049 len=0; \ 00050 SG_ERROR("Could not read vector from" \ 00051 " file %s (shape %dx%d found but " \ 00052 "vector expected).\n", filename, \ 00053 num_vec, num_feat); \ 00054 } \ 00055 } 00056 00057 GET_VECTOR(get_byte_vector, get_byte_matrix, uint8_t) 00058 GET_VECTOR(get_char_vector, get_char_matrix, char) 00059 GET_VECTOR(get_int_vector, get_int_matrix, int32_t) 00060 GET_VECTOR(get_shortreal_vector, get_shortreal_matrix, float32_t) 00061 GET_VECTOR(get_real_vector, get_real_matrix, float64_t) 00062 GET_VECTOR(get_short_vector, get_short_matrix, int16_t) 00063 GET_VECTOR(get_word_vector, get_word_matrix, uint16_t) 00064 #undef GET_VECTOR 00065 00066 #define GET_MATRIX(fname, 
conv, sg_type) \ 00067 void CAsciiFile::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \ 00068 { \ 00069 struct stat stats; \ 00070 if (stat(filename, &stats)!=0) \ 00071 SG_ERROR("Could not get file statistics.\n"); \ 00072 \ 00073 char* data=new char[stats.st_size+1]; \ 00074 memset(data, 0, sizeof(char)*(stats.st_size+1)); \ 00075 size_t nread=fread(data, sizeof(char), stats.st_size, file); \ 00076 if (nread<=0) \ 00077 SG_ERROR("Could not read data from %s.\n", filename); \ 00078 \ 00079 SG_DEBUG("data read from file:\n%s\n", data); \ 00080 \ 00081 /* determine num_feat and num_vec, populate dynamic array */ \ 00082 int32_t nf=0; \ 00083 num_feat=0; \ 00084 num_vec=0; \ 00085 char* ptr_item=NULL; \ 00086 char* ptr_data=data; \ 00087 CDynamicArray<char*>* items=new CDynamicArray<char*>(); \ 00088 \ 00089 while (*ptr_data) \ 00090 { \ 00091 if (*ptr_data=='\n') \ 00092 { \ 00093 if (ptr_item) \ 00094 nf++; \ 00095 \ 00096 if (num_feat!=0 && nf!=num_feat) \ 00097 SG_ERROR("Number of features mismatches (%d != %d) in vector" \ 00098 " %d in file %s.\n", num_feat, nf, num_vec, filename); \ 00099 \ 00100 append_item(items, ptr_data, ptr_item); \ 00101 num_feat=nf; \ 00102 num_vec++; \ 00103 nf=0; \ 00104 ptr_item=NULL; \ 00105 } \ 00106 else if (!isblank(*ptr_data) && !ptr_item) \ 00107 { \ 00108 ptr_item=ptr_data; \ 00109 } \ 00110 else if (isblank(*ptr_data) && ptr_item) \ 00111 { \ 00112 append_item(items, ptr_data, ptr_item); \ 00113 ptr_item=NULL; \ 00114 nf++; \ 00115 } \ 00116 \ 00117 ptr_data++; \ 00118 } \ 00119 \ 00120 SG_DEBUG("num feat: %d, num_vec %d\n", num_feat, num_vec); \ 00121 delete[] data; \ 00122 \ 00123 /* now copy data into matrix */ \ 00124 matrix=new sg_type[num_vec*num_feat]; \ 00125 for (int32_t i=0; i<num_vec; i++) \ 00126 { \ 00127 for (int32_t j=0; j<num_feat; j++) \ 00128 { \ 00129 char* item=items->get_element(i*num_feat+j); \ 00130 matrix[i*num_feat+j]=conv(item); \ 00131 delete[] item; \ 00132 } \ 00133 } \ 00134 delete 
items; \ 00135 } 00136 00137 GET_MATRIX(get_byte_matrix, atoi, uint8_t) 00138 GET_MATRIX(get_char_matrix, atoi, char) 00139 GET_MATRIX(get_int_matrix, atoi, int32_t) 00140 GET_MATRIX(get_uint_matrix, atoi, uint32_t) 00141 GET_MATRIX(get_long_matrix, atoll, int64_t) 00142 GET_MATRIX(get_ulong_matrix, atoll, uint64_t) 00143 GET_MATRIX(get_shortreal_matrix, atof, float32_t) 00144 GET_MATRIX(get_real_matrix, atof, float64_t) 00145 GET_MATRIX(get_longreal_matrix, atof, floatmax_t) 00146 GET_MATRIX(get_short_matrix, atoi, int16_t) 00147 GET_MATRIX(get_word_matrix, atoi, uint16_t) 00148 #undef GET_MATRIX 00149 00150 void CAsciiFile::get_byte_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims) 00151 { 00152 } 00153 00154 void CAsciiFile::get_char_ndarray(char*& array, int32_t*& dims, int32_t& num_dims) 00155 { 00156 } 00157 00158 void CAsciiFile::get_int_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims) 00159 { 00160 } 00161 00162 void CAsciiFile::get_shortreal_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims) 00163 { 00164 } 00165 00166 void CAsciiFile::get_real_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims) 00167 { 00168 } 00169 00170 void CAsciiFile::get_short_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims) 00171 { 00172 } 00173 00174 void CAsciiFile::get_word_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims) 00175 { 00176 } 00177 00178 #define GET_SPARSEMATRIX(fname, conv, sg_type) \ 00179 void CAsciiFile::fname(TSparse<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \ 00180 { \ 00181 size_t blocksize=1024*1024; \ 00182 size_t required_blocksize=blocksize; \ 00183 uint8_t* dummy=new uint8_t[blocksize]; \ 00184 \ 00185 if (file) \ 00186 { \ 00187 num_vec=0; \ 00188 num_feat=0; \ 00189 \ 00190 SG_INFO("counting line numbers in file %s\n", filename); \ 00191 size_t sz=blocksize; \ 00192 size_t block_offs=0; \ 00193 size_t old_block_offs=0; \ 00194 fseek(file, 0, SEEK_END); \ 00195 size_t 
fsize=ftell(file); \ 00196 rewind(file); \ 00197 \ 00198 while (sz == blocksize) \ 00199 { \ 00200 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \ 00201 bool contains_cr=false; \ 00202 for (size_t i=0; i<sz; i++) \ 00203 { \ 00204 block_offs++; \ 00205 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \ 00206 { \ 00207 num_vec++; \ 00208 contains_cr=true; \ 00209 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1); \ 00210 old_block_offs=block_offs; \ 00211 } \ 00212 } \ 00213 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); \ 00214 } \ 00215 \ 00216 SG_INFO("found %d feature vectors\n", num_vec); \ 00217 delete[] dummy; \ 00218 blocksize=required_blocksize; \ 00219 dummy = new uint8_t[blocksize+1]; /*allow setting of '\0' at EOL*/ \ 00220 matrix=new TSparse<sg_type>[num_vec]; \ 00221 \ 00222 rewind(file); \ 00223 sz=blocksize; \ 00224 int32_t lines=0; \ 00225 while (sz == blocksize) \ 00226 { \ 00227 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \ 00228 \ 00229 size_t old_sz=0; \ 00230 for (size_t i=0; i<sz; i++) \ 00231 { \ 00232 if (i==sz-1 && dummy[i]!='\n' && sz==blocksize) \ 00233 { \ 00234 size_t len=i-old_sz+1; \ 00235 uint8_t* data=&dummy[old_sz]; \ 00236 \ 00237 for (size_t j=0; j<len; j++) \ 00238 dummy[j]=data[j]; \ 00239 \ 00240 sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, file); \ 00241 i=0; \ 00242 old_sz=0; \ 00243 sz+=len; \ 00244 } \ 00245 \ 00246 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \ 00247 { \ 00248 \ 00249 size_t len=i-old_sz; \ 00250 uint8_t* data=&dummy[old_sz]; \ 00251 \ 00252 int32_t dims=0; \ 00253 for (size_t j=0; j<len; j++) \ 00254 { \ 00255 if (data[j]==':') \ 00256 dims++; \ 00257 } \ 00258 \ 00259 if (dims<=0) \ 00260 { \ 00261 SG_ERROR("Error in line %d - number of" \ 00262 " dimensions is %d line is %d characters" \ 00263 " long\n line_content:'%.*s'\n", lines, \ 00264 dims, len, len, (const char*) data); \ 00265 } \ 00266 \ 00267 TSparseEntry<sg_type>* feat=new 
TSparseEntry<sg_type>[dims]; \ 00268 \ 00269 /* skip label part */ \ 00270 size_t j=0; \ 00271 for (; j<len; j++) \ 00272 { \ 00273 if (data[j]==':') \ 00274 { \ 00275 j=-1; /* file without label*/ \ 00276 break; \ 00277 } \ 00278 \ 00279 if (data[j]==' ') \ 00280 { \ 00281 data[j]='\0'; \ 00282 \ 00283 /* skip label part */ \ 00284 break; \ 00285 } \ 00286 } \ 00287 \ 00288 int32_t d=0; \ 00289 j++; \ 00290 uint8_t* start=&data[j]; \ 00291 for (; j<len; j++) \ 00292 { \ 00293 if (data[j]==':') \ 00294 { \ 00295 data[j]='\0'; \ 00296 \ 00297 feat[d].feat_index=(int32_t) atoi((const char*) start)-1; \ 00298 num_feat=CMath::max(num_feat, feat[d].feat_index+1); \ 00299 \ 00300 j++; \ 00301 start=&data[j]; \ 00302 for (; j<len; j++) \ 00303 { \ 00304 if (data[j]==' ' || data[j]=='\n') \ 00305 { \ 00306 data[j]='\0'; \ 00307 feat[d].entry=(sg_type) conv((const char*) start); \ 00308 d++; \ 00309 break; \ 00310 } \ 00311 } \ 00312 \ 00313 if (j==len) \ 00314 { \ 00315 data[j]='\0'; \ 00316 feat[dims-1].entry=(sg_type) conv((const char*) start); \ 00317 } \ 00318 \ 00319 j++; \ 00320 start=&data[j]; \ 00321 } \ 00322 } \ 00323 \ 00324 matrix[lines].vec_index=lines; \ 00325 matrix[lines].num_feat_entries=dims; \ 00326 matrix[lines].features=feat; \ 00327 \ 00328 old_sz=i+1; \ 00329 lines++; \ 00330 SG_PROGRESS(lines, 0, num_vec, 1, "LOADING:\t"); \ 00331 } \ 00332 } \ 00333 } \ 00334 \ 00335 SG_INFO("file successfully read\n"); \ 00336 } \ 00337 \ 00338 delete[] dummy; \ 00339 } 00340 00341 GET_SPARSEMATRIX(get_bool_sparsematrix, atoi, bool) 00342 GET_SPARSEMATRIX(get_byte_sparsematrix, atoi, uint8_t) 00343 GET_SPARSEMATRIX(get_char_sparsematrix, atoi, char) 00344 GET_SPARSEMATRIX(get_int_sparsematrix, atoi, int32_t) 00345 GET_SPARSEMATRIX(get_uint_sparsematrix, atoi, uint32_t) 00346 GET_SPARSEMATRIX(get_long_sparsematrix, atoll, int64_t) 00347 GET_SPARSEMATRIX(get_ulong_sparsematrix, atoll, uint64_t) 00348 GET_SPARSEMATRIX(get_shortreal_sparsematrix, atof, float32_t) 
00349 GET_SPARSEMATRIX(get_real_sparsematrix, atof, float64_t) 00350 GET_SPARSEMATRIX(get_longreal_sparsematrix, atof, floatmax_t) 00351 GET_SPARSEMATRIX(get_short_sparsematrix, atoi, int16_t) 00352 GET_SPARSEMATRIX(get_word_sparsematrix, atoi, uint16_t) 00353 #undef GET_SPARSEMATRIX 00354 00355 00356 void CAsciiFile::get_byte_string_list(T_STRING<uint8_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00357 { 00358 size_t blocksize=1024*1024; 00359 size_t required_blocksize=0; 00360 uint8_t* dummy=new uint8_t[blocksize]; 00361 uint8_t* overflow=NULL; 00362 int32_t overflow_len=0; 00363 00364 if (file) 00365 { 00366 num_str=0; 00367 max_string_len=0; 00368 00369 SG_INFO("counting line numbers in file %s\n", filename); 00370 size_t sz=blocksize; 00371 size_t block_offs=0; 00372 size_t old_block_offs=0; 00373 fseek(file, 0, SEEK_END); 00374 size_t fsize=ftell(file); 00375 rewind(file); 00376 00377 while (sz == blocksize) 00378 { 00379 sz=fread(dummy, sizeof(uint8_t), blocksize, file); 00380 bool contains_cr=false; 00381 for (size_t i=0; i<sz; i++) 00382 { 00383 block_offs++; 00384 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00385 { 00386 num_str++; 00387 contains_cr=true; 00388 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs); 00389 old_block_offs=block_offs; 00390 } 00391 } 00392 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); 00393 } 00394 00395 SG_INFO("found %d strings\n", num_str); 00396 SG_DEBUG("block_size=%d\n", required_blocksize); 00397 delete[] dummy; 00398 blocksize=required_blocksize; 00399 dummy=new uint8_t[blocksize]; 00400 overflow=new uint8_t[blocksize]; 00401 strings=new T_STRING<uint8_t>[num_str]; 00402 00403 rewind(file); 00404 sz=blocksize; 00405 int32_t lines=0; 00406 size_t old_sz=0; 00407 while (sz == blocksize) 00408 { 00409 sz=fread(dummy, sizeof(uint8_t), blocksize, file); 00410 00411 old_sz=0; 00412 for (size_t i=0; i<sz; i++) 00413 { 00414 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00415 
{ 00416 int32_t len=i-old_sz; 00417 max_string_len=CMath::max(max_string_len, len+overflow_len); 00418 00419 strings[lines].length=len+overflow_len; 00420 strings[lines].string=new uint8_t[len+overflow_len]; 00421 00422 for (int32_t j=0; j<overflow_len; j++) 00423 strings[lines].string[j]=overflow[j]; 00424 for (int32_t j=0; j<len; j++) 00425 strings[lines].string[j+overflow_len]=dummy[old_sz+j]; 00426 00427 // clear overflow 00428 overflow_len=0; 00429 00430 //CMath::display_vector(strings[lines].string, len); 00431 old_sz=i+1; 00432 lines++; 00433 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t"); 00434 } 00435 } 00436 00437 for (size_t i=old_sz; i<sz; i++) 00438 overflow[i-old_sz]=dummy[i]; 00439 00440 overflow_len=sz-old_sz; 00441 } 00442 SG_INFO("file successfully read\n"); 00443 SG_INFO("max_string_length=%d\n", max_string_len); 00444 SG_INFO("num_strings=%d\n", num_str); 00445 } 00446 00447 delete[] dummy; 00448 delete[] overflow; 00449 } 00450 00451 void CAsciiFile::get_char_string_list(T_STRING<char>*& strings, int32_t& num_str, int32_t& max_string_len) 00452 { 00453 size_t blocksize=1024*1024; 00454 size_t required_blocksize=0; 00455 char* dummy=new char[blocksize]; 00456 char* overflow=NULL; 00457 int32_t overflow_len=0; 00458 00459 if (file) 00460 { 00461 num_str=0; 00462 max_string_len=0; 00463 00464 SG_INFO("counting line numbers in file %s\n", filename); 00465 size_t sz=blocksize; 00466 size_t block_offs=0; 00467 size_t old_block_offs=0; 00468 fseek(file, 0, SEEK_END); 00469 size_t fsize=ftell(file); 00470 rewind(file); 00471 00472 while (sz == blocksize) 00473 { 00474 sz=fread(dummy, sizeof(char), blocksize, file); 00475 bool contains_cr=false; 00476 for (size_t i=0; i<sz; i++) 00477 { 00478 block_offs++; 00479 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00480 { 00481 num_str++; 00482 contains_cr=true; 00483 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs); 00484 old_block_offs=block_offs; 00485 } 00486 } 00487 
SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); 00488 } 00489 00490 SG_INFO("found %d strings\n", num_str); 00491 SG_DEBUG("block_size=%d\n", required_blocksize); 00492 delete[] dummy; 00493 blocksize=required_blocksize; 00494 dummy=new char[blocksize]; 00495 overflow=new char[blocksize]; 00496 strings=new T_STRING<char>[num_str]; 00497 00498 rewind(file); 00499 sz=blocksize; 00500 int32_t lines=0; 00501 size_t old_sz=0; 00502 while (sz == blocksize) 00503 { 00504 sz=fread(dummy, sizeof(char), blocksize, file); 00505 00506 old_sz=0; 00507 for (size_t i=0; i<sz; i++) 00508 { 00509 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) 00510 { 00511 int32_t len=i-old_sz; 00512 max_string_len=CMath::max(max_string_len, len+overflow_len); 00513 00514 strings[lines].length=len+overflow_len; 00515 strings[lines].string=new char[len+overflow_len]; 00516 00517 for (int32_t j=0; j<overflow_len; j++) 00518 strings[lines].string[j]=overflow[j]; 00519 for (int32_t j=0; j<len; j++) 00520 strings[lines].string[j+overflow_len]=dummy[old_sz+j]; 00521 00522 // clear overflow 00523 overflow_len=0; 00524 00525 //CMath::display_vector(strings[lines].string, len); 00526 old_sz=i+1; 00527 lines++; 00528 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t"); 00529 } 00530 } 00531 00532 for (size_t i=old_sz; i<sz; i++) 00533 overflow[i-old_sz]=dummy[i]; 00534 00535 overflow_len=sz-old_sz; 00536 } 00537 SG_INFO("file successfully read\n"); 00538 SG_INFO("max_string_length=%d\n", max_string_len); 00539 SG_INFO("num_strings=%d\n", num_str); 00540 } 00541 00542 delete[] dummy; 00543 delete[] overflow; 00544 } 00545 00546 void CAsciiFile::get_int_string_list(T_STRING<int32_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00547 { 00548 strings=NULL; 00549 num_str=0; 00550 max_string_len=0; 00551 } 00552 00553 void CAsciiFile::get_uint_string_list(T_STRING<uint32_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00554 { 00555 strings=NULL; 00556 num_str=0; 00557 max_string_len=0; 00558 } 
00559 00560 void CAsciiFile::get_short_string_list(T_STRING<int16_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00561 { 00562 strings=NULL; 00563 num_str=0; 00564 max_string_len=0; 00565 } 00566 00567 void CAsciiFile::get_word_string_list(T_STRING<uint16_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00568 { 00569 strings=NULL; 00570 num_str=0; 00571 max_string_len=0; 00572 } 00573 00574 void CAsciiFile::get_long_string_list(T_STRING<int64_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00575 { 00576 strings=NULL; 00577 num_str=0; 00578 max_string_len=0; 00579 } 00580 00581 void CAsciiFile::get_ulong_string_list(T_STRING<uint64_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00582 { 00583 strings=NULL; 00584 num_str=0; 00585 max_string_len=0; 00586 } 00587 00588 void CAsciiFile::get_shortreal_string_list(T_STRING<float32_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00589 { 00590 strings=NULL; 00591 num_str=0; 00592 max_string_len=0; 00593 } 00594 00595 void CAsciiFile::get_real_string_list(T_STRING<float64_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00596 { 00597 strings=NULL; 00598 num_str=0; 00599 max_string_len=0; 00600 } 00601 00602 void CAsciiFile::get_longreal_string_list(T_STRING<floatmax_t>*& strings, int32_t& num_str, int32_t& max_string_len) 00603 { 00604 strings=NULL; 00605 num_str=0; 00606 max_string_len=0; 00607 } 00608 00609 00612 #define SET_VECTOR(fname, mfname, sg_type) \ 00613 void CAsciiFile::fname(const sg_type* vec, int32_t len) \ 00614 { \ 00615 mfname(vec, len, 1); \ 00616 } 00617 SET_VECTOR(set_byte_vector, set_byte_matrix, uint8_t) 00618 SET_VECTOR(set_char_vector, set_char_matrix, char) 00619 SET_VECTOR(set_int_vector, set_int_matrix, int32_t) 00620 SET_VECTOR(set_shortreal_vector, set_shortreal_matrix, float32_t) 00621 SET_VECTOR(set_real_vector, set_real_matrix, float64_t) 00622 SET_VECTOR(set_short_vector, set_short_matrix, int16_t) 00623 SET_VECTOR(set_word_vector, 
set_word_matrix, uint16_t) 00624 #undef SET_VECTOR 00625 00626 #define SET_MATRIX(fname, sg_type, fprt_type, type_str) \ 00627 void CAsciiFile::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \ 00628 { \ 00629 if (!(file && matrix)) \ 00630 SG_ERROR("File or matrix invalid.\n"); \ 00631 \ 00632 for (int32_t i=0; i<num_vec; i++) \ 00633 { \ 00634 for (int32_t j=0; j<num_feat; j++) \ 00635 { \ 00636 sg_type v=matrix[num_feat*i+j]; \ 00637 if (j==num_feat-1) \ 00638 fprintf(file, type_str "\n", (fprt_type) v); \ 00639 else \ 00640 fprintf(file, type_str " ", (fprt_type) v); \ 00641 } \ 00642 } \ 00643 } 00644 SET_MATRIX(set_char_matrix, char, char, "%c") 00645 SET_MATRIX(set_byte_matrix, uint8_t, uint8_t, "%u") 00646 SET_MATRIX(set_int_matrix, int32_t, int32_t, "%i") 00647 SET_MATRIX(set_uint_matrix, uint32_t, uint32_t, "%u") 00648 SET_MATRIX(set_long_matrix, int64_t, long long int, "%lli") 00649 SET_MATRIX(set_ulong_matrix, uint64_t, long long unsigned int, "%llu") 00650 SET_MATRIX(set_short_matrix, int16_t, int16_t, "%i") 00651 SET_MATRIX(set_word_matrix, uint16_t, uint16_t, "%u") 00652 SET_MATRIX(set_shortreal_matrix, float32_t, float32_t, "%f") 00653 SET_MATRIX(set_real_matrix, float64_t, float64_t, "%f") 00654 SET_MATRIX(set_longreal_matrix, floatmax_t, floatmax_t, "%Lf") 00655 #undef SET_MATRIX 00656 00657 #define SET_SPARSEMATRIX(fname, sg_type, fprt_type, type_str) \ 00658 void CAsciiFile::fname(const TSparse<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \ 00659 { \ 00660 if (!(file && matrix)) \ 00661 SG_ERROR("File or matrix invalid.\n"); \ 00662 \ 00663 for (int32_t i=0; i<num_vec; i++) \ 00664 { \ 00665 TSparseEntry<sg_type>* vec = matrix[i].features; \ 00666 int32_t len=matrix[i].num_feat_entries; \ 00667 \ 00668 for (int32_t j=0; j<len; j++) \ 00669 { \ 00670 if (j<len-1) \ 00671 { \ 00672 fprintf(file, "%d:" type_str " ", \ 00673 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \ 00674 } \ 00675 else \ 00676 { \ 00677 
fprintf(file, "%d:" type_str "\n", \ 00678 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \ 00679 } \ 00680 } \ 00681 } \ 00682 } 00683 SET_SPARSEMATRIX(set_bool_sparsematrix, bool, uint8_t, "%u") 00684 SET_SPARSEMATRIX(set_char_sparsematrix, char, char, "%c") 00685 SET_SPARSEMATRIX(set_byte_sparsematrix, uint8_t, uint8_t, "%u") 00686 SET_SPARSEMATRIX(set_int_sparsematrix, int32_t, int32_t, "%i") 00687 SET_SPARSEMATRIX(set_uint_sparsematrix, uint32_t, uint32_t, "%u") 00688 SET_SPARSEMATRIX(set_long_sparsematrix, int64_t, long long int, "%lli") 00689 SET_SPARSEMATRIX(set_ulong_sparsematrix, uint64_t, long long unsigned int, "%llu") 00690 SET_SPARSEMATRIX(set_short_sparsematrix, int16_t, int16_t, "%i") 00691 SET_SPARSEMATRIX(set_word_sparsematrix, uint16_t, uint16_t, "%u") 00692 SET_SPARSEMATRIX(set_shortreal_sparsematrix, float32_t, float32_t, "%f") 00693 SET_SPARSEMATRIX(set_real_sparsematrix, float64_t, float64_t, "%f") 00694 SET_SPARSEMATRIX(set_longreal_sparsematrix, floatmax_t, floatmax_t, "%Lf") 00695 #undef SET_SPARSEMATRIX 00696 00697 void CAsciiFile::set_byte_string_list(const T_STRING<uint8_t>* strings, int32_t num_str) 00698 { 00699 if (!(file && strings)) 00700 SG_ERROR("File or strings invalid.\n"); 00701 00702 for (int32_t i=0; i<num_str; i++) 00703 { 00704 int32_t len = strings[i].length; 00705 fwrite(strings[i].string, sizeof(uint8_t), len, file); 00706 fprintf(file, "\n"); 00707 } 00708 } 00709 00710 void CAsciiFile::set_char_string_list(const T_STRING<char>* strings, int32_t num_str) 00711 { 00712 if (!(file && strings)) 00713 SG_ERROR("File or strings invalid.\n"); 00714 00715 for (int32_t i=0; i<num_str; i++) 00716 { 00717 int32_t len = strings[i].length; 00718 fwrite(strings[i].string, sizeof(char), len, file); 00719 fprintf(file, "\n"); 00720 } 00721 } 00722 00723 void CAsciiFile::set_int_string_list(const T_STRING<int32_t>* strings, int32_t num_str) 00724 { 00725 } 00726 00727 void CAsciiFile::set_uint_string_list(const 
T_STRING<uint32_t>* strings, int32_t num_str) 00728 { 00729 } 00730 00731 void CAsciiFile::set_short_string_list(const T_STRING<int16_t>* strings, int32_t num_str) 00732 { 00733 } 00734 00735 void CAsciiFile::set_word_string_list(const T_STRING<uint16_t>* strings, int32_t num_str) 00736 { 00737 } 00738 00739 void CAsciiFile::set_long_string_list(const T_STRING<int64_t>* strings, int32_t num_str) 00740 { 00741 } 00742 00743 void CAsciiFile::set_ulong_string_list(const T_STRING<uint64_t>* strings, int32_t num_str) 00744 { 00745 } 00746 00747 void CAsciiFile::set_shortreal_string_list(const T_STRING<float32_t>* strings, int32_t num_str) 00748 { 00749 } 00750 00751 void CAsciiFile::set_real_string_list(const T_STRING<float64_t>* strings, int32_t num_str) 00752 { 00753 } 00754 00755 void CAsciiFile::set_longreal_string_list(const T_STRING<floatmax_t>* strings, int32_t num_str) 00756 { 00757 } 00758 00759 template <class T> void CAsciiFile::append_item( 00760 CDynamicArray<T>* items, char* ptr_data, char* ptr_item) 00761 { 00762 size_t len=(ptr_data-ptr_item)/sizeof(char); 00763 char* item=new char[len+1]; 00764 memset(item, 0, sizeof(char)*(len+1)); 00765 item=strncpy(item, ptr_item, len); 00766 00767 SG_DEBUG("current %c, len %d, item %s\n", *ptr_data, len, item); 00768 items->append_element(item); 00769 }