|
SHOGUN v0.9.3
|
00001 /***********************************************************************/ 00002 /* */ 00003 /* SVM_light.cpp */ 00004 /* */ 00005 /* Author: Thorsten Joachims */ 00006 /* Date: 19.07.99 */ 00007 /* */ 00008 /* Copyright (c) 1999 Universitaet Dortmund - All rights reserved */ 00009 /* */ 00010 /* This software is available for non-commercial use only. It must */ 00011 /* not be modified and distributed without prior permission of the */ 00012 /* author. The author is not responsible for implications from the */ 00013 /* use of this software. */ 00014 /* */ 00015 /* THIS INCLUDES THE FOLLOWING ADDITIONS */ 00016 /* Generic Kernel Interfacing: Soeren Sonnenburg */ 00017 /* Parallizations: Soeren Sonnenburg */ 00018 /* Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg, */ 00019 /* Alexander Zien, Marius Kloft, Chen Guohua */ 00020 /* Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg */ 00021 /* */ 00022 /***********************************************************************/ 00023 #include "lib/config.h" 00024 00025 #ifdef USE_SVMLIGHT 00026 00027 #include "lib/io.h" 00028 #include "lib/Signal.h" 00029 #include "lib/Mathematics.h" 00030 #include "lib/Time.h" 00031 #include "lib/lapack.h" 00032 00033 #include "features/SimpleFeatures.h" 00034 #include "classifier/svm/SVM_light.h" 00035 #include "classifier/svm/pr_loqo.h" 00036 00037 #include "kernel/Kernel.h" 00038 #include "classifier/KernelMachine.h" 00039 #include "kernel/CombinedKernel.h" 00040 00041 #include <unistd.h> 00042 00043 #include "base/Parallel.h" 00044 00045 #ifndef WIN32 00046 #include <pthread.h> 00047 #endif 00048 00049 00050 #ifdef HAVE_BOOST_SERIALIZATION 00051 #include <boost/serialization/export.hpp> 00052 BOOST_CLASS_EXPORT(shogun::CSVMLight); 00053 #endif //HAVE_BOOST_SERIALIZATION 00054 00055 using namespace shogun; 00056 00057 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00058 struct S_THREAD_PARAM_REACTIVATE_LINADD 00059 { 00060 CKernel* kernel; 00061 float64_t* lin; 00062 float64_t* last_lin; 00063 int32_t* active; 00064 int32_t* docs; 00065 int32_t start; 00066 int32_t end; 00067 }; 00068 00069 struct S_THREAD_PARAM 00070 { 00071 float64_t * lin ; 00072 float64_t* W; 00073 int32_t start, end; 00074 int32_t * active2dnum ; 00075 int32_t * docs ; 00076 CKernel* kernel ; 00077 }; 00078 00079 struct S_THREAD_PARAM_REACTIVATE_VANILLA 00080 { 00081 CKernel* kernel; 00082 float64_t* lin; 00083 float64_t* aicache; 00084 float64_t* a; 00085 float64_t* a_old; 00086 int32_t* changed2dnum; 00087 int32_t* inactive2dnum; 00088 int32_t* label; 00089 int32_t start; 00090 int32_t end; 00091 }; 00092 00093 struct S_THREAD_PARAM_KERNEL 00094 { 00095 float64_t *Kval ; 00096 int32_t *KI, *KJ ; 00097 int32_t start, end; 00098 CSVMLight* svmlight; 00099 }; 00100 00101 #endif // DOXYGEN_SHOULD_SKIP_THIS 00102 00103 void* CSVMLight::update_linear_component_linadd_helper(void* p) 00104 { 00105 S_THREAD_PARAM* params = (S_THREAD_PARAM*) p; 00106 00107 int32_t jj=0, j=0 ; 00108 00109 for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++) 00110 params->lin[j]+=params->kernel->compute_optimized(params->docs[j]); 00111 00112 return NULL ; 00113 } 00114 00115 void* CSVMLight::compute_kernel_helper(void* p) 00116 { 00117 S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p; 00118 00119 int32_t jj=0 ; 00120 for (jj=params->start;jj<params->end;jj++) 00121 params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ; 00122 00123 return NULL ; 00124 } 00125 00126 CSVMLight::CSVMLight() 00127 : CSVM() 00128 { 00129 init(); 00130 set_kernel(NULL); 00131 } 00132 00133 CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab) 00134 : CSVM(C, k, lab) 00135 { 00136 init(); 00137 } 00138 00139 void CSVMLight::init() 00140 { 00141 //optimizer settings 00142 primal=NULL; 00143 dual=NULL; 00144 init_margin=0.15; 00145 init_iter=500; 00146 precision_violations=0; 00147 model_b=0; 00148 verbosity=1; 00149 opt_precision=DEF_PRECISION; 00150 00151 // svm variables 00152 W=NULL; 00153 model=new MODEL[1]; 00154 learn_parm=new LEARN_PARM[1]; 00155 model->supvec=NULL; 00156 model->alpha=NULL; 00157 model->index=NULL; 00158 00159 // MKL stuff 00160 mymaxdiff=1 ; 00161 mkl_converged=false; 00162 } 00163 00164 CSVMLight::~CSVMLight() 00165 { 00166 00167 delete[] model->supvec; 00168 delete[] model->alpha; 00169 delete[] model->index; 00170 delete[] model; 00171 delete[] learn_parm; 00172 00173 // MKL stuff 00174 delete[] W ; 00175 00176 // optimizer variables 00177 delete[] dual; 00178 delete[] primal; 00179 } 00180 00181 bool CSVMLight::train(CFeatures* data) 00182 { 00183 //certain setup params 00184 mkl_converged=false; 00185 verbosity=1 ; 00186 init_margin=0.15; 00187 init_iter=500; 00188 precision_violations=0; 00189 opt_precision=DEF_PRECISION; 00190 00191 strcpy (learn_parm->predfile, ""); 00192 learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0; 00193 learn_parm->sharedslack=0; 00194 learn_parm->remove_inconsistent=0; 00195 learn_parm->skip_final_opt_check=0; 00196 learn_parm->svm_maxqpsize=get_qpsize(); 00197 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1; 00198 learn_parm->maxiter=100000; 00199 learn_parm->svm_iter_to_shrink=100; 00200 learn_parm->svm_c=C1; 00201 learn_parm->transduction_posratio=0.33; 00202 learn_parm->svm_costratio=C2/C1; 00203 learn_parm->svm_costratio_unlab=1.0; 00204 learn_parm->svm_unlabbound=1E-5; 00205 learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ?? 00206 learn_parm->epsilon_a=1E-15; 00207 learn_parm->compute_loo=0; 00208 learn_parm->rho=1.0; 00209 learn_parm->xa_depth=0; 00210 00211 if (!kernel) 00212 SG_ERROR( "SVM_light can not proceed without kernel!\n"); 00213 00214 if (data) 00215 { 00216 if (labels->get_num_labels() != data->get_num_vectors()) 00217 SG_ERROR("Number of training vectors does not match number of labels\n"); 00218 kernel->init(data, data); 00219 } 00220 00221 if (!kernel->has_features()) 00222 SG_ERROR( "SVM_light can not proceed without initialized kernel!\n"); 00223 00224 ASSERT(labels && labels->get_num_labels()); 00225 ASSERT(labels->is_two_class_labeling()); 00226 ASSERT(kernel->get_num_vec_lhs()==labels->get_num_labels()); 00227 00228 // in case of LINADD enabled kernels cleanup! 00229 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) 00230 kernel->clear_normal() ; 00231 00232 // output some info 00233 SG_DEBUG( "threads = %i\n", parallel->get_num_threads()) ; 00234 SG_DEBUG( "qpsize = %i\n", learn_parm->svm_maxqpsize) ; 00235 SG_DEBUG( "epsilon = %1.1e\n", learn_parm->epsilon_crit) ; 00236 SG_DEBUG( "kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD)) ; 00237 SG_DEBUG( "kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION)) ; 00238 SG_DEBUG( "kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION)) ; 00239 SG_DEBUG( "kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" ) ; 00240 SG_DEBUG( "get_solver_type() = %i\n", get_solver_type()); 00241 SG_DEBUG( "get_linadd_enabled() = %i\n", get_linadd_enabled()) ; 00242 SG_DEBUG( "get_batch_computation_enabled() = %i\n", get_batch_computation_enabled()) ; 00243 SG_DEBUG( "kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) ; 00244 00245 use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) || 00246 (get_linadd_enabled() && kernel->has_property(KP_LINADD))); 00247 00248 SG_DEBUG( "use_kernel_cache = %i\n", use_kernel_cache) ; 00249 00250 if (kernel->get_kernel_type() == K_COMBINED) 00251 { 00252 CKernel* kn = ((CCombinedKernel*)kernel)->get_first_kernel(); 00253 00254 while (kn) 00255 { 00256 // allocate kernel cache but clean up beforehand 00257 kn->resize_kernel_cache(kn->get_cache_size()); 00258 SG_UNREF(kn); 00259 kn = ((CCombinedKernel*) kernel)->get_next_kernel(); 00260 } 00261 } 00262 00263 kernel->resize_kernel_cache(kernel->get_cache_size()); 00264 00265 // train the svm 00266 svm_learn(); 00267 00268 // brain damaged svm light work around 00269 create_new_model(model->sv_num-1); 00270 set_bias(-model->b); 00271 for (int32_t i=0; i<model->sv_num-1; i++) 00272 { 00273 set_alpha(i, model->alpha[i+1]); 00274 set_support_vector(i, model->supvec[i+1]); 00275 } 00276 00277 // in case of LINADD enabled kernels cleanup! 00278 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) 00279 { 00280 kernel->clear_normal() ; 00281 kernel->delete_optimization() ; 00282 } 00283 00284 if (use_kernel_cache) 00285 kernel->kernel_cache_cleanup(); 00286 00287 return true ; 00288 } 00289 00290 int32_t CSVMLight::get_runtime() 00291 { 00292 clock_t start; 00293 start = clock(); 00294 return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC)); 00295 } 00296 00297 00298 void CSVMLight::svm_learn() 00299 { 00300 int32_t *inconsistent, i; 00301 int32_t inconsistentnum; 00302 int32_t misclassified,upsupvecnum; 00303 float64_t maxdiff, *lin, *c, *a; 00304 int32_t runtime_start,runtime_end; 00305 int32_t iterations; 00306 int32_t trainpos=0, trainneg=0 ; 00307 int32_t totdoc=0; 00308 ASSERT(labels); 00309 int32_t* label=labels->get_int_labels(totdoc); 00310 ASSERT(label); 00311 int32_t* docs=new int32_t[totdoc]; 00312 delete[] W; 00313 W=NULL; 00314 count = 0 ; 00315 00316 if (kernel->has_property(KP_KERNCOMBINATION) && callback) 00317 { 00318 W = new float64_t[totdoc*kernel->get_num_subkernels()]; 00319 for (i=0; i<totdoc*kernel->get_num_subkernels(); i++) 00320 W[i]=0; 00321 } 00322 00323 for (i=0; i<totdoc; i++) 00324 docs[i]=i; 00325 00326 float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */ 00327 float64_t *a_fullset; /* buffer for storing alpha on full sample in loo */ 00328 TIMING timing_profile; 00329 SHRINK_STATE shrink_state; 00330 00331 runtime_start=get_runtime(); 00332 timing_profile.time_kernel=0; 00333 timing_profile.time_opti=0; 00334 timing_profile.time_shrink=0; 00335 timing_profile.time_update=0; 00336 timing_profile.time_model=0; 00337 timing_profile.time_check=0; 00338 timing_profile.time_select=0; 00339 00340 /* make sure -n value is reasonable */ 00341 if((learn_parm->svm_newvarsinqp < 2) 00342 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { 00343 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; 00344 } 00345 00346 init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK); 00347 00348 inconsistent = new int32_t[totdoc]; 00349 c = new float64_t[totdoc]; 00350 a = new float64_t[totdoc]; 00351 a_fullset = new float64_t[totdoc]; 00352 xi_fullset = new float64_t[totdoc]; 00353 lin = new float64_t[totdoc]; 00354 if (linear_term.size() > 0) 00355 learn_parm->eps=get_linear_term_array(); 00356 else 00357 { 00358 learn_parm->eps=new float64_t[totdoc]; /* equivalent regression epsilon for classification */ 00359 CMath::fill_vector(learn_parm->eps, totdoc, -1.0); 00360 } 00361 00362 learn_parm->svm_cost = new float64_t[totdoc]; 00363 00364 delete[] model->supvec; 00365 delete[] model->alpha; 00366 delete[] model->index; 00367 model->supvec = new int32_t[totdoc+2]; 00368 model->alpha = new float64_t[totdoc+2]; 00369 model->index = new int32_t[totdoc+2]; 00370 00371 model->at_upper_bound=0; 00372 model->b=0; 00373 model->supvec[0]=0; /* element 0 reserved and empty for now */ 00374 model->alpha[0]=0; 00375 model->totdoc=totdoc; 00376 00377 model->kernel=kernel; 00378 00379 model->sv_num=1; 00380 model->loo_error=-1; 00381 model->loo_recall=-1; 00382 model->loo_precision=-1; 00383 model->xa_error=-1; 00384 model->xa_recall=-1; 00385 model->xa_precision=-1; 00386 inconsistentnum=0; 00387 00388 00389 for (i=0;i<totdoc;i++) { /* various inits */ 00390 inconsistent[i]=0; 00391 c[i]=0; 00392 a[i]=0; 00393 lin[i]=0; 00394 00395 if(label[i] > 0) { 00396 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* 00397 fabs((float64_t)label[i]); 00398 label[i]=1; 00399 trainpos++; 00400 } 00401 else if(label[i] < 0) { 00402 learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]); 00403 label[i]=-1; 00404 trainneg++; 00405 } 00406 else { 00407 learn_parm->svm_cost[i]=0; 00408 } 00409 } 00410 00411 /* compute starting state for initial alpha values */ 00412 SG_DEBUG( "alpha:%d num_sv:%d\n", m_alpha, get_num_support_vectors()); 00413 00414 if(m_alpha && get_num_support_vectors()) { 00415 if(verbosity>=1) { 00416 SG_INFO( "Computing starting state..."); 00417 } 00418 00419 float64_t* alpha = new float64_t[totdoc]; 00420 00421 for (i=0; i<totdoc; i++) 00422 alpha[i]=0; 00423 00424 for (i=0; i<get_num_support_vectors(); i++) 00425 alpha[get_support_vector(i)]=get_alpha(i); 00426 00427 int32_t* index = new int32_t[totdoc]; 00428 int32_t* index2dnum = new int32_t[totdoc+11]; 00429 float64_t* aicache = new float64_t[totdoc]; 00430 for (i=0;i<totdoc;i++) { /* create full index and clip alphas */ 00431 index[i]=1; 00432 alpha[i]=fabs(alpha[i]); 00433 if(alpha[i]<0) alpha[i]=0; 00434 if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; 00435 } 00436 00437 if (use_kernel_cache) 00438 { 00439 if (callback && 00440 (!((CCombinedKernel*)kernel)->get_append_subkernel_weights()) 00441 ) 00442 { 00443 CCombinedKernel* k = (CCombinedKernel*) kernel; 00444 CKernel* kn = k->get_first_kernel(); 00445 00446 while (kn) 00447 { 00448 for (i=0;i<totdoc;i++) // fill kernel cache with unbounded SV 00449 if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i]) 00450 && (kn->kernel_cache_space_available())) 00451 kn->cache_kernel_row(i); 00452 00453 for (i=0;i<totdoc;i++) // fill rest of kernel cache with bounded SV 00454 if((alpha[i]==learn_parm->svm_cost[i]) 00455 && (kn->kernel_cache_space_available())) 00456 kn->cache_kernel_row(i); 00457 00458 SG_UNREF(kn); 00459 kn = k->get_next_kernel(); 00460 } 00461 } 00462 else 00463 { 00464 for (i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */ 00465 if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i]) 00466 && (kernel->kernel_cache_space_available())) 00467 kernel->cache_kernel_row(i); 00468 00469 for (i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */ 00470 if((alpha[i]==learn_parm->svm_cost[i]) 00471 && (kernel->kernel_cache_space_available())) 00472 kernel->cache_kernel_row(i); 00473 } 00474 } 00475 (void)compute_index(index,totdoc,index2dnum); 00476 update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, 00477 lin,aicache,NULL); 00478 (void)calculate_svm_model(docs,label,lin,alpha,a,c, 00479 index2dnum,index2dnum); 00480 for (i=0;i<totdoc;i++) { /* copy initial alphas */ 00481 a[i]=alpha[i]; 00482 } 00483 00484 delete[] index; 00485 delete[] index2dnum; 00486 delete[] aicache; 00487 delete[] alpha; 00488 00489 if(verbosity>=1) 00490 SG_DONE(); 00491 } 00492 SG_DEBUG( "%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg); 00493 SG_DEBUG( "Optimizing...\n"); 00494 00495 /* train the svm */ 00496 iterations=optimize_to_convergence(docs,label,totdoc, 00497 &shrink_state,inconsistent,a,lin, 00498 c,&timing_profile, 00499 &maxdiff,(int32_t)-1, 00500 (int32_t)1); 00501 00502 00503 if(verbosity>=1) { 00504 if(verbosity==1) 00505 { 00506 SG_DONE(); 00507 SG_DEBUG("(%ld iterations)", iterations); 00508 } 00509 00510 misclassified=0; 00511 for (i=0;(i<totdoc);i++) { /* get final statistic */ 00512 if((lin[i]-model->b)*(float64_t)label[i] <= 0.0) 00513 misclassified++; 00514 } 00515 00516 SG_INFO( "Optimization finished (%ld misclassified, maxdiff=%.8f).\n", 00517 misclassified,maxdiff); 00518 00519 SG_INFO( "obj = %.16f, rho = %.16f\n",get_objective(),model->b); 00520 if (maxdiff>epsilon) 00521 SG_WARNING( "maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon); 00522 00523 runtime_end=get_runtime(); 00524 upsupvecnum=0; 00525 for (i=1;i<model->sv_num;i++) 00526 { 00527 if(fabs(model->alpha[i]) >= 00528 (learn_parm->svm_cost[model->supvec[i]]- 00529 learn_parm->epsilon_a)) 00530 upsupvecnum++; 00531 } 00532 SG_INFO( "Number of SV: %ld (including %ld at upper bound)\n", 00533 model->sv_num-1,upsupvecnum); 00534 } 00535 00536 shrink_state_cleanup(&shrink_state); 00537 delete[] label; 00538 delete[] inconsistent; 00539 delete[] c; 00540 delete[] a; 00541 delete[] a_fullset; 00542 delete[] xi_fullset; 00543 delete[] lin; 00544 delete[] learn_parm->eps; 00545 delete[] learn_parm->svm_cost; 00546 delete[] docs; 00547 } 00548 00549 int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc, 00550 SHRINK_STATE *shrink_state, 00551 int32_t *inconsistent, 00552 float64_t *a, float64_t *lin, float64_t *c, 00553 TIMING *timing_profile, float64_t *maxdiff, 00554 int32_t heldout, int32_t retrain) 00555 /* docs: Training vectors (x-part) */ 00556 /* label: Training labels/value (y-part, zero if test example for 00557 transduction) */ 00558 /* totdoc: Number of examples in docs/label */ 00559 /* laern_parm: Learning paramenters */ 00560 /* kernel_parm: Kernel paramenters */ 00561 /* kernel_cache: Initialized/partly filled Cache, if using a kernel. 00562 NULL if linear. */ 00563 /* shrink_state: State of active variables */ 00564 /* inconsistent: examples thrown out as inconstistent */ 00565 /* a: alphas */ 00566 /* lin: linear component of gradient */ 00567 /* c: right hand side of inequalities (margin) */ 00568 /* maxdiff: returns maximum violation of KT-conditions */ 00569 /* heldout: marks held-out example for leave-one-out (or -1) */ 00570 /* retrain: selects training mode (1=regular / 2=holdout) */ 00571 { 00572 int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink; 00573 int32_t inconsistentnum,choosenum,already_chosen=0,iteration; 00574 int32_t misclassified,supvecnum=0,*active2dnum,inactivenum; 00575 int32_t *working2dnum,*selexam; 00576 int32_t activenum; 00577 float64_t criterion, eq; 00578 float64_t *a_old; 00579 int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ 00580 int32_t transductcycle; 00581 int32_t transduction; 00582 float64_t epsilon_crit_org; 00583 float64_t bestmaxdiff; 00584 float64_t worstmaxdiff; 00585 int32_t bestmaxdiffiter,terminate; 00586 bool reactivated=false; 00587 00588 float64_t *selcrit; /* buffer for sorting */ 00589 float64_t *aicache; /* buffer to keep one row of hessian */ 00590 QP qp; /* buffer for one quadratic program */ 00591 00592 epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ 00593 if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) { 00594 learn_parm->epsilon_crit=2.0; 00595 /* caching makes no sense for linear kernel */ 00596 } 00597 learn_parm->epsilon_shrink=2; 00598 (*maxdiff)=1; 00599 00600 SG_DEBUG("totdoc:%d\n",totdoc); 00601 chosen = new int32_t[totdoc]; 00602 last_suboptimal_at =new int32_t[totdoc]; 00603 key =new int32_t[totdoc+11]; 00604 selcrit =new float64_t[totdoc]; 00605 selexam =new int32_t[totdoc]; 00606 a_old =new float64_t[totdoc]; 00607 aicache =new float64_t[totdoc]; 00608 working2dnum =new int32_t[totdoc+11]; 00609 active2dnum =new int32_t[totdoc+11]; 00610 qp.opt_ce =new float64_t[learn_parm->svm_maxqpsize]; 00611 qp.opt_ce0 =new float64_t[1]; 00612 qp.opt_g =new float64_t[learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize]; 00613 qp.opt_g0 =new float64_t[learn_parm->svm_maxqpsize]; 00614 qp.opt_xinit =new float64_t[learn_parm->svm_maxqpsize]; 00615 qp.opt_low=new float64_t[learn_parm->svm_maxqpsize]; 00616 qp.opt_up=new float64_t[learn_parm->svm_maxqpsize]; 00617 00618 choosenum=0; 00619 inconsistentnum=0; 00620 transductcycle=0; 00621 transduction=0; 00622 if(!retrain) retrain=1; 00623 iteration=1; 00624 bestmaxdiffiter=1; 00625 bestmaxdiff=999999999; 00626 worstmaxdiff=1e-10; 00627 terminate=0; 00628 00629 kernel->set_time(iteration); /* for lru cache */ 00630 00631 for (i=0;i<totdoc;i++) { /* various inits */ 00632 chosen[i]=0; 00633 a_old[i]=a[i]; 00634 last_suboptimal_at[i]=1; 00635 if(inconsistent[i]) 00636 inconsistentnum++; 00637 } 00638 activenum=compute_index(shrink_state->active,totdoc,active2dnum); 00639 inactivenum=totdoc-activenum; 00640 clear_index(working2dnum); 00641 00642 /* repeat this loop until we have convergence */ 00643 CTime start_time; 00644 mkl_converged=false; 00645 00646 #ifdef CYGWIN 00647 for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){ 00648 #else 00649 CSignal::clear_cancel(); 00650 for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){ 00651 #endif 00652 00653 if(use_kernel_cache) 00654 kernel->set_time(iteration); /* for lru cache */ 00655 00656 if(verbosity>=2) t0=get_runtime(); 00657 if(verbosity>=3) { 00658 SG_DEBUG( "\nSelecting working set... "); 00659 } 00660 00661 if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) 00662 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; 00663 00664 i=0; 00665 for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ 00666 if((chosen[j]>=(learn_parm->svm_maxqpsize/ 00667 CMath::min(learn_parm->svm_maxqpsize, 00668 learn_parm->svm_newvarsinqp))) 00669 || (inconsistent[j]) 00670 || (j == heldout)) { 00671 chosen[j]=0; 00672 choosenum--; 00673 } 00674 else { 00675 chosen[j]++; 00676 working2dnum[i++]=j; 00677 } 00678 } 00679 working2dnum[i]=-1; 00680 00681 if(retrain == 2) { 00682 choosenum=0; 00683 for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */ 00684 chosen[j]=0; 00685 } 00686 clear_index(working2dnum); 00687 for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */ 00688 if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) { 00689 chosen[i]=99999; 00690 choosenum++; 00691 a[i]=0; 00692 } 00693 } 00694 if(learn_parm->biased_hyperplane) { 00695 eq=0; 00696 for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */ 00697 eq+=a[i]*label[i]; 00698 } 00699 for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) { 00700 if((eq*label[i] > 0) && (a[i] > 0)) { 00701 chosen[i]=88888; 00702 choosenum++; 00703 if((eq*label[i]) > a[i]) { 00704 eq-=(a[i]*label[i]); 00705 a[i]=0; 00706 } 00707 else { 00708 a[i]-=(eq*label[i]); 00709 eq=0; 00710 } 00711 } 00712 } 00713 } 00714 compute_index(chosen,totdoc,working2dnum); 00715 } 00716 else 00717 { /* select working set according to steepest gradient */ 00718 if(iteration % 101) 00719 { 00720 already_chosen=0; 00721 if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 && 00722 (!(kernel->has_property(KP_LINADD) && get_linadd_enabled()))) 00723 { 00724 /* select part of the working set from cache */ 00725 already_chosen=select_next_qp_subproblem_grad( 00726 label,a,lin,c,totdoc, 00727 (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum, 00728 learn_parm->svm_newvarsinqp)/2), 00729 inconsistent,active2dnum, 00730 working2dnum,selcrit,selexam,1, 00731 key,chosen); 00732 choosenum+=already_chosen; 00733 } 00734 choosenum+=select_next_qp_subproblem_grad( 00735 label,a,lin,c,totdoc, 00736 CMath::min(learn_parm->svm_maxqpsize-choosenum, 00737 learn_parm->svm_newvarsinqp-already_chosen), 00738 inconsistent,active2dnum, 00739 working2dnum,selcrit,selexam,0,key, 00740 chosen); 00741 } 00742 else { /* once in a while, select a somewhat random working set 00743 to get unlocked of infinite loops due to numerical 00744 inaccuracies in the core qp-solver */ 00745 choosenum+=select_next_qp_subproblem_rand( 00746 label,a,lin,c,totdoc, 00747 CMath::min(learn_parm->svm_maxqpsize-choosenum, 00748 learn_parm->svm_newvarsinqp), 00749 inconsistent,active2dnum, 00750 working2dnum,selcrit,selexam,key, 00751 chosen,iteration); 00752 } 00753 } 00754 00755 if(verbosity>=2) { 00756 SG_INFO( " %ld vectors chosen\n",choosenum); 00757 } 00758 00759 if(verbosity>=2) t1=get_runtime(); 00760 00761 if (use_kernel_cache) 00762 { 00763 // in case of MKL w/o linadd cache each kernel independently 00764 // else if linadd is disabled cache single kernel 00765 if ( callback && 00766 (!((CCombinedKernel*) kernel)->get_append_subkernel_weights()) 00767 ) 00768 { 00769 CCombinedKernel* k = (CCombinedKernel*) kernel; 00770 CKernel* kn = k->get_first_kernel(); 00771 00772 while (kn) 00773 { 00774 kn->cache_multiple_kernel_rows(working2dnum, choosenum); 00775 SG_UNREF(kn); 00776 kn = k->get_next_kernel() ; 00777 } 00778 } 00779 else 00780 kernel->cache_multiple_kernel_rows(working2dnum, choosenum); 00781 } 00782 00783 if(verbosity>=2) t2=get_runtime(); 00784 00785 if(retrain != 2) { 00786 optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum, 00787 totdoc,working2dnum,choosenum,a,lin,c, 00788 aicache,&qp,&epsilon_crit_org); 00789 } 00790 00791 if(verbosity>=2) t3=get_runtime(); 00792 update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, 00793 lin,aicache,c); 00794 00795 if(verbosity>=2) t4=get_runtime(); 00796 supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum); 00797 00798 if(verbosity>=2) t5=get_runtime(); 00799 00800 for (jj=0;(i=working2dnum[jj])>=0;jj++) { 00801 a_old[i]=a[i]; 00802 } 00803 00804 retrain=check_optimality(label,a,lin,c,totdoc, 00805 maxdiff,epsilon_crit_org,&misclassified, 00806 inconsistent,active2dnum,last_suboptimal_at, 00807 iteration); 00808 00809 if(verbosity>=2) { 00810 t6=get_runtime(); 00811 timing_profile->time_select+=t1-t0; 00812 timing_profile->time_kernel+=t2-t1; 00813 timing_profile->time_opti+=t3-t2; 00814 timing_profile->time_update+=t4-t3; 00815 timing_profile->time_model+=t5-t4; 00816 timing_profile->time_check+=t6-t5; 00817 } 00818 00819 /* checking whether optimizer got stuck */ 00820 if((*maxdiff) < bestmaxdiff) { 00821 bestmaxdiff=(*maxdiff); 00822 bestmaxdiffiter=iteration; 00823 } 00824 if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { 00825 /* int32_t time no progress? */ 00826 terminate=1; 00827 retrain=0; 00828 SG_WARNING( "Relaxing KT-Conditions due to slow progress! Terminating!\n"); 00829 } 00830 00831 noshrink= (get_shrinking_enabled()) ? 0 : 1; 00832 00833 if ((!callback) && (!retrain) && (inactivenum>0) && 00834 ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled()))) 00835 { 00836 t1=get_runtime(); 00837 SG_DEBUG( "reactivating inactive examples\n"); 00838 00839 reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc, 00840 iteration,inconsistent, 00841 docs,aicache, 00842 maxdiff); 00843 reactivated=true; 00844 SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org); 00845 /* Update to new active variables. */ 00846 activenum=compute_index(shrink_state->active,totdoc,active2dnum); 00847 inactivenum=totdoc-activenum; 00848 /* reset watchdog */ 00849 bestmaxdiff=(*maxdiff); 00850 bestmaxdiffiter=iteration; 00851 00852 /* termination criterion */ 00853 noshrink=1; 00854 retrain=0; 00855 if((*maxdiff) > learn_parm->epsilon_crit) 00856 { 00857 SG_INFO( "restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit); 00858 retrain=1; 00859 } 00860 timing_profile->time_shrink+=get_runtime()-t1; 00861 if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled()))) 00862 || (verbosity>=2)) { 00863 SG_DONE(); 00864 SG_DEBUG("Number of inactive variables = %ld\n", inactivenum); 00865 } 00866 } 00867 00868 if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) 00869 learn_parm->epsilon_crit=(*maxdiff); 00870 if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { 00871 learn_parm->epsilon_crit/=4.0; 00872 retrain=1; 00873 noshrink=1; 00874 } 00875 if(learn_parm->epsilon_crit<epsilon_crit_org) 00876 learn_parm->epsilon_crit=epsilon_crit_org; 00877 00878 if(verbosity>=2) { 00879 SG_INFO( " => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", 00880 supvecnum,model->at_upper_bound,(*maxdiff)); 00881 00882 } 00883 mymaxdiff=*maxdiff ; 00884 00885 //don't shrink w/ mkl 00886 if (((iteration % 10) == 0) && (!noshrink) && !callback) 00887 { 00888 activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc, 00889 CMath::max((int32_t)(activenum/10), 00890 CMath::max((int32_t)(totdoc/500),(int32_t) 100)), 00891 a,inconsistent, c, lin, label); 00892 00893 inactivenum=totdoc-activenum; 00894 00895 if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache()) 00896 && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) { 00897 00898 kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum), 00899 (int32_t) (kernel->get_activenum_cache()-supvecnum)), 00900 shrink_state->active); 00901 } 00902 } 00903 00904 if (bestmaxdiff>worstmaxdiff) 00905 worstmaxdiff=bestmaxdiff; 00906 00907 SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6); 00908 00909 /* Terminate loop */ 00910 if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time) { 00911 terminate = 1; 00912 retrain = 0; 00913 } 00914 } /* end of loop */ 00915 00916 SG_DEBUG( "inactive:%d\n", inactivenum); 00917 00918 if (inactivenum && !reactivated && !callback) 00919 { 00920 SG_DEBUG( "reactivating inactive examples\n"); 00921 reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc, 00922 iteration,inconsistent, 00923 docs,aicache, 00924 maxdiff); 00925 SG_DEBUG( "done reactivating inactive examples\n"); 00926 /* Update to new active variables. */ 00927 activenum=compute_index(shrink_state->active,totdoc,active2dnum); 00928 inactivenum=totdoc-activenum; 00929 /* reset watchdog */ 00930 bestmaxdiff=(*maxdiff); 00931 bestmaxdiffiter=iteration; 00932 } 00933 00934 criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc); 00935 CSVM::set_objective(criterion); 00936 00937 delete[] chosen; 00938 delete[] last_suboptimal_at; 00939 delete[] key; 00940 delete[] selcrit; 00941 delete[] selexam; 00942 delete[] a_old; 00943 delete[] aicache; 00944 delete[] working2dnum; 00945 delete[] active2dnum; 00946 delete[] qp.opt_ce; 00947 delete[] qp.opt_ce0; 00948 delete[] qp.opt_g; 00949 delete[] qp.opt_g0; 00950 delete[] qp.opt_xinit; 00951 delete[] qp.opt_low; 00952 delete[] qp.opt_up; 00953 00954 learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ 00955 00956 return(iteration); 00957 } 00958 00959 float64_t CSVMLight::compute_objective_function( 00960 float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label, 00961 int32_t totdoc) 00962 /* Return value of objective function. */ 00963 /* Works only relative to the active variables! */ 00964 { 00965 /* calculate value of objective function */ 00966 float64_t criterion=0; 00967 00968 for (int32_t i=0;i<totdoc;i++) 00969 criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i]; 00970 00971 return(criterion); 00972 } 00973 00974 00975 void CSVMLight::clear_index(int32_t *index) 00976 /* initializes and empties index */ 00977 { 00978 index[0]=-1; 00979 } 00980 00981 void CSVMLight::add_to_index(int32_t *index, int32_t elem) 00982 /* initializes and empties index */ 00983 { 00984 register int32_t i; 00985 for (i=0;index[i] != -1;i++); 00986 index[i]=elem; 00987 index[i+1]=-1; 00988 } 00989 00990 int32_t CSVMLight::compute_index( 00991 int32_t *binfeature, int32_t range, int32_t *index) 00992 /* create an inverted index of binfeature */ 00993 { 00994 register int32_t i,ii; 00995 00996 ii=0; 00997 for (i=0;i<range;i++) { 00998 if(binfeature[i]) { 00999 index[ii]=i; 01000 ii++; 01001 } 01002 } 01003 for (i=0;i<4;i++) { 01004 index[ii+i]=-1; 01005 } 01006 return(ii); 01007 } 01008 01009 01010 void CSVMLight::optimize_svm( 01011 int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const, 01012 float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc, 01013 int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin, 01014 float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target) 01015 /* Do optimization on the working set. */ 01016 { 01017 int32_t i; 01018 float64_t *a_v; 01019 01020 //compute_matrices_for_optimization_parallel(docs,label, 01021 // exclude_from_eq_const,eq_target,chosen, 01022 // active2dnum,working2dnum,a,lin,c, 01023 // varnum,totdoc,aicache,qp); 01024 01025 compute_matrices_for_optimization(docs,label, 01026 exclude_from_eq_const,eq_target,chosen, 01027 active2dnum,working2dnum,a,lin,c, 01028 varnum,totdoc,aicache,qp); 01029 01030 if(verbosity>=3) { 01031 SG_DEBUG( "Running optimizer..."); 01032 } 01033 /* call the qp-subsolver */ 01034 a_v=optimize_qp(qp,epsilon_crit_target, 01035 learn_parm->svm_maxqpsize, 01036 &(model->b), /* in case the optimizer gives us */ 01037 learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */ 01038 /* b is calculated in calculate_model. */ 01039 if(verbosity>=3) { 01040 SG_DONE(); 01041 } 01042 01043 for (i=0;i<varnum;i++) 01044 a[working2dnum[i]]=a_v[i]; 01045 } 01046 01047 void CSVMLight::compute_matrices_for_optimization_parallel( 01048 int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const, 01049 float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key, 01050 float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc, 01051 float64_t *aicache, QP *qp) 01052 { 01053 if (parallel->get_num_threads()<=1) 01054 { 01055 compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target, 01056 chosen, active2dnum, key, a, lin, c, 01057 varnum, totdoc, aicache, qp) ; 01058 } 01059 #ifndef WIN32 01060 else 01061 { 01062 register int32_t ki,kj,i,j; 01063 register float64_t kernel_temp; 01064 01065 qp->opt_n=varnum; 01066 qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */ 01067 for (j=1;j<model->sv_num;j++) { /* start at 1 */ 01068 if((!chosen[model->supvec[j]]) 01069 && (!exclude_from_eq_const[(model->supvec[j])])) { 01070 qp->opt_ce0[0]+=model->alpha[j]; 01071 } 01072 } 01073 if(learn_parm->biased_hyperplane) 01074 qp->opt_m=1; 01075 else 01076 qp->opt_m=0; /* eq-constraint will be ignored */ 01077 01078 /* init linear part of objective function */ 01079 for (i=0;i<varnum;i++) { 01080 qp->opt_g0[i]=lin[key[i]]; 01081 } 01082 01083 ASSERT(parallel->get_num_threads()>1); 01084 int32_t *KI=new int32_t[varnum*varnum] ; 01085 int32_t *KJ=new int32_t[varnum*varnum] ; 01086 int32_t Knum=0 ; 01087 float64_t *Kval = new float64_t[varnum*(varnum+1)/2] ; 01088 for (i=0;i<varnum;i++) { 01089 ki=key[i]; 01090 KI[Knum]=docs[ki] ; 01091 KJ[Knum]=docs[ki] ; 01092 Knum++ ; 01093 for (j=i+1;j<varnum;j++) 01094 { 01095 kj=key[j]; 01096 KI[Knum]=docs[ki] ; 01097 KJ[Knum]=docs[kj] ; 01098 Knum++ ; 01099 } 01100 } 01101 ASSERT(Knum<=varnum*(varnum+1)/2); 01102 01103 pthread_t* threads = new pthread_t[parallel->get_num_threads()-1]; 01104 S_THREAD_PARAM_KERNEL* params = new S_THREAD_PARAM_KERNEL[parallel->get_num_threads()-1]; 01105 int32_t step= Knum/parallel->get_num_threads(); 01106 //SG_DEBUG( "\nkernel-step size: %i\n", step) ; 01107 for (int32_t t=0; t<parallel->get_num_threads()-1; t++) 01108 { 01109 params[t].svmlight = this; 01110 params[t].start = t*step; 01111 params[t].end = (t+1)*step; 01112 params[t].KI=KI ; 01113 params[t].KJ=KJ ; 01114 params[t].Kval=Kval ; 01115 pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)¶ms[t]); 01116 } 01117 for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++) 01118 Kval[i]=compute_kernel(KI[i],KJ[i]) ; 01119 01120 for (int32_t t=0; t<parallel->get_num_threads()-1; t++) 01121 pthread_join(threads[t], NULL); 01122 01123 delete[] params; 01124 delete[] threads; 01125 01126 Knum=0 ; 01127 for (i=0;i<varnum;i++) { 01128 ki=key[i]; 01129 01130 /* Compute the matrix for equality constraints */ 01131 qp->opt_ce[i]=label[ki]; 01132 qp->opt_low[i]=0; 01133 qp->opt_up[i]=learn_parm->svm_cost[ki]; 01134 01135 kernel_temp=Kval[Knum] ; 01136 Knum++ ; 01137 /* compute linear part of objective function */ 01138 qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]); 01139 /* compute quadratic part of objective function */ 01140 qp->opt_g[varnum*i+i]=kernel_temp; 01141 01142 for (j=i+1;j<varnum;j++) { 01143 kj=key[j]; 01144 kernel_temp=Kval[Knum] ; 01145 Knum++ ; 01146 /* compute linear part of objective function */ 01147 qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]); 01148 qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]); 01149 /* compute quadratic part of objective function */ 01150 qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp; 01151 qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp; 01152 } 01153 01154 if(verbosity>=3) { 01155 if(i % 20 == 0) { 01156 SG_DEBUG( "%ld..",i); 01157 } 01158 } 01159 } 01160 01161 delete[] KI ; 01162 delete[] KJ ; 01163 delete[] Kval ; 01164 01165 for (i=0;i<varnum;i++) { 01166 /* assure starting at feasible point */ 01167 qp->opt_xinit[i]=a[key[i]]; 01168 /* set linear part of objective function */ 01169 qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]]; 01170 } 01171 01172 if(verbosity>=3) { 01173 SG_DONE(); 01174 } 01175 } 01176 #endif 01177 } 01178 01179 void CSVMLight::compute_matrices_for_optimization( 01180 int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const, 01181 float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key, 01182 float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc, 01183 float64_t *aicache, QP *qp) 01184 { 01185 register int32_t ki,kj,i,j; 01186 register float64_t kernel_temp; 01187 01188 qp->opt_n=varnum; 01189 qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */ 01190 for (j=1;j<model->sv_num;j++) { /* start at 1 */ 01191 if((!chosen[model->supvec[j]]) 01192 && (!exclude_from_eq_const[(model->supvec[j])])) { 01193 qp->opt_ce0[0]+=model->alpha[j]; 01194 } 01195 } 01196 if(learn_parm->biased_hyperplane) 01197 qp->opt_m=1; 01198 else 01199 qp->opt_m=0; /* eq-constraint will be ignored */ 01200 01201 /* init linear part of objective function */ 01202 for (i=0;i<varnum;i++) { 01203 qp->opt_g0[i]=lin[key[i]]; 01204 } 01205 01206 for (i=0;i<varnum;i++) { 01207 ki=key[i]; 01208 01209 /* Compute the matrix for equality constraints */ 01210 qp->opt_ce[i]=label[ki]; 01211 qp->opt_low[i]=0; 01212 qp->opt_up[i]=learn_parm->svm_cost[ki]; 01213 01214 kernel_temp=compute_kernel(docs[ki], docs[ki]); 01215 /* compute linear part of objective function */ 01216 qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]); 01217 /* compute quadratic part of objective function */ 01218 qp->opt_g[varnum*i+i]=kernel_temp; 01219 01220 for (j=i+1;j<varnum;j++) { 01221 kj=key[j]; 01222 kernel_temp=compute_kernel(docs[ki], docs[kj]); 01223 01224 /* compute linear part of objective function */ 01225 qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]); 01226 qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]); 01227 /* compute quadratic part of objective function */ 01228 qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp; 01229 qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp; 01230 } 01231 01232 if(verbosity>=3) { 01233 if(i % 20 == 0) { 01234 SG_DEBUG( "%ld..",i); 01235 } 01236 } 01237 } 01238 01239 for (i=0;i<varnum;i++) { 01240 /* assure starting at feasible point */ 01241 qp->opt_xinit[i]=a[key[i]]; 01242 /* set linear part of objective function */ 01243 qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]]; 01244 } 01245 01246 if(verbosity>=3) { 01247 SG_DONE(); 01248 } 01249 } 01250 01251 01252 int32_t CSVMLight::calculate_svm_model( 01253 int32_t* docs, int32_t *label, float64_t *lin, float64_t *a, 01254 float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum) 01255 /* Compute decision function based on current values */ 01256 /* of alpha. */ 01257 { 01258 int32_t i,ii,pos,b_calculated=0,first_low,first_high; 01259 float64_t ex_c,b_temp,b_low,b_high; 01260 01261 if(verbosity>=3) { 01262 SG_DEBUG( "Calculating model..."); 01263 } 01264 01265 if(!learn_parm->biased_hyperplane) { 01266 model->b=0; 01267 b_calculated=1; 01268 } 01269 01270 for (ii=0;(i=working2dnum[ii])>=0;ii++) { 01271 if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ 01272 pos=model->index[i]; 01273 model->index[i]=-1; 01274 (model->sv_num)--; 01275 model->supvec[pos]=model->supvec[model->sv_num]; 01276 model->alpha[pos]=model->alpha[model->sv_num]; 01277 model->index[model->supvec[pos]]=pos; 01278 } 01279 else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ 01280 model->supvec[model->sv_num]=docs[i]; 01281 model->alpha[model->sv_num]=a[i]*(float64_t)label[i]; 01282 model->index[i]=model->sv_num; 01283 (model->sv_num)++; 01284 } 01285 else if(a_old[i]==a[i]) { /* nothing to do */ 01286 } 01287 else { /* just update alpha */ 01288 model->alpha[model->index[i]]=a[i]*(float64_t)label[i]; 01289 } 01290 01291 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; 01292 if((a_old[i]>=ex_c) && (a[i]<ex_c)) { 01293 (model->at_upper_bound)--; 01294 } 01295 else if((a_old[i]<ex_c) && (a[i]>=ex_c)) { 01296 (model->at_upper_bound)++; 01297 } 01298 01299 if((!b_calculated) 01300 && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */ 01301 model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]); 01302 b_calculated=1; 01303 } 01304 } 01305 01306 /* No alpha in the working set not at bounds, so b was not 01307 calculated in the usual way. The following handles this special 01308 case. */ 01309 if(learn_parm->biased_hyperplane 01310 && (!b_calculated) 01311 && (model->sv_num-1 == model->at_upper_bound)) { 01312 first_low=1; 01313 first_high=1; 01314 b_low=0; 01315 b_high=0; 01316 for (ii=0;(i=active2dnum[ii])>=0;ii++) { 01317 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; 01318 if(a[i]<ex_c) { 01319 if(label[i]>0) { 01320 b_temp=-(learn_parm->eps[i]-c[i]+lin[i]); 01321 if((b_temp>b_low) || (first_low)) { 01322 b_low=b_temp; 01323 first_low=0; 01324 } 01325 } 01326 else { 01327 b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]); 01328 if((b_temp<b_high) || (first_high)) { 01329 b_high=b_temp; 01330 first_high=0; 01331 } 01332 } 01333 } 01334 else { 01335 if(label[i]<0) { 01336 b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]); 01337 if((b_temp>b_low) || (first_low)) { 01338 b_low=b_temp; 01339 first_low=0; 01340 } 01341 } 01342 else { 01343 b_temp=-(learn_parm->eps[i]-c[i]+lin[i]); 01344 if((b_temp<b_high) || (first_high)) { 01345 b_high=b_temp; 01346 first_high=0; 01347 } 01348 } 01349 } 01350 } 01351 if(first_high) { 01352 model->b=-b_low; 01353 } 01354 else if(first_low) { 01355 model->b=-b_high; 01356 } 01357 else { 01358 model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */ 01359 } 01360 } 01361 01362 if(verbosity>=3) { 01363 SG_DONE(); 01364 } 01365 01366 return(model->sv_num-1); /* have to substract one, since element 0 is empty*/ 01367 } 01368 01369 int32_t CSVMLight::check_optimality( 01370 int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc, 01371 float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified, 01372 int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at, 01373 int32_t iteration) 01374 /* Check KT-conditions */ 01375 { 01376 int32_t i,ii,retrain; 01377 float64_t dist,ex_c,target; 01378 01379 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) 01380 learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; 01381 else 01382 learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; 01383 retrain=0; 01384 (*maxdiff)=0; 01385 (*misclassified)=0; 01386 for (ii=0;(i=active2dnum[ii])>=0;ii++) { 01387 if((!inconsistent[i]) && label[i]) { 01388 dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from 01389 hyperplane*/ 01390 target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]); 01391 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; 01392 if(dist <= 0) { 01393 (*misclassified)++; /* does not work due to deactivation of var */ 01394 } 01395 if((a[i]>learn_parm->epsilon_a) && (dist > target)) { 01396 if((dist-target)>(*maxdiff)) /* largest violation */ 01397 (*maxdiff)=dist-target; 01398 } 01399 else if((a[i]<ex_c) && (dist < target)) { 01400 if((target-dist)>(*maxdiff)) /* largest violation */ 01401 (*maxdiff)=target-dist; 01402 } 01403 /* Count how int32_t a variable was at lower/upper bound (and optimal).*/ 01404 /* Variables, which were at the bound and optimal for a int32_t */ 01405 /* time are unlikely to become support vectors. In case our */ 01406 /* cache is filled up, those variables are excluded to save */ 01407 /* kernel evaluations. (See chapter 'Shrinking').*/ 01408 if((a[i]>(learn_parm->epsilon_a)) 01409 && (a[i]<ex_c)) { 01410 last_suboptimal_at[i]=iteration; /* not at bound */ 01411 } 01412 else if((a[i]<=(learn_parm->epsilon_a)) 01413 && (dist < (target+learn_parm->epsilon_shrink))) { 01414 last_suboptimal_at[i]=iteration; /* not likely optimal */ 01415 } 01416 else if((a[i]>=ex_c) 01417 && (dist > (target-learn_parm->epsilon_shrink))) { 01418 last_suboptimal_at[i]=iteration; /* not likely optimal */ 01419 } 01420 } 01421 } 01422 01423 /* termination criterion */ 01424 if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { 01425 retrain=1; 01426 } 01427 return(retrain); 01428 } 01429 01430 void CSVMLight::update_linear_component( 01431 int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a, 01432 float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin, 01433 float64_t *aicache, float64_t* c) 01434 /* keep track of the linear component */ 01435 /* lin of the gradient etc. by updating */ 01436 /* based on the change of the variables */ 01437 /* in the current working set */ 01438 { 01439 register int32_t i=0,ii=0,j=0,jj=0; 01440 01441 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) 01442 { 01443 if (callback) 01444 { 01445 update_linear_component_mkl_linadd(docs, label, active2dnum, a, 01446 a_old, working2dnum, totdoc, lin, aicache); 01447 } 01448 else 01449 { 01450 kernel->clear_normal(); 01451 01452 int32_t num_working=0; 01453 for (ii=0;(i=working2dnum[ii])>=0;ii++) { 01454 if(a[i] != a_old[i]) { 01455 kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]); 01456 num_working++; 01457 } 01458 } 01459 01460 if (num_working>0) 01461 { 01462 if (parallel->get_num_threads() < 2) 01463 { 01464 for (jj=0;(j=active2dnum[jj])>=0;jj++) { 01465 lin[j]+=kernel->compute_optimized(docs[j]); 01466 } 01467 } 01468 #ifndef WIN32 01469 else 01470 { 01471 int32_t num_elem = 0 ; 01472 for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ; 01473 01474 pthread_t* threads = new pthread_t[parallel->get_num_threads()-1] ; 01475 S_THREAD_PARAM* params = new S_THREAD_PARAM[parallel->get_num_threads()-1] ; 01476 int32_t start = 0 ; 01477 int32_t step = num_elem/parallel->get_num_threads(); 01478 int32_t end = step ; 01479 01480 for (int32_t t=0; t<parallel->get_num_threads()-1; t++) 01481 { 01482 params[t].kernel = kernel ; 01483 params[t].lin = lin ; 01484 params[t].docs = docs ; 01485 params[t].active2dnum=active2dnum ; 01486 params[t].start = start ; 01487 params[t].end = end ; 01488 start=end ; 01489 end+=step ; 01490 pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)¶ms[t]) ; 01491 } 01492 01493 for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) { 01494 lin[j]+=kernel->compute_optimized(docs[j]); 01495 } 01496 void* ret; 01497 for (int32_t t=0; t<parallel->get_num_threads()-1; t++) 01498 pthread_join(threads[t], &ret) ; 01499 01500 delete[] params; 01501 delete[] threads; 01502 } 01503 #endif 01504 } 01505 } 01506 } 01507 else 01508 { 01509 if (callback) 01510 { 01511 update_linear_component_mkl(docs, label, active2dnum, 01512 a, a_old, working2dnum, totdoc, lin, aicache); 01513 } 01514 else { 01515 for (jj=0;(i=working2dnum[jj])>=0;jj++) { 01516 if(a[i] != a_old[i]) { 01517 kernel->get_kernel_row(i,active2dnum,aicache); 01518 for (ii=0;(j=active2dnum[ii])>=0;ii++) 01519 lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i]; 01520 } 01521 } 01522 } 01523 } 01524 } 01525 01526 01527 void CSVMLight::update_linear_component_mkl( 01528 int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a, 01529 float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin, 01530 float64_t *aicache) 01531 { 01532 //int inner_iters=0; 01533 int32_t num = kernel->get_num_vec_rhs(); 01534 int32_t num_weights = -1; 01535 int32_t num_kernels = kernel->get_num_subkernels() ; 01536 const float64_t* old_beta = kernel->get_subkernel_weights(num_weights); 01537 ASSERT(num_weights==num_kernels); 01538 01539 if ((kernel->get_kernel_type()==K_COMBINED) && 01540 (!((CCombinedKernel*)kernel)->get_append_subkernel_weights()))// for combined kernel 01541 { 01542 CCombinedKernel* k = (CCombinedKernel*) kernel; 01543 CKernel* kn = k->get_first_kernel() ; 01544 int32_t n = 0, i, j ; 01545 01546 while (kn!=NULL) 01547 { 01548 for (i=0;i<num;i++) 01549 { 01550 if(a[i] != a_old[i]) 01551 { 01552 kn->get_kernel_row(i,NULL,aicache, true); 01553 for (j=0;j<num;j++) 01554 W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i]; 01555 } 01556 } 01557 01558 SG_UNREF(kn); 01559 kn = k->get_next_kernel(); 01560 n++ ; 01561 } 01562 } 01563 else // hope the kernel is fast ... 01564 { 01565 float64_t* w_backup = new float64_t[num_kernels] ; 01566 float64_t* w1 = new float64_t[num_kernels] ; 01567 01568 // backup and set to zero 01569 for (int32_t i=0; i<num_kernels; i++) 01570 { 01571 w_backup[i] = old_beta[i] ; 01572 w1[i]=0.0 ; 01573 } 01574 for (int32_t n=0; n<num_kernels; n++) 01575 { 01576 w1[n]=1.0 ; 01577 kernel->set_subkernel_weights(w1, num_weights) ; 01578 01579 for (int32_t i=0;i<num;i++) 01580 { 01581 if(a[i] != a_old[i]) 01582 { 01583 for (int32_t j=0;j<num;j++) 01584 W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i]; 01585 } 01586 } 01587 w1[n]=0.0 ; 01588 } 01589 01590 // restore old weights 01591 kernel->set_subkernel_weights(w_backup,num_weights) ; 01592 01593 delete[] w_backup ; 01594 delete[] w1 ; 01595 } 01596 01597 call_mkl_callback(a, label, lin); 01598 } 01599 01600 01601 void CSVMLight::update_linear_component_mkl_linadd( 01602 int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a, 01603 float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin, 01604 float64_t *aicache) 01605 { 01606 //int inner_iters=0; 01607 01608 // kernel with LP_LINADD property is assumed to have 01609 // compute_by_subkernel functions 01610 int32_t num = kernel->get_num_vec_rhs(); 01611 int32_t num_weights = -1; 01612 int32_t num_kernels = kernel->get_num_subkernels() ; 01613 const float64_t* old_beta = kernel->get_subkernel_weights(num_weights); 01614 ASSERT(num_weights==num_kernels); 01615 01616 float64_t* w_backup = new float64_t[num_kernels] ; 01617 float64_t* w1 = new float64_t[num_kernels] ; 01618 01619 // backup and set to one 01620 for (int32_t i=0; i<num_kernels; i++) 01621 { 01622 w_backup[i] = old_beta[i] ; 01623 w1[i]=1.0 ; 01624 } 01625 // set the kernel weights 01626 kernel->set_subkernel_weights(w1, num_weights) ; 01627 01628 // create normal update (with changed alphas only) 01629 kernel->clear_normal(); 01630 for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) { 01631 if(a[i] != a_old[i]) { 01632 kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]); 01633 } 01634 } 01635 01636 if (parallel->get_num_threads() < 2) 01637 { 01638 // determine contributions of different kernels 01639 for (int32_t i=0; i<num; i++) 01640 kernel->compute_by_subkernel(i,&W[i*num_kernels]); 01641 } 01642 #ifndef WIN32 01643 else 01644 { 01645 pthread_t* threads = new pthread_t[parallel->get_num_threads()-1]; 01646 S_THREAD_PARAM* params = new S_THREAD_PARAM[parallel->get_num_threads()-1]; 01647 int32_t step= num/parallel->get_num_threads(); 01648 01649 for (int32_t t=0; t<parallel->get_num_threads()-1; t++) 01650 { 01651 params[t].kernel = kernel; 01652 params[t].W = W; 01653 params[t].start = t*step; 01654 params[t].end = (t+1)*step; 01655 pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)¶ms[t]); 01656 } 01657 01658 for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++) 01659 kernel->compute_by_subkernel(i,&W[i*num_kernels]); 01660 01661 for (int32_t t=0; t<parallel->get_num_threads()-1; t++) 01662 pthread_join(threads[t], NULL); 01663 01664 delete[] params; 01665 delete[] threads; 01666 } 01667 #endif 01668 01669 // restore old weights 01670 kernel->set_subkernel_weights(w_backup,num_weights); 01671 01672 delete[] w_backup; 01673 delete[] w1; 01674 01675 call_mkl_callback(a, label, lin); 01676 } 01677 01678 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p) 01679 { 01680 S_THREAD_PARAM* params = (S_THREAD_PARAM*) p; 01681 01682 int32_t num_kernels=params->kernel->get_num_subkernels(); 01683 01684 // determine contributions of different kernels 01685 for (int32_t i=params->start; i<params->end; i++) 01686 params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels])); 01687 01688 return NULL ; 01689 } 01690 01691 void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin) 01692 { 01693 int32_t num = kernel->get_num_vec_rhs(); 01694 int32_t num_kernels = kernel->get_num_subkernels() ; 01695 01696 int nk = (int) num_kernels; /* calling external lib */ 01697 01698 float64_t suma=0; 01699 float64_t* sumw=new float64_t[num_kernels]; 01700 #ifdef HAVE_LAPACK 01701 double* alphay = new double[num]; 01702 01703 for (int32_t i=0; i<num; i++) 01704 { 01705 alphay[i]=a[i]*label[i]; 01706 suma+=a[i]; 01707 } 01708 01709 for (int32_t i=0; i<num_kernels; i++) 01710 sumw[i]=0; 01711 01712 cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W, 01713 num_kernels, alphay, 1, 1.0, (double*) sumw, 1); 01714 01715 delete[] alphay; 01716 #else 01717 for (int32_t i=0; i<num; i++) 01718 suma += a[i]; 01719 01720 for (int32_t d=0; d<num_kernels; d++) 01721 { 01722 sumw[d]=0; 01723 for (int32_t i=0; i<num; i++) 01724 sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]); 01725 } 01726 #endif 01727 01728 if (callback) 01729 mkl_converged=callback(mkl, sumw, suma); 01730 01731 01732 const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels); 01733 01734 // update lin 01735 #ifdef HAVE_LAPACK 01736 cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W, 01737 nk, (double*) new_beta, 1, 0.0, (double*) lin, 1); 01738 #else 01739 for (int32_t i=0; i<num; i++) 01740 lin[i]=0 ; 01741 for (int32_t d=0; d<num_kernels; d++) 01742 if (new_beta[d]!=0) 01743 for (int32_t i=0; i<num; i++) 01744 lin[i] += new_beta[d]*W[i*num_kernels+d] ; 01745 #endif 01746 01747 delete[] sumw; 01748 } 01749 01750 01751 /*************************** Working set selection ***************************/ 01752 01753 int32_t CSVMLight::select_next_qp_subproblem_grad( 01754 int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc, 01755 int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum, 01756 int32_t *working2dnum, float64_t *selcrit, int32_t *select, 01757 int32_t cache_only, int32_t *key, int32_t *chosen) 01758 /* Use the feasible direction approach to select the next 01759 qp-subproblem (see chapter 'Selecting a good working set'). If 01760 'cache_only' is true, then the variables are selected only among 01761 those for which the kernel evaluations are cached. */ 01762 { 01763 int32_t choosenum,i,j,k,activedoc,inum,valid; 01764 float64_t s; 01765 01766 for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ 01767 choosenum=0; 01768 activedoc=0; 01769 for (i=0;(j=active2dnum[i])>=0;i++) { 01770 s=-label[j]; 01771 if(cache_only) 01772 { 01773 if (use_kernel_cache) 01774 valid=(kernel->kernel_cache_check(j)); 01775 else 01776 valid = 1 ; 01777 } 01778 else 01779 valid=1; 01780 if(valid 01781 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) 01782 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 01783 && (s>0))) 01784 && (!chosen[j]) 01785 && (label[j]) 01786 && (!inconsistent[j])) 01787 { 01788 selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]); 01789 key[activedoc]=j; 01790 activedoc++; 01791 } 01792 } 01793 select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2)); 01794 for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) { 01795 i=key[select[k]]; 01796 chosen[i]=1; 01797 working2dnum[inum+choosenum]=i; 01798 choosenum+=1; 01799 if (use_kernel_cache) 01800 kernel->kernel_cache_touch(i); 01801 /* make sure it does not get kicked */ 01802 /* out of cache */ 01803 } 01804 01805 activedoc=0; 01806 for (i=0;(j=active2dnum[i])>=0;i++) { 01807 s=label[j]; 01808 if(cache_only) 01809 { 01810 if (use_kernel_cache) 01811 valid=(kernel->kernel_cache_check(j)); 01812 else 01813 valid = 1 ; 01814 } 01815 else 01816 valid=1; 01817 if(valid 01818 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) 01819 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 01820 && (s>0))) 01821 && (!chosen[j]) 01822 && (label[j]) 01823 && (!inconsistent[j])) 01824 { 01825 selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]); 01826 /* selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */ 01827 key[activedoc]=j; 01828 activedoc++; 01829 } 01830 } 01831 select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2)); 01832 for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) { 01833 i=key[select[k]]; 01834 chosen[i]=1; 01835 working2dnum[inum+choosenum]=i; 01836 choosenum+=1; 01837 if (use_kernel_cache) 01838 kernel->kernel_cache_touch(i); /* make sure it does not get kicked */ 01839 /* out of cache */ 01840 } 01841 working2dnum[inum+choosenum]=-1; /* complete index */ 01842 return(choosenum); 01843 } 01844 01845 int32_t CSVMLight::select_next_qp_subproblem_rand( 01846 int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc, 01847 int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum, 01848 int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key, 01849 int32_t *chosen, int32_t iteration) 01850 /* Use the feasible direction approach to select the next 01851 qp-subproblem (see section 'Selecting a good working set'). Chooses 01852 a feasible direction at (pseudo) random to help jump over numerical 01853 problem. */ 01854 { 01855 int32_t choosenum,i,j,k,activedoc,inum; 01856 float64_t s; 01857 01858 for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ 01859 choosenum=0; 01860 activedoc=0; 01861 for (i=0;(j=active2dnum[i])>=0;i++) { 01862 s=-label[j]; 01863 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) 01864 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 01865 && (s>0))) 01866 && (!inconsistent[j]) 01867 && (label[j]) 01868 && (!chosen[j])) { 01869 selcrit[activedoc]=(j+iteration) % totdoc; 01870 key[activedoc]=j; 01871 activedoc++; 01872 } 01873 } 01874 select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2)); 01875 for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) { 01876 i=key[select[k]]; 01877 chosen[i]=1; 01878 working2dnum[inum+choosenum]=i; 01879 choosenum+=1; 01880 if (use_kernel_cache) 01881 kernel->kernel_cache_touch(i); /* make sure it does not get kicked */ 01882 /* out of cache */ 01883 } 01884 01885 activedoc=0; 01886 for (i=0;(j=active2dnum[i])>=0;i++) { 01887 s=label[j]; 01888 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) 01889 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 01890 && (s>0))) 01891 && (!inconsistent[j]) 01892 && (label[j]) 01893 && (!chosen[j])) { 01894 selcrit[activedoc]=(j+iteration) % totdoc; 01895 key[activedoc]=j; 01896 activedoc++; 01897 } 01898 } 01899 select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2)); 01900 for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) { 01901 i=key[select[k]]; 01902 chosen[i]=1; 01903 working2dnum[inum+choosenum]=i; 01904 choosenum+=1; 01905 if (use_kernel_cache) 01906 kernel->kernel_cache_touch(i); /* make sure it does not get kicked */ 01907 /* out of cache */ 01908 } 01909 working2dnum[inum+choosenum]=-1; /* complete index */ 01910 return(choosenum); 01911 } 01912 01913 01914 01915 void CSVMLight::select_top_n( 01916 float64_t *selcrit, int32_t range, int32_t* select, int32_t n) 01917 { 01918 register int32_t i,j; 01919 01920 for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */ 01921 for (j=i;j>=0;j--) { 01922 if((j>0) && (selcrit[select[j-1]]<selcrit[i])){ 01923 select[j]=select[j-1]; 01924 } 01925 else { 01926 select[j]=i; 01927 j=-1; 01928 } 01929 } 01930 } 01931 if(n>0) { 01932 for (i=n;i<range;i++) { 01933 if(selcrit[i]>selcrit[select[n-1]]) { 01934 for (j=n-1;j>=0;j--) { 01935 if((j>0) && (selcrit[select[j-1]]<selcrit[i])) { 01936 select[j]=select[j-1]; 01937 } 01938 else { 01939 select[j]=i; 01940 j=-1; 01941 } 01942 } 01943 } 01944 } 01945 } 01946 } 01947 01948 01949 /******************************** Shrinking *********************************/ 01950 01951 void CSVMLight::init_shrink_state( 01952 SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory) 01953 { 01954 int32_t i; 01955 01956 shrink_state->deactnum=0; 01957 shrink_state->active = new int32_t[totdoc]; 01958 shrink_state->inactive_since = new int32_t[totdoc]; 01959 shrink_state->a_history = new float64_t*[maxhistory]; 01960 shrink_state->maxhistory=maxhistory; 01961 shrink_state->last_lin = new float64_t[totdoc]; 01962 shrink_state->last_a = new float64_t[totdoc]; 01963 01964 for (i=0;i<totdoc;i++) { 01965 shrink_state->active[i]=1; 01966 shrink_state->inactive_since[i]=0; 01967 shrink_state->last_a[i]=0; 01968 shrink_state->last_lin[i]=0; 01969 } 01970 } 01971 01972 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state) 01973 { 01974 delete[] shrink_state->active; 01975 delete[] shrink_state->inactive_since; 01976 if(shrink_state->deactnum > 0) 01977 delete[] (shrink_state->a_history[shrink_state->deactnum-1]); 01978 delete[] (shrink_state->a_history); 01979 delete[] (shrink_state->last_a); 01980 delete[] (shrink_state->last_lin); 01981 } 01982 01983 int32_t CSVMLight::shrink_problem( 01984 SHRINK_STATE *shrink_state, int32_t *active2dnum, 01985 int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc, 01986 int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c, 01987 float64_t* lin, int* label) 01988 /* Shrink some variables away. Do the shrinking only if at least 01989 minshrink variables can be removed. */ 01990 { 01991 int32_t i,ii,change,activenum,lastiter; 01992 float64_t *a_old=NULL; 01993 01994 activenum=0; 01995 change=0; 01996 for (ii=0;active2dnum[ii]>=0;ii++) { 01997 i=active2dnum[ii]; 01998 activenum++; 01999 lastiter=last_suboptimal_at[i]; 02000 02001 if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) 02002 || (inconsistent[i])) { 02003 change++; 02004 } 02005 } 02006 if((change>=minshrink) /* shrink only if sufficiently many candidates */ 02007 && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */ 02008 /* Shrink problem by removing those variables which are */ 02009 /* optimal at a bound for a minimum number of iterations */ 02010 if(verbosity>=2) { 02011 SG_INFO( " Shrinking..."); 02012 } 02013 02014 if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case save alphas */ 02015 02016 a_old=new float64_t[totdoc]; 02017 shrink_state->a_history[shrink_state->deactnum]=a_old; 02018 for (i=0;i<totdoc;i++) { 02019 a_old[i]=a[i]; 02020 } 02021 } 02022 for (ii=0;active2dnum[ii]>=0;ii++) { 02023 i=active2dnum[ii]; 02024 lastiter=last_suboptimal_at[i]; 02025 if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) 02026 || (inconsistent[i])) { 02027 shrink_state->active[i]=0; 02028 shrink_state->inactive_since[i]=shrink_state->deactnum; 02029 } 02030 } 02031 activenum=compute_index(shrink_state->active,totdoc,active2dnum); 02032 shrink_state->deactnum++; 02033 if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) 02034 shrink_state->deactnum=0; 02035 02036 if(verbosity>=2) { 02037 SG_DONE(); 02038 SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum); 02039 } 02040 } 02041 return(activenum); 02042 } 02043 02044 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p) 02045 { 02046 S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p; 02047 02048 CKernel* k = params->kernel; 02049 float64_t* lin = params->lin; 02050 float64_t* last_lin = params->last_lin; 02051 int32_t* active = params->active; 02052 int32_t* docs = params->docs; 02053 int32_t start = params->start; 02054 int32_t end = params->end; 02055 02056 for (int32_t i=start;i<end;i++) 02057 { 02058 if (!active[i]) 02059 lin[i] = last_lin[i]+k->compute_optimized(docs[i]); 02060 02061 last_lin[i]=lin[i]; 02062 } 02063 02064 return NULL; 02065 } 02066 02067 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p) 02068 { 02069 S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p; 02070 ASSERT(params); 02071 ASSERT(params->kernel); 02072 ASSERT(params->lin); 02073 ASSERT(params->aicache); 02074 ASSERT(params->a); 02075 ASSERT(params->a_old); 02076 ASSERT(params->changed2dnum); 02077 ASSERT(params->inactive2dnum); 02078 ASSERT(params->label); 02079 02080 CKernel* k = params->kernel; 02081 float64_t* lin = params->lin; 02082 float64_t* aicache = params->aicache; 02083 float64_t* a= params->a; 02084 float64_t* a_old = params->a_old; 02085 int32_t* changed2dnum = params->changed2dnum; 02086 int32_t* inactive2dnum = params->inactive2dnum; 02087 int32_t* label = params->label; 02088 int32_t start = params->start; 02089 int32_t end = params->end; 02090 02091 for (int32_t ii=start;ii<end;ii++) 02092 { 02093 int32_t i=changed2dnum[ii]; 02094 int32_t j=0; 02095 ASSERT(i>=0); 02096 02097 k->get_kernel_row(i,inactive2dnum,aicache); 02098 for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++) 02099 lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i]; 02100 } 02101 return NULL; 02102 } 02103 02104 void CSVMLight::reactivate_inactive_examples( 02105 int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin, 02106 float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent, 02107 int32_t* docs, float64_t *aicache, float64_t *maxdiff) 02108 /* Make all variables active again which had been removed by 02109 shrinking. */ 02110 /* Computes lin for those variables from scratch. */ 02111 { 02112 register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum; 02113 int32_t *changed,*inactive; 02114 register float64_t *a_old,dist; 02115 float64_t ex_c,target; 02116 02117 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */ 02118 a_old=shrink_state->last_a; 02119 02120 if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION)) 02121 { 02122 SG_DEBUG( " clear normal - linadd\n"); 02123 kernel->clear_normal(); 02124 02125 int32_t num_modified=0; 02126 for (i=0;i<totdoc;i++) { 02127 if(a[i] != a_old[i]) { 02128 kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i])); 02129 a_old[i]=a[i]; 02130 num_modified++; 02131 } 02132 } 02133 02134 if (num_modified>0) 02135 { 02136 int32_t num_threads=parallel->get_num_threads(); 02137 ASSERT(num_threads>0); 02138 if (num_threads < 2) 02139 { 02140 S_THREAD_PARAM_REACTIVATE_LINADD params; 02141 params.kernel=kernel; 02142 params.lin=lin; 02143 params.last_lin=shrink_state->last_lin; 02144 params.docs=docs; 02145 params.active=shrink_state->active; 02146 params.start=0; 02147 params.end=totdoc; 02148 reactivate_inactive_examples_linadd_helper((void*) ¶ms); 02149 } 02150 #ifndef WIN32 02151 else 02152 { 02153 pthread_t* threads = new pthread_t[num_threads-1]; 02154 S_THREAD_PARAM_REACTIVATE_LINADD* params = new S_THREAD_PARAM_REACTIVATE_LINADD[num_threads]; 02155 int32_t step= totdoc/num_threads; 02156 02157 for (t=0; t<num_threads-1; t++) 02158 { 02159 params[t].kernel=kernel; 02160 params[t].lin=lin; 02161 params[t].last_lin=shrink_state->last_lin; 02162 params[t].docs=docs; 02163 params[t].active=shrink_state->active; 02164 params[t].start = t*step; 02165 params[t].end = (t+1)*step; 02166 pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)¶ms[t]); 02167 } 02168 02169 params[t].kernel=kernel; 02170 params[t].lin=lin; 02171 params[t].last_lin=shrink_state->last_lin; 02172 params[t].docs=docs; 02173 params[t].active=shrink_state->active; 02174 params[t].start = t*step; 02175 params[t].end = totdoc; 02176 reactivate_inactive_examples_linadd_helper((void*) ¶ms[t]); 02177 02178 for (t=0; t<num_threads-1; t++) 02179 pthread_join(threads[t], NULL); 02180 02181 delete[] threads; 02182 delete[] params; 02183 } 02184 #endif 02185 02186 } 02187 } 02188 else 02189 { 02190 float64_t *alphas = new float64_t[totdoc] ; 02191 int32_t *idx = new int32_t[totdoc] ; 02192 int32_t num_suppvec=0 ; 02193 02194 for (i=0; i<totdoc; i++) 02195 { 02196 if(a[i] != a_old[i]) 02197 { 02198 alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i]; 02199 idx[num_suppvec] = i ; 02200 a_old[i] = a[i] ; 02201 num_suppvec++ ; 02202 } 02203 } 02204 02205 if (num_suppvec>0) 02206 { 02207 int32_t num_inactive=0; 02208 int32_t* inactive_idx=new int32_t[totdoc]; // infact we only need a subset 02209 02210 j=0; 02211 for (i=0;i<totdoc;i++) 02212 { 02213 if(!shrink_state->active[i]) 02214 { 02215 inactive_idx[j++] = i; 02216 num_inactive++; 02217 } 02218 } 02219 02220 if (num_inactive>0) 02221 { 02222 float64_t* dest = new float64_t[num_inactive]; 02223 memset(dest, 0, sizeof(float64_t)*num_inactive); 02224 02225 kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas); 02226 02227 j=0; 02228 for (i=0;i<totdoc;i++) { 02229 if(!shrink_state->active[i]) { 02230 lin[i] = shrink_state->last_lin[i] + dest[j++] ; 02231 } 02232 shrink_state->last_lin[i]=lin[i]; 02233 } 02234 02235 delete[] dest; 02236 } 02237 else 02238 { 02239 for (i=0;i<totdoc;i++) 02240 shrink_state->last_lin[i]=lin[i]; 02241 } 02242 delete[] inactive_idx; 02243 } 02244 delete[] alphas; 02245 delete[] idx; 02246 } 02247 02248 kernel->delete_optimization(); 02249 } 02250 else 02251 { 02252 changed=new int32_t[totdoc]; 02253 changed2dnum=new int32_t[totdoc+11]; 02254 inactive=new int32_t[totdoc]; 02255 inactive2dnum=new int32_t[totdoc+11]; 02256 for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) 02257 { 02258 if(verbosity>=2) { 02259 SG_INFO( "%ld..",t); 02260 } 02261 a_old=shrink_state->a_history[t]; 02262 for (i=0;i<totdoc;i++) { 02263 inactive[i]=((!shrink_state->active[i]) 02264 && (shrink_state->inactive_since[i] == t)); 02265 changed[i]= (a[i] != a_old[i]); 02266 } 02267 compute_index(inactive,totdoc,inactive2dnum); 02268 compute_index(changed,totdoc,changed2dnum); 02269 02270 02271 int32_t num_threads=parallel->get_num_threads(); 02272 ASSERT(num_threads>0); 02273 02274 if (num_threads < 2) 02275 { 02276 for (ii=0;(i=changed2dnum[ii])>=0;ii++) { 02277 kernel->get_kernel_row(i,inactive2dnum,aicache); 02278 for (jj=0;(j=inactive2dnum[jj])>=0;jj++) 02279 lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i]; 02280 } 02281 } 02282 #ifndef WIN32 02283 else 02284 { 02285 //find number of the changed ones 02286 int32_t num_changed=0; 02287 for (ii=0;changed2dnum[ii]>=0;ii++) 02288 num_changed++; 02289 02290 if (num_changed>0) 02291 { 02292 pthread_t* threads= new pthread_t[num_threads-1]; 02293 S_THREAD_PARAM_REACTIVATE_VANILLA* params = new S_THREAD_PARAM_REACTIVATE_VANILLA[num_threads]; 02294 int32_t step= num_changed/num_threads; 02295 02296 // alloc num_threads many tmp buffers 02297 float64_t* tmp_lin=new float64_t[totdoc*num_threads]; 02298 memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads); 02299 float64_t* tmp_aicache=new float64_t[totdoc*num_threads]; 02300 memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads); 02301 02302 int32_t thr; 02303 for (thr=0; thr<num_threads-1; thr++) 02304 { 02305 params[thr].kernel=kernel; 02306 params[thr].lin=&tmp_lin[thr*totdoc]; 02307 params[thr].aicache=&tmp_aicache[thr*totdoc]; 02308 params[thr].a=a; 02309 params[thr].a_old=a_old; 02310 params[thr].changed2dnum=changed2dnum; 02311 params[thr].inactive2dnum=inactive2dnum; 02312 params[thr].label=label; 02313 params[thr].start = thr*step; 02314 params[thr].end = (thr+1)*step; 02315 pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)¶ms[thr]); 02316 } 02317 02318 params[thr].kernel=kernel; 02319 params[thr].lin=&tmp_lin[thr*totdoc]; 02320 params[thr].aicache=&tmp_aicache[thr*totdoc]; 02321 params[thr].a=a; 02322 params[thr].a_old=a_old; 02323 params[thr].changed2dnum=changed2dnum; 02324 params[thr].inactive2dnum=inactive2dnum; 02325 params[thr].label=label; 02326 params[thr].start = thr*step; 02327 params[thr].end = num_changed; 02328 reactivate_inactive_examples_vanilla_helper((void*) ¶ms[thr]); 02329 02330 for (jj=0;(j=inactive2dnum[jj])>=0;jj++) 02331 lin[j]+=tmp_lin[totdoc*thr+j]; 02332 02333 for (thr=0; thr<num_threads-1; thr++) 02334 { 02335 pthread_join(threads[thr], NULL); 02336 02337 //add up results 02338 for (jj=0;(j=inactive2dnum[jj])>=0;jj++) 02339 lin[j]+=tmp_lin[totdoc*thr+j]; 02340 } 02341 02342 delete[] tmp_lin; 02343 delete[] tmp_aicache; 02344 delete[] threads; 02345 delete[] params; 02346 } 02347 } 02348 #endif 02349 } 02350 delete[] changed; 02351 delete[] changed2dnum; 02352 delete[] inactive; 02353 delete[] inactive2dnum; 02354 } 02355 02356 (*maxdiff)=0; 02357 for (i=0;i<totdoc;i++) { 02358 shrink_state->inactive_since[i]=shrink_state->deactnum-1; 02359 if(!inconsistent[i]) { 02360 dist=(lin[i]-model->b)*(float64_t)label[i]; 02361 target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]); 02362 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; 02363 if((a[i]>learn_parm->epsilon_a) && (dist > target)) { 02364 if((dist-target)>(*maxdiff)) /* largest violation */ 02365 (*maxdiff)=dist-target; 02366 } 02367 else if((a[i]<ex_c) && (dist < target)) { 02368 if((target-dist)>(*maxdiff)) /* largest violation */ 02369 (*maxdiff)=target-dist; 02370 } 02371 if((a[i]>(0+learn_parm->epsilon_a)) 02372 && (a[i]<ex_c)) { 02373 shrink_state->active[i]=1; /* not at bound */ 02374 } 02375 else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { 02376 shrink_state->active[i]=1; 02377 } 02378 else if((a[i]>=ex_c) 02379 && (dist > (target-learn_parm->epsilon_shrink))) { 02380 shrink_state->active[i]=1; 02381 } 02382 else if(learn_parm->sharedslack) { /* make all active when sharedslack */ 02383 shrink_state->active[i]=1; 02384 } 02385 } 02386 } 02387 02388 if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */ 02389 for (i=0;i<totdoc;i++) { 02390 (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i]; 02391 } 02392 for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { 02393 delete[] shrink_state->a_history[t]; 02394 shrink_state->a_history[t]=0; 02395 } 02396 } 02397 } 02398 02399 02400 02401 /* start the optimizer and return the optimal values */ 02402 float64_t* CSVMLight::optimize_qp( 02403 QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold, 02404 int32_t& svm_maxqpsize) 02405 { 02406 register int32_t i, j, result; 02407 float64_t margin, obj_before, obj_after; 02408 float64_t sigdig, dist, epsilon_loqo; 02409 int32_t iter; 02410 02411 if(!primal) { /* allocate memory at first call */ 02412 primal=new float64_t[nx*3]; 02413 dual=new float64_t[nx*2+1]; 02414 } 02415 02416 obj_before=0; /* calculate objective before optimization */ 02417 for(i=0;i<qp->opt_n;i++) { 02418 obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]); 02419 obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]); 02420 for(j=0;j<i;j++) { 02421 obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]); 02422 } 02423 } 02424 02425 result=STILL_RUNNING; 02426 qp->opt_ce0[0]*=(-1.0); 02427 /* Run pr_loqo. If a run fails, try again with parameters which lead */ 02428 /* to a slower, but more robust setting. */ 02429 for(margin=init_margin,iter=init_iter; 02430 (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) { 02431 02432 opt_precision=CMath::max(opt_precision, DEF_PRECISION); 02433 sigdig=-log10(opt_precision); 02434 02435 result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m, 02436 (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g, 02437 (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0, 02438 (float64_t *)qp->opt_low,(float64_t *)qp->opt_up, 02439 (float64_t *)primal,(float64_t *)dual, 02440 (int32_t)(verbosity-2), 02441 (float64_t)sigdig,(int32_t)iter, 02442 (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0); 02443 02444 if(CMath::is_nan(dual[0])) { /* check for choldc problem */ 02445 if(verbosity>=2) { 02446 SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n"); 02447 } 02448 if(init_margin<0.80) { /* become more conservative in general */ 02449 init_margin=(4.0*margin+1.0)/5.0; 02450 } 02451 margin=(margin+1.0)/2.0; 02452 (opt_precision)*=10.0; /* reduce precision */ 02453 if(verbosity>=2) { 02454 SG_SDEBUG("Reducing precision of PR_LOQO.\n"); 02455 } 02456 } 02457 else if(result!=OPTIMAL_SOLUTION) { 02458 iter+=2000; 02459 init_iter+=10; 02460 (opt_precision)*=10.0; /* reduce precision */ 02461 if(verbosity>=2) { 02462 SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result); 02463 } 02464 } 02465 } 02466 02467 if(qp->opt_m) /* Thanks to Alex Smola for this hint */ 02468 model_b=dual[0]; 02469 else 02470 model_b=0; 02471 02472 /* Check the precision of the alphas. If results of current optimization */ 02473 /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */ 02474 epsilon_loqo=1E-10; 02475 for(i=0;i<qp->opt_n;i++) { 02476 dist=-model_b*qp->opt_ce[i]; 02477 dist+=(qp->opt_g0[i]+1.0); 02478 for(j=0;j<i;j++) { 02479 dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]); 02480 } 02481 for(j=i;j<qp->opt_n;j++) { 02482 dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]); 02483 } 02484 /* SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */ 02485 if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) { 02486 epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0; 02487 } 02488 else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) { 02489 epsilon_loqo=primal[i]*2.0; 02490 } 02491 } 02492 02493 for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */ 02494 if(primal[i]<=(0+epsilon_loqo)) { 02495 primal[i]=0; 02496 } 02497 else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) { 02498 primal[i]=qp->opt_up[i]; 02499 } 02500 } 02501 02502 obj_after=0; /* calculate objective after optimization */ 02503 for(i=0;i<qp->opt_n;i++) { 02504 obj_after+=(qp->opt_g0[i]*primal[i]); 02505 obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]); 02506 for(j=0;j<i;j++) { 02507 obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]); 02508 } 02509 } 02510 02511 /* if optimizer returned NAN values, reset and retry with smaller */ 02512 /* working set. */ 02513 if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) { 02514 for(i=0;i<qp->opt_n;i++) { 02515 primal[i]=qp->opt_xinit[i]; 02516 } 02517 model_b=0; 02518 if(svm_maxqpsize>2) { 02519 svm_maxqpsize--; /* decrease size of qp-subproblems */ 02520 } 02521 } 02522 02523 if(obj_after >= obj_before) { /* check whether there was progress */ 02524 (opt_precision)/=100.0; 02525 precision_violations++; 02526 if(verbosity>=2) { 02527 SG_SDEBUG("Increasing Precision of PR_LOQO.\n"); 02528 } 02529 } 02530 02531 if(precision_violations > 500) { 02532 (*epsilon_crit)*=10.0; 02533 precision_violations=0; 02534 SG_SINFO("Relaxing epsilon on KT-Conditions.\n"); 02535 } 02536 02537 (*threshold)=model_b; 02538 02539 if(result!=OPTIMAL_SOLUTION) { 02540 SG_SERROR("PR_LOQO did not converge.\n"); 02541 return(qp->opt_xinit); 02542 } 02543 else { 02544 return(primal); 02545 } 02546 } 02547 02548 02549 #endif //USE_SVMLIGHT