SHOGUN v0.9.3
SVM_light.cpp
Go to the documentation of this file.
00001 /***********************************************************************/
00002 /*                                                                     */
00003 /*   SVM_light.cpp                                                     */
00004 /*                                                                     */
00005 /*   Author: Thorsten Joachims                                         */
00006 /*   Date: 19.07.99                                                    */
00007 /*                                                                     */
00008 /*   Copyright (c) 1999  Universitaet Dortmund - All rights reserved   */
00009 /*                                                                     */
00010 /*   This software is available for non-commercial use only. It must   */
00011 /*   not be modified and distributed without prior permission of the   */
00012 /*   author. The author is not responsible for implications from the   */
00013 /*   use of this software.                                             */
00014 /*                                                                     */
00015 /*   THIS INCLUDES THE FOLLOWING ADDITIONS                             */
00016 /*   Generic Kernel Interfacing: Soeren Sonnenburg                     */
00017 /*   Parallizations: Soeren Sonnenburg                                 */
00018 /*   Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg,      */
00019 /*                          Alexander Zien, Marius Kloft, Chen Guohua  */
00020 /*   Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg                 */
00021 /*                                                                     */
00022 /***********************************************************************/
00023 #include "lib/config.h"
00024 
00025 #ifdef USE_SVMLIGHT
00026 
00027 #include "lib/io.h"
00028 #include "lib/Signal.h"
00029 #include "lib/Mathematics.h"
00030 #include "lib/Time.h"
00031 #include "lib/lapack.h"
00032 
00033 #include "features/SimpleFeatures.h"
00034 #include "classifier/svm/SVM_light.h"
00035 #include "classifier/svm/pr_loqo.h"
00036 
00037 #include "kernel/Kernel.h"
00038 #include "classifier/KernelMachine.h"
00039 #include "kernel/CombinedKernel.h"
00040 
00041 #include <unistd.h>
00042 
00043 #include "base/Parallel.h"
00044 
00045 #ifndef WIN32
00046 #include <pthread.h>
00047 #endif
00048 
00049 
00050 #ifdef HAVE_BOOST_SERIALIZATION
00051 #include <boost/serialization/export.hpp>
00052 BOOST_CLASS_EXPORT(shogun::CSVMLight);
00053 #endif //HAVE_BOOST_SERIALIZATION
00054 
00055 using namespace shogun;
00056 
00057 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00058 struct S_THREAD_PARAM_REACTIVATE_LINADD
00059 {
00060     CKernel* kernel;
00061     float64_t* lin;
00062     float64_t* last_lin;
00063     int32_t* active;
00064     int32_t* docs;
00065     int32_t start;
00066     int32_t end;
00067 };
00068 
00069 struct S_THREAD_PARAM
00070 {
00071     float64_t * lin ;
00072     float64_t* W;
00073     int32_t start, end;
00074     int32_t * active2dnum ;
00075     int32_t * docs ;
00076     CKernel* kernel ;
00077 };
00078 
00079 struct S_THREAD_PARAM_REACTIVATE_VANILLA
00080 {
00081     CKernel* kernel;
00082     float64_t* lin;
00083     float64_t* aicache;
00084     float64_t* a;
00085     float64_t* a_old;
00086     int32_t* changed2dnum;
00087     int32_t* inactive2dnum;
00088     int32_t* label;
00089     int32_t start;
00090     int32_t end;
00091 };
00092 
00093 struct S_THREAD_PARAM_KERNEL
00094 {
00095     float64_t *Kval ;
00096     int32_t *KI, *KJ ;
00097     int32_t start, end;
00098     CSVMLight* svmlight;
00099 };
00100 
00101 #endif // DOXYGEN_SHOULD_SKIP_THIS
00102 
00103 void* CSVMLight::update_linear_component_linadd_helper(void* p)
00104 {
00105     S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
00106 
00107     int32_t jj=0, j=0 ;
00108 
00109     for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
00110         params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);
00111 
00112     return NULL ;
00113 }
00114 
00115 void* CSVMLight::compute_kernel_helper(void* p)
00116 {
00117     S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;
00118 
00119     int32_t jj=0 ;
00120     for (jj=params->start;jj<params->end;jj++)
00121         params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ;
00122 
00123     return NULL ;
00124 }
00125 
00126 CSVMLight::CSVMLight()
00127 : CSVM()
00128 {
00129     init();
00130     set_kernel(NULL);
00131 }
00132 
00133 CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
00134 : CSVM(C, k, lab)
00135 {
00136     init();
00137 }
00138 
00139 void CSVMLight::init()
00140 {
00141     //optimizer settings
00142     primal=NULL;
00143     dual=NULL;
00144     init_margin=0.15;
00145     init_iter=500;
00146     precision_violations=0;
00147     model_b=0;
00148     verbosity=1;
00149     opt_precision=DEF_PRECISION;
00150 
00151     // svm variables
00152     W=NULL;
00153     model=new MODEL[1];
00154     learn_parm=new LEARN_PARM[1];
00155     model->supvec=NULL;
00156     model->alpha=NULL;
00157     model->index=NULL;
00158 
00159     // MKL stuff
00160     mymaxdiff=1 ;
00161     mkl_converged=false;
00162 }
00163 
00164 CSVMLight::~CSVMLight()
00165 {
00166 
00167   delete[] model->supvec;
00168   delete[] model->alpha;
00169   delete[] model->index;
00170   delete[] model;
00171   delete[] learn_parm;
00172 
00173   // MKL stuff
00174   delete[] W ;
00175 
00176   // optimizer variables
00177   delete[] dual;
00178   delete[] primal;
00179 }
00180 
00181 bool CSVMLight::train(CFeatures* data)
00182 {
00183     //certain setup params
00184     mkl_converged=false;
00185     verbosity=1 ;
00186     init_margin=0.15;
00187     init_iter=500;
00188     precision_violations=0;
00189     opt_precision=DEF_PRECISION;
00190 
00191     strcpy (learn_parm->predfile, "");
00192     learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
00193     learn_parm->sharedslack=0;
00194     learn_parm->remove_inconsistent=0;
00195     learn_parm->skip_final_opt_check=0;
00196     learn_parm->svm_maxqpsize=get_qpsize();
00197     learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
00198     learn_parm->maxiter=100000;
00199     learn_parm->svm_iter_to_shrink=100;
00200     learn_parm->svm_c=C1;
00201     learn_parm->transduction_posratio=0.33;
00202     learn_parm->svm_costratio=C2/C1;
00203     learn_parm->svm_costratio_unlab=1.0;
00204     learn_parm->svm_unlabbound=1E-5;
00205     learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
00206     learn_parm->epsilon_a=1E-15;
00207     learn_parm->compute_loo=0;
00208     learn_parm->rho=1.0;
00209     learn_parm->xa_depth=0;
00210 
00211     if (!kernel)
00212         SG_ERROR( "SVM_light can not proceed without kernel!\n");
00213 
00214     if (data)
00215     {
00216         if (labels->get_num_labels() != data->get_num_vectors())
00217             SG_ERROR("Number of training vectors does not match number of labels\n");
00218         kernel->init(data, data);
00219     }
00220 
00221     if (!kernel->has_features())
00222         SG_ERROR( "SVM_light can not proceed without initialized kernel!\n");
00223 
00224     ASSERT(labels && labels->get_num_labels());
00225     ASSERT(labels->is_two_class_labeling());
00226     ASSERT(kernel->get_num_vec_lhs()==labels->get_num_labels());
00227 
00228     // in case of LINADD enabled kernels cleanup!
00229     if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
00230         kernel->clear_normal() ;
00231 
00232     // output some info
00233     SG_DEBUG( "threads = %i\n", parallel->get_num_threads()) ;
00234     SG_DEBUG( "qpsize = %i\n", learn_parm->svm_maxqpsize) ;
00235     SG_DEBUG( "epsilon = %1.1e\n", learn_parm->epsilon_crit) ;
00236     SG_DEBUG( "kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD)) ;
00237     SG_DEBUG( "kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION)) ;
00238     SG_DEBUG( "kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION)) ;
00239     SG_DEBUG( "kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" ) ;
00240     SG_DEBUG( "get_solver_type() = %i\n", get_solver_type());
00241     SG_DEBUG( "get_linadd_enabled() = %i\n", get_linadd_enabled()) ;
00242     SG_DEBUG( "get_batch_computation_enabled() = %i\n", get_batch_computation_enabled()) ;
00243     SG_DEBUG( "kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) ;
00244 
00245     use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
00246                          (get_linadd_enabled() && kernel->has_property(KP_LINADD)));
00247 
00248     SG_DEBUG( "use_kernel_cache = %i\n", use_kernel_cache) ;
00249 
00250     if (kernel->get_kernel_type() == K_COMBINED)
00251     {
00252         CKernel* kn = ((CCombinedKernel*)kernel)->get_first_kernel();
00253 
00254         while (kn)
00255         {
00256             // allocate kernel cache but clean up beforehand
00257             kn->resize_kernel_cache(kn->get_cache_size());
00258             SG_UNREF(kn);
00259             kn = ((CCombinedKernel*) kernel)->get_next_kernel();
00260         }
00261     }
00262 
00263     kernel->resize_kernel_cache(kernel->get_cache_size());
00264 
00265     // train the svm
00266     svm_learn();
00267 
00268     // brain damaged svm light work around
00269     create_new_model(model->sv_num-1);
00270     set_bias(-model->b);
00271     for (int32_t i=0; i<model->sv_num-1; i++)
00272     {
00273         set_alpha(i, model->alpha[i+1]);
00274         set_support_vector(i, model->supvec[i+1]);
00275     }
00276 
00277     // in case of LINADD enabled kernels cleanup!
00278     if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
00279     {
00280         kernel->clear_normal() ;
00281         kernel->delete_optimization() ;
00282     }
00283 
00284     if (use_kernel_cache)
00285         kernel->kernel_cache_cleanup();
00286 
00287     return true ;
00288 }
00289 
00290 int32_t CSVMLight::get_runtime()
00291 {
00292   clock_t start;
00293   start = clock();
00294   return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
00295 }
00296 
00297 
00298 void CSVMLight::svm_learn()
00299 {
00300     int32_t *inconsistent, i;
00301     int32_t inconsistentnum;
00302     int32_t misclassified,upsupvecnum;
00303     float64_t maxdiff, *lin, *c, *a;
00304     int32_t runtime_start,runtime_end;
00305     int32_t iterations;
00306     int32_t trainpos=0, trainneg=0 ;
00307     int32_t totdoc=0;
00308     ASSERT(labels);
00309     int32_t* label=labels->get_int_labels(totdoc);
00310     ASSERT(label);
00311     int32_t* docs=new int32_t[totdoc];
00312     delete[] W;
00313     W=NULL;
00314     count = 0 ;
00315 
00316     if (kernel->has_property(KP_KERNCOMBINATION) && callback)
00317     {
00318         W = new float64_t[totdoc*kernel->get_num_subkernels()];
00319         for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
00320             W[i]=0;
00321     }
00322 
00323     for (i=0; i<totdoc; i++)
00324         docs[i]=i;
00325 
00326     float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
00327     float64_t *a_fullset;  /* buffer for storing alpha on full sample in loo */
00328     TIMING timing_profile;
00329     SHRINK_STATE shrink_state;
00330 
00331     runtime_start=get_runtime();
00332     timing_profile.time_kernel=0;
00333     timing_profile.time_opti=0;
00334     timing_profile.time_shrink=0;
00335     timing_profile.time_update=0;
00336     timing_profile.time_model=0;
00337     timing_profile.time_check=0;
00338     timing_profile.time_select=0;
00339 
00340     /* make sure -n value is reasonable */
00341     if((learn_parm->svm_newvarsinqp < 2)
00342             || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
00343         learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
00344     }
00345 
00346     init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);
00347 
00348     inconsistent = new int32_t[totdoc];
00349     c = new float64_t[totdoc];
00350     a = new float64_t[totdoc];
00351     a_fullset = new float64_t[totdoc];
00352     xi_fullset = new float64_t[totdoc];
00353     lin = new float64_t[totdoc];
00354     if (linear_term.size() > 0)
00355         learn_parm->eps=get_linear_term_array();
00356     else
00357     {
00358         learn_parm->eps=new float64_t[totdoc];      /* equivalent regression epsilon for classification */
00359         CMath::fill_vector(learn_parm->eps, totdoc, -1.0);
00360     }
00361 
00362     learn_parm->svm_cost = new float64_t[totdoc];
00363 
00364     delete[] model->supvec;
00365     delete[] model->alpha;
00366     delete[] model->index;
00367     model->supvec = new int32_t[totdoc+2];
00368     model->alpha = new float64_t[totdoc+2];
00369     model->index = new int32_t[totdoc+2];
00370 
00371     model->at_upper_bound=0;
00372     model->b=0;
00373     model->supvec[0]=0;  /* element 0 reserved and empty for now */
00374     model->alpha[0]=0;
00375     model->totdoc=totdoc;
00376 
00377     model->kernel=kernel;
00378 
00379     model->sv_num=1;
00380     model->loo_error=-1;
00381     model->loo_recall=-1;
00382     model->loo_precision=-1;
00383     model->xa_error=-1;
00384     model->xa_recall=-1;
00385     model->xa_precision=-1;
00386     inconsistentnum=0;
00387 
00388 
00389     for (i=0;i<totdoc;i++) {    /* various inits */
00390         inconsistent[i]=0;
00391         c[i]=0;
00392         a[i]=0;
00393         lin[i]=0;
00394 
00395         if(label[i] > 0) {
00396             learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
00397                 fabs((float64_t)label[i]);
00398             label[i]=1;
00399             trainpos++;
00400         }
00401         else if(label[i] < 0) {
00402             learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
00403             label[i]=-1;
00404             trainneg++;
00405         }
00406         else {
00407             learn_parm->svm_cost[i]=0;
00408         }
00409     }
00410 
00411   /* compute starting state for initial alpha values */
00412     SG_DEBUG( "alpha:%d num_sv:%d\n", m_alpha, get_num_support_vectors());
00413 
00414     if(m_alpha && get_num_support_vectors()) {
00415         if(verbosity>=1) {
00416             SG_INFO( "Computing starting state...");
00417         }
00418 
00419     float64_t* alpha = new float64_t[totdoc];
00420 
00421     for (i=0; i<totdoc; i++)
00422         alpha[i]=0;
00423 
00424     for (i=0; i<get_num_support_vectors(); i++)
00425         alpha[get_support_vector(i)]=get_alpha(i);
00426 
00427     int32_t* index = new int32_t[totdoc];
00428     int32_t* index2dnum = new int32_t[totdoc+11];
00429     float64_t* aicache = new float64_t[totdoc];
00430     for (i=0;i<totdoc;i++) {    /* create full index and clip alphas */
00431       index[i]=1;
00432       alpha[i]=fabs(alpha[i]);
00433       if(alpha[i]<0) alpha[i]=0;
00434       if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
00435     }
00436 
00437     if (use_kernel_cache)
00438     {
00439         if (callback &&
00440                 (!((CCombinedKernel*)kernel)->get_append_subkernel_weights())
00441            )
00442         {
00443             CCombinedKernel* k      = (CCombinedKernel*) kernel;
00444             CKernel* kn = k->get_first_kernel();
00445 
00446             while (kn)
00447             {
00448                 for (i=0;i<totdoc;i++)     // fill kernel cache with unbounded SV
00449                     if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
00450                             && (kn->kernel_cache_space_available()))
00451                         kn->cache_kernel_row(i);
00452 
00453                 for (i=0;i<totdoc;i++)     // fill rest of kernel cache with bounded SV
00454                     if((alpha[i]==learn_parm->svm_cost[i])
00455                             && (kn->kernel_cache_space_available()))
00456                         kn->cache_kernel_row(i);
00457 
00458                 SG_UNREF(kn);
00459                 kn = k->get_next_kernel();
00460             }
00461         }
00462         else
00463         {
00464             for (i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
00465                 if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
00466                         && (kernel->kernel_cache_space_available()))
00467                     kernel->cache_kernel_row(i);
00468 
00469             for (i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
00470                 if((alpha[i]==learn_parm->svm_cost[i])
00471                         && (kernel->kernel_cache_space_available()))
00472                     kernel->cache_kernel_row(i);
00473         }
00474     }
00475     (void)compute_index(index,totdoc,index2dnum);
00476     update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
00477                 lin,aicache,NULL);
00478     (void)calculate_svm_model(docs,label,lin,alpha,a,c,
00479                   index2dnum,index2dnum);
00480     for (i=0;i<totdoc;i++) {    /* copy initial alphas */
00481       a[i]=alpha[i];
00482     }
00483 
00484     delete[] index;
00485     delete[] index2dnum;
00486     delete[] aicache;
00487     delete[] alpha;
00488 
00489     if(verbosity>=1)
00490         SG_DONE();
00491   }
00492         SG_DEBUG( "%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg);
00493         SG_DEBUG( "Optimizing...\n");
00494 
00495     /* train the svm */
00496   iterations=optimize_to_convergence(docs,label,totdoc,
00497                      &shrink_state,inconsistent,a,lin,
00498                      c,&timing_profile,
00499                      &maxdiff,(int32_t)-1,
00500                      (int32_t)1);
00501 
00502 
00503     if(verbosity>=1) {
00504         if(verbosity==1)
00505         {
00506             SG_DONE();
00507             SG_DEBUG("(%ld iterations)", iterations);
00508         }
00509 
00510         misclassified=0;
00511         for (i=0;(i<totdoc);i++) { /* get final statistic */
00512             if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
00513                 misclassified++;
00514         }
00515 
00516         SG_INFO( "Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
00517                 misclassified,maxdiff);
00518 
00519         SG_INFO( "obj = %.16f, rho = %.16f\n",get_objective(),model->b);
00520         if (maxdiff>epsilon)
00521             SG_WARNING( "maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon);
00522 
00523         runtime_end=get_runtime();
00524         upsupvecnum=0;
00525         for (i=1;i<model->sv_num;i++)
00526         {
00527             if(fabs(model->alpha[i]) >=
00528                     (learn_parm->svm_cost[model->supvec[i]]-
00529                      learn_parm->epsilon_a))
00530                 upsupvecnum++;
00531         }
00532         SG_INFO( "Number of SV: %ld (including %ld at upper bound)\n",
00533                 model->sv_num-1,upsupvecnum);
00534     }
00535 
00536     shrink_state_cleanup(&shrink_state);
00537     delete[] label;
00538     delete[] inconsistent;
00539     delete[] c;
00540     delete[] a;
00541     delete[] a_fullset;
00542     delete[] xi_fullset;
00543     delete[] lin;
00544     delete[] learn_parm->eps;
00545     delete[] learn_parm->svm_cost;
00546     delete[] docs;
00547 }
00548 
00549 int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
00550                  SHRINK_STATE *shrink_state,
00551                  int32_t *inconsistent,
00552                  float64_t *a, float64_t *lin, float64_t *c,
00553                  TIMING *timing_profile, float64_t *maxdiff,
00554                  int32_t heldout, int32_t retrain)
00555      /* docs: Training vectors (x-part) */
00556      /* label: Training labels/value (y-part, zero if test example for
00557                   transduction) */
00558      /* totdoc: Number of examples in docs/label */
00559      /* laern_parm: Learning paramenters */
00560      /* kernel_parm: Kernel paramenters */
00561      /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
00562                       NULL if linear. */
00563      /* shrink_state: State of active variables */
00564      /* inconsistent: examples thrown out as inconstistent */
00565      /* a: alphas */
00566      /* lin: linear component of gradient */
00567      /* c: right hand side of inequalities (margin) */
00568      /* maxdiff: returns maximum violation of KT-conditions */
00569      /* heldout: marks held-out example for leave-one-out (or -1) */
00570      /* retrain: selects training mode (1=regular / 2=holdout) */
00571 {
00572   int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
00573   int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
00574   int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
00575   int32_t *working2dnum,*selexam;
00576   int32_t activenum;
00577   float64_t criterion, eq;
00578   float64_t *a_old;
00579   int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
00580   int32_t transductcycle;
00581   int32_t transduction;
00582   float64_t epsilon_crit_org;
00583   float64_t bestmaxdiff;
00584   float64_t worstmaxdiff;
00585   int32_t   bestmaxdiffiter,terminate;
00586   bool reactivated=false;
00587 
00588   float64_t *selcrit;  /* buffer for sorting */
00589   float64_t *aicache;  /* buffer to keep one row of hessian */
00590   QP qp;            /* buffer for one quadratic program */
00591 
00592   epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
00593   if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
00594       learn_parm->epsilon_crit=2.0;
00595       /* caching makes no sense for linear kernel */
00596   }
00597   learn_parm->epsilon_shrink=2;
00598   (*maxdiff)=1;
00599 
00600   SG_DEBUG("totdoc:%d\n",totdoc);
00601   chosen = new int32_t[totdoc];
00602   last_suboptimal_at =new int32_t[totdoc];
00603   key =new int32_t[totdoc+11];
00604   selcrit =new float64_t[totdoc];
00605   selexam =new int32_t[totdoc];
00606   a_old =new float64_t[totdoc];
00607   aicache =new float64_t[totdoc];
00608   working2dnum =new int32_t[totdoc+11];
00609   active2dnum =new int32_t[totdoc+11];
00610   qp.opt_ce =new float64_t[learn_parm->svm_maxqpsize];
00611   qp.opt_ce0 =new float64_t[1];
00612   qp.opt_g =new float64_t[learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize];
00613   qp.opt_g0 =new float64_t[learn_parm->svm_maxqpsize];
00614   qp.opt_xinit =new float64_t[learn_parm->svm_maxqpsize];
00615   qp.opt_low=new float64_t[learn_parm->svm_maxqpsize];
00616   qp.opt_up=new float64_t[learn_parm->svm_maxqpsize];
00617 
00618   choosenum=0;
00619   inconsistentnum=0;
00620   transductcycle=0;
00621   transduction=0;
00622   if(!retrain) retrain=1;
00623   iteration=1;
00624   bestmaxdiffiter=1;
00625   bestmaxdiff=999999999;
00626   worstmaxdiff=1e-10;
00627   terminate=0;
00628 
00629   kernel->set_time(iteration);  /* for lru cache */
00630 
00631   for (i=0;i<totdoc;i++) {    /* various inits */
00632     chosen[i]=0;
00633     a_old[i]=a[i];
00634     last_suboptimal_at[i]=1;
00635     if(inconsistent[i])
00636       inconsistentnum++;
00637   }
00638   activenum=compute_index(shrink_state->active,totdoc,active2dnum);
00639   inactivenum=totdoc-activenum;
00640   clear_index(working2dnum);
00641 
00642   /* repeat this loop until we have convergence */
00643   CTime start_time;
00644   mkl_converged=false;
00645 
00646 #ifdef CYGWIN
00647   for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
00648 #else
00649       CSignal::clear_cancel();
00650       for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
00651 #endif
00652 
00653       if(use_kernel_cache)
00654           kernel->set_time(iteration);  /* for lru cache */
00655 
00656       if(verbosity>=2) t0=get_runtime();
00657       if(verbosity>=3) {
00658           SG_DEBUG( "\nSelecting working set... ");
00659       }
00660 
00661       if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
00662           learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
00663 
00664       i=0;
00665       for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
00666           if((chosen[j]>=(learn_parm->svm_maxqpsize/
00667                           CMath::min(learn_parm->svm_maxqpsize,
00668                                      learn_parm->svm_newvarsinqp)))
00669              || (inconsistent[j])
00670              || (j == heldout)) {
00671               chosen[j]=0;
00672               choosenum--;
00673           }
00674           else {
00675               chosen[j]++;
00676               working2dnum[i++]=j;
00677           }
00678       }
00679       working2dnum[i]=-1;
00680 
00681       if(retrain == 2) {
00682           choosenum=0;
00683           for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
00684               chosen[j]=0;
00685           }
00686           clear_index(working2dnum);
00687           for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
00688               if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
00689                   chosen[i]=99999;
00690                   choosenum++;
00691                   a[i]=0;
00692               }
00693           }
00694           if(learn_parm->biased_hyperplane) {
00695               eq=0;
00696               for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
00697                   eq+=a[i]*label[i];
00698               }
00699               for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
00700                   if((eq*label[i] > 0) && (a[i] > 0)) {
00701                       chosen[i]=88888;
00702                       choosenum++;
00703                       if((eq*label[i]) > a[i]) {
00704                           eq-=(a[i]*label[i]);
00705                           a[i]=0;
00706                       }
00707                       else {
00708                           a[i]-=(eq*label[i]);
00709                           eq=0;
00710                       }
00711                   }
00712               }
00713           }
00714           compute_index(chosen,totdoc,working2dnum);
00715       }
00716       else
00717       {   /* select working set according to steepest gradient */
00718           if(iteration % 101)
00719           {
00720               already_chosen=0;
00721               if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
00722                       (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
00723               {
00724                   /* select part of the working set from cache */
00725                   already_chosen=select_next_qp_subproblem_grad(
00726                       label,a,lin,c,totdoc,
00727                       (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
00728                                         learn_parm->svm_newvarsinqp)/2),
00729                       inconsistent,active2dnum,
00730                       working2dnum,selcrit,selexam,1,
00731                       key,chosen);
00732                   choosenum+=already_chosen;
00733               }
00734               choosenum+=select_next_qp_subproblem_grad(
00735                   label,a,lin,c,totdoc,
00736                   CMath::min(learn_parm->svm_maxqpsize-choosenum,
00737                              learn_parm->svm_newvarsinqp-already_chosen),
00738                   inconsistent,active2dnum,
00739                   working2dnum,selcrit,selexam,0,key,
00740                   chosen);
00741           }
00742           else { /* once in a while, select a somewhat random working set
00743                     to get unlocked of infinite loops due to numerical
00744                     inaccuracies in the core qp-solver */
00745               choosenum+=select_next_qp_subproblem_rand(
00746                   label,a,lin,c,totdoc,
00747                   CMath::min(learn_parm->svm_maxqpsize-choosenum,
00748                              learn_parm->svm_newvarsinqp),
00749                   inconsistent,active2dnum,
00750                   working2dnum,selcrit,selexam,key,
00751                   chosen,iteration);
00752           }
00753       }
00754 
00755       if(verbosity>=2) {
00756           SG_INFO( " %ld vectors chosen\n",choosenum);
00757       }
00758 
00759       if(verbosity>=2) t1=get_runtime();
00760 
00761       if (use_kernel_cache)
00762       {
00763           // in case of MKL w/o linadd cache each kernel independently
00764           // else if linadd is disabled cache single kernel
00765           if ( callback &&
00766                   (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
00767              )
00768           {
00769               CCombinedKernel* k      = (CCombinedKernel*) kernel;
00770               CKernel* kn = k->get_first_kernel();
00771 
00772               while (kn)
00773               {
00774                   kn->cache_multiple_kernel_rows(working2dnum, choosenum);
00775                   SG_UNREF(kn);
00776                   kn = k->get_next_kernel() ;
00777               }
00778           }
00779           else
00780               kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
00781       }
00782 
00783       if(verbosity>=2) t2=get_runtime();
00784 
00785       if(retrain != 2) {
00786           optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
00787                        totdoc,working2dnum,choosenum,a,lin,c,
00788                        aicache,&qp,&epsilon_crit_org);
00789       }
00790 
00791       if(verbosity>=2) t3=get_runtime();
00792       update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
00793                               lin,aicache,c);
00794 
00795       if(verbosity>=2) t4=get_runtime();
00796       supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);
00797 
00798       if(verbosity>=2) t5=get_runtime();
00799 
00800       for (jj=0;(i=working2dnum[jj])>=0;jj++) {
00801           a_old[i]=a[i];
00802       }
00803 
00804       retrain=check_optimality(label,a,lin,c,totdoc,
00805                                maxdiff,epsilon_crit_org,&misclassified,
00806                                inconsistent,active2dnum,last_suboptimal_at,
00807                                iteration);
00808 
00809       if(verbosity>=2) {
00810           t6=get_runtime();
00811           timing_profile->time_select+=t1-t0;
00812           timing_profile->time_kernel+=t2-t1;
00813           timing_profile->time_opti+=t3-t2;
00814           timing_profile->time_update+=t4-t3;
00815           timing_profile->time_model+=t5-t4;
00816           timing_profile->time_check+=t6-t5;
00817       }
00818 
00819       /* checking whether optimizer got stuck */
00820       if((*maxdiff) < bestmaxdiff) {
00821           bestmaxdiff=(*maxdiff);
00822           bestmaxdiffiter=iteration;
00823       }
00824       if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
00825           /* int32_t time no progress? */
00826           terminate=1;
00827           retrain=0;
00828           SG_WARNING( "Relaxing KT-Conditions due to slow progress! Terminating!\n");
00829       }
00830 
00831       noshrink= (get_shrinking_enabled()) ? 0 : 1;
00832 
00833       if ((!callback) && (!retrain) && (inactivenum>0) &&
00834               ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
00835       {
00836           t1=get_runtime();
00837           SG_DEBUG( "reactivating inactive examples\n");
00838 
00839           reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
00840                                        iteration,inconsistent,
00841                                        docs,aicache,
00842                                        maxdiff);
00843           reactivated=true;
00844           SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org);
00845           /* Update to new active variables. */
00846           activenum=compute_index(shrink_state->active,totdoc,active2dnum);
00847           inactivenum=totdoc-activenum;
00848           /* reset watchdog */
00849           bestmaxdiff=(*maxdiff);
00850           bestmaxdiffiter=iteration;
00851 
00852           /* termination criterion */
00853           noshrink=1;
00854           retrain=0;
00855           if((*maxdiff) > learn_parm->epsilon_crit)
00856           {
00857               SG_INFO( "restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit);
00858               retrain=1;
00859           }
00860           timing_profile->time_shrink+=get_runtime()-t1;
00861           if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
00862              || (verbosity>=2)) {
00863               SG_DONE();
00864               SG_DEBUG("Number of inactive variables = %ld\n", inactivenum);
00865           }
00866       }
00867 
00868       if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
00869           learn_parm->epsilon_crit=(*maxdiff);
00870       if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
00871           learn_parm->epsilon_crit/=4.0;
00872           retrain=1;
00873           noshrink=1;
00874       }
00875       if(learn_parm->epsilon_crit<epsilon_crit_org)
00876           learn_parm->epsilon_crit=epsilon_crit_org;
00877 
00878       if(verbosity>=2) {
00879           SG_INFO( " => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
00880                        supvecnum,model->at_upper_bound,(*maxdiff));
00881 
00882       }
00883       mymaxdiff=*maxdiff ;
00884 
00885       //don't shrink w/ mkl
00886       if (((iteration % 10) == 0) && (!noshrink) && !callback)
00887       {
00888           activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
00889                   CMath::max((int32_t)(activenum/10),
00890                       CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
00891                   a,inconsistent, c, lin, label);
00892 
00893           inactivenum=totdoc-activenum;
00894 
00895           if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
00896                   && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {
00897 
00898               kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
00899                           (int32_t) (kernel->get_activenum_cache()-supvecnum)),
00900                       shrink_state->active);
00901           }
00902       }
00903 
00904       if (bestmaxdiff>worstmaxdiff)
00905           worstmaxdiff=bestmaxdiff;
00906 
00907       SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6);
00908 
00909       /* Terminate loop */
00910       if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time) {
00911           terminate = 1;
00912           retrain = 0;
00913       }
00914   } /* end of loop */
00915 
00916   SG_DEBUG( "inactive:%d\n", inactivenum);
00917 
00918   if (inactivenum && !reactivated && !callback)
00919   {
00920       SG_DEBUG( "reactivating inactive examples\n");
00921       reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
00922               iteration,inconsistent,
00923               docs,aicache,
00924               maxdiff);
00925       SG_DEBUG( "done reactivating inactive examples\n");
00926       /* Update to new active variables. */
00927       activenum=compute_index(shrink_state->active,totdoc,active2dnum);
00928       inactivenum=totdoc-activenum;
00929       /* reset watchdog */
00930       bestmaxdiff=(*maxdiff);
00931       bestmaxdiffiter=iteration;
00932   }
00933 
00934   criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
00935   CSVM::set_objective(criterion);
00936 
00937   delete[] chosen;
00938   delete[] last_suboptimal_at;
00939   delete[] key;
00940   delete[] selcrit;
00941   delete[] selexam;
00942   delete[] a_old;
00943   delete[] aicache;
00944   delete[] working2dnum;
00945   delete[] active2dnum;
00946   delete[] qp.opt_ce;
00947   delete[] qp.opt_ce0;
00948   delete[] qp.opt_g;
00949   delete[] qp.opt_g0;
00950   delete[] qp.opt_xinit;
00951   delete[] qp.opt_low;
00952   delete[] qp.opt_up;
00953 
00954   learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
00955 
00956   return(iteration);
00957 }
00958 
00959 float64_t CSVMLight::compute_objective_function(
00960     float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
00961     int32_t totdoc)
00962      /* Return value of objective function. */
00963      /* Works only relative to the active variables! */
00964 {
00965   /* calculate value of objective function */
00966   float64_t criterion=0;
00967 
00968   for (int32_t i=0;i<totdoc;i++)
00969       criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
00970 
00971   return(criterion);
00972 }
00973 
00974 
00975 void CSVMLight::clear_index(int32_t *index)
00976               /* initializes and empties index */
00977 {
00978   index[0]=-1;
00979 }
00980 
00981 void CSVMLight::add_to_index(int32_t *index, int32_t elem)
00982      /* initializes and empties index */
00983 {
00984   register int32_t i;
00985   for (i=0;index[i] != -1;i++);
00986   index[i]=elem;
00987   index[i+1]=-1;
00988 }
00989 
00990 int32_t CSVMLight::compute_index(
00991     int32_t *binfeature, int32_t range, int32_t *index)
00992      /* create an inverted index of binfeature */
00993 {
00994   register int32_t i,ii;
00995 
00996   ii=0;
00997   for (i=0;i<range;i++) {
00998     if(binfeature[i]) {
00999       index[ii]=i;
01000       ii++;
01001     }
01002   }
01003   for (i=0;i<4;i++) {
01004     index[ii+i]=-1;
01005   }
01006   return(ii);
01007 }
01008 
01009 
01010 void CSVMLight::optimize_svm(
01011     int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
01012     float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
01013     int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
01014     float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
01015      /* Do optimization on the working set. */
01016 {
01017     int32_t i;
01018     float64_t *a_v;
01019 
01020     //compute_matrices_for_optimization_parallel(docs,label,
01021     //                                         exclude_from_eq_const,eq_target,chosen,
01022     //                                         active2dnum,working2dnum,a,lin,c,
01023     //                                         varnum,totdoc,aicache,qp);
01024 
01025     compute_matrices_for_optimization(docs,label,
01026                                       exclude_from_eq_const,eq_target,chosen,
01027                                       active2dnum,working2dnum,a,lin,c,
01028                                       varnum,totdoc,aicache,qp);
01029 
01030     if(verbosity>=3) {
01031      SG_DEBUG( "Running optimizer...");
01032     }
01033     /* call the qp-subsolver */
01034     a_v=optimize_qp(qp,epsilon_crit_target,
01035             learn_parm->svm_maxqpsize,
01036             &(model->b),                /* in case the optimizer gives us */
01037             learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
01038                                         /* b is calculated in calculate_model. */
01039     if(verbosity>=3) {
01040      SG_DONE();
01041     }
01042 
01043     for (i=0;i<varnum;i++)
01044       a[working2dnum[i]]=a_v[i];
01045 }
01046 
01047 void CSVMLight::compute_matrices_for_optimization_parallel(
01048     int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
01049     float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
01050     float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
01051     float64_t *aicache, QP *qp)
01052 {
01053     if (parallel->get_num_threads()<=1)
01054     {
01055         compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
01056                                                    chosen, active2dnum, key, a, lin, c,
01057                                                    varnum, totdoc, aicache, qp) ;
01058     }
01059 #ifndef WIN32
01060     else
01061     {
01062         register int32_t ki,kj,i,j;
01063         register float64_t kernel_temp;
01064 
01065         qp->opt_n=varnum;
01066         qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
01067         for (j=1;j<model->sv_num;j++) { /* start at 1 */
01068             if((!chosen[model->supvec[j]])
01069                     && (!exclude_from_eq_const[(model->supvec[j])])) {
01070                 qp->opt_ce0[0]+=model->alpha[j];
01071             }
01072         }
01073         if(learn_parm->biased_hyperplane)
01074             qp->opt_m=1;
01075         else
01076             qp->opt_m=0;  /* eq-constraint will be ignored */
01077 
01078         /* init linear part of objective function */
01079         for (i=0;i<varnum;i++) {
01080             qp->opt_g0[i]=lin[key[i]];
01081         }
01082 
01083         ASSERT(parallel->get_num_threads()>1);
01084         int32_t *KI=new int32_t[varnum*varnum] ;
01085         int32_t *KJ=new int32_t[varnum*varnum] ;
01086         int32_t Knum=0 ;
01087         float64_t *Kval = new float64_t[varnum*(varnum+1)/2] ;
01088         for (i=0;i<varnum;i++) {
01089             ki=key[i];
01090             KI[Knum]=docs[ki] ;
01091             KJ[Knum]=docs[ki] ;
01092             Knum++ ;
01093             for (j=i+1;j<varnum;j++)
01094             {
01095                 kj=key[j];
01096                 KI[Knum]=docs[ki] ;
01097                 KJ[Knum]=docs[kj] ;
01098                 Knum++ ;
01099             }
01100         }
01101         ASSERT(Knum<=varnum*(varnum+1)/2);
01102 
01103         pthread_t* threads = new pthread_t[parallel->get_num_threads()-1];
01104         S_THREAD_PARAM_KERNEL* params = new S_THREAD_PARAM_KERNEL[parallel->get_num_threads()-1];
01105         int32_t step= Knum/parallel->get_num_threads();
01106         //SG_DEBUG( "\nkernel-step size: %i\n", step) ;
01107         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01108         {
01109             params[t].svmlight = this;
01110             params[t].start = t*step;
01111             params[t].end = (t+1)*step;
01112             params[t].KI=KI ;
01113             params[t].KJ=KJ ;
01114             params[t].Kval=Kval ;
01115             pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
01116         }
01117         for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
01118             Kval[i]=compute_kernel(KI[i],KJ[i]) ;
01119 
01120         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01121             pthread_join(threads[t], NULL);
01122 
01123         delete[] params;
01124         delete[] threads;
01125 
01126         Knum=0 ;
01127         for (i=0;i<varnum;i++) {
01128             ki=key[i];
01129 
01130             /* Compute the matrix for equality constraints */
01131             qp->opt_ce[i]=label[ki];
01132             qp->opt_low[i]=0;
01133             qp->opt_up[i]=learn_parm->svm_cost[ki];
01134 
01135             kernel_temp=Kval[Knum] ;
01136             Knum++ ;
01137             /* compute linear part of objective function */
01138             qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
01139             /* compute quadratic part of objective function */
01140             qp->opt_g[varnum*i+i]=kernel_temp;
01141 
01142             for (j=i+1;j<varnum;j++) {
01143                 kj=key[j];
01144                 kernel_temp=Kval[Knum] ;
01145                 Knum++ ;
01146                 /* compute linear part of objective function */
01147                 qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
01148                 qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
01149                 /* compute quadratic part of objective function */
01150                 qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
01151                 qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
01152             }
01153 
01154             if(verbosity>=3) {
01155                 if(i % 20 == 0) {
01156                     SG_DEBUG( "%ld..",i);
01157                 }
01158             }
01159         }
01160 
01161         delete[] KI ;
01162         delete[] KJ ;
01163         delete[] Kval ;
01164 
01165         for (i=0;i<varnum;i++) {
01166             /* assure starting at feasible point */
01167             qp->opt_xinit[i]=a[key[i]];
01168             /* set linear part of objective function */
01169             qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
01170         }
01171 
01172         if(verbosity>=3) {
01173             SG_DONE();
01174         }
01175     }
01176 #endif
01177 }
01178 
01179 void CSVMLight::compute_matrices_for_optimization(
01180     int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
01181     float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
01182     float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
01183     float64_t *aicache, QP *qp)
01184 {
01185   register int32_t ki,kj,i,j;
01186   register float64_t kernel_temp;
01187 
01188   qp->opt_n=varnum;
01189   qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
01190   for (j=1;j<model->sv_num;j++) { /* start at 1 */
01191     if((!chosen[model->supvec[j]])
01192        && (!exclude_from_eq_const[(model->supvec[j])])) {
01193       qp->opt_ce0[0]+=model->alpha[j];
01194     }
01195   }
01196   if(learn_parm->biased_hyperplane)
01197     qp->opt_m=1;
01198   else
01199     qp->opt_m=0;  /* eq-constraint will be ignored */
01200 
01201   /* init linear part of objective function */
01202   for (i=0;i<varnum;i++) {
01203     qp->opt_g0[i]=lin[key[i]];
01204   }
01205 
01206   for (i=0;i<varnum;i++) {
01207       ki=key[i];
01208 
01209       /* Compute the matrix for equality constraints */
01210       qp->opt_ce[i]=label[ki];
01211       qp->opt_low[i]=0;
01212       qp->opt_up[i]=learn_parm->svm_cost[ki];
01213 
01214       kernel_temp=compute_kernel(docs[ki], docs[ki]);
01215       /* compute linear part of objective function */
01216       qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
01217       /* compute quadratic part of objective function */
01218       qp->opt_g[varnum*i+i]=kernel_temp;
01219 
01220       for (j=i+1;j<varnum;j++) {
01221           kj=key[j];
01222           kernel_temp=compute_kernel(docs[ki], docs[kj]);
01223 
01224           /* compute linear part of objective function */
01225           qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
01226           qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
01227           /* compute quadratic part of objective function */
01228           qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
01229           qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
01230       }
01231 
01232       if(verbosity>=3) {
01233           if(i % 20 == 0) {
01234               SG_DEBUG( "%ld..",i);
01235           }
01236       }
01237   }
01238 
01239   for (i=0;i<varnum;i++) {
01240       /* assure starting at feasible point */
01241       qp->opt_xinit[i]=a[key[i]];
01242       /* set linear part of objective function */
01243       qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
01244   }
01245 
01246   if(verbosity>=3) {
01247       SG_DONE();
01248   }
01249 }
01250 
01251 
01252 int32_t CSVMLight::calculate_svm_model(
01253     int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
01254     float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
01255      /* Compute decision function based on current values */
01256      /* of alpha. */
01257 {
01258   int32_t i,ii,pos,b_calculated=0,first_low,first_high;
01259   float64_t ex_c,b_temp,b_low,b_high;
01260 
01261   if(verbosity>=3) {
01262    SG_DEBUG( "Calculating model...");
01263   }
01264 
01265   if(!learn_parm->biased_hyperplane) {
01266     model->b=0;
01267     b_calculated=1;
01268   }
01269 
01270   for (ii=0;(i=working2dnum[ii])>=0;ii++) {
01271     if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
01272       pos=model->index[i];
01273       model->index[i]=-1;
01274       (model->sv_num)--;
01275       model->supvec[pos]=model->supvec[model->sv_num];
01276       model->alpha[pos]=model->alpha[model->sv_num];
01277       model->index[model->supvec[pos]]=pos;
01278     }
01279     else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
01280       model->supvec[model->sv_num]=docs[i];
01281       model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
01282       model->index[i]=model->sv_num;
01283       (model->sv_num)++;
01284     }
01285     else if(a_old[i]==a[i]) { /* nothing to do */
01286     }
01287     else {  /* just update alpha */
01288       model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
01289     }
01290 
01291     ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
01292     if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
01293       (model->at_upper_bound)--;
01294     }
01295     else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
01296       (model->at_upper_bound)++;
01297     }
01298 
01299     if((!b_calculated)
01300        && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {   /* calculate b */
01301         model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
01302     b_calculated=1;
01303     }
01304   }
01305 
01306   /* No alpha in the working set not at bounds, so b was not
01307      calculated in the usual way. The following handles this special
01308      case. */
01309   if(learn_parm->biased_hyperplane
01310      && (!b_calculated)
01311      && (model->sv_num-1 == model->at_upper_bound)) {
01312     first_low=1;
01313     first_high=1;
01314     b_low=0;
01315     b_high=0;
01316     for (ii=0;(i=active2dnum[ii])>=0;ii++) {
01317       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
01318       if(a[i]<ex_c) {
01319     if(label[i]>0)  {
01320       b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
01321       if((b_temp>b_low) || (first_low)) {
01322         b_low=b_temp;
01323         first_low=0;
01324       }
01325     }
01326     else {
01327       b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
01328       if((b_temp<b_high) || (first_high)) {
01329         b_high=b_temp;
01330         first_high=0;
01331       }
01332     }
01333       }
01334       else {
01335     if(label[i]<0)  {
01336       b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
01337       if((b_temp>b_low) || (first_low)) {
01338         b_low=b_temp;
01339         first_low=0;
01340       }
01341     }
01342     else {
01343       b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
01344       if((b_temp<b_high) || (first_high)) {
01345         b_high=b_temp;
01346         first_high=0;
01347       }
01348     }
01349       }
01350     }
01351     if(first_high) {
01352       model->b=-b_low;
01353     }
01354     else if(first_low) {
01355       model->b=-b_high;
01356     }
01357     else {
01358       model->b=-(b_high+b_low)/2.0;  /* select b as the middle of range */
01359     }
01360   }
01361 
01362   if(verbosity>=3) {
01363    SG_DONE();
01364   }
01365 
01366   return(model->sv_num-1); /* have to substract one, since element 0 is empty*/
01367 }
01368 
01369 int32_t CSVMLight::check_optimality(
01370     int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
01371     float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
01372     int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
01373     int32_t iteration)
01374      /* Check KT-conditions */
01375 {
01376   int32_t i,ii,retrain;
01377   float64_t dist,ex_c,target;
01378 
01379   if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
01380       learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
01381   else
01382       learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
01383   retrain=0;
01384   (*maxdiff)=0;
01385   (*misclassified)=0;
01386   for (ii=0;(i=active2dnum[ii])>=0;ii++) {
01387       if((!inconsistent[i]) && label[i]) {
01388           dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from
01389                                                      hyperplane*/
01390           target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
01391           ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
01392           if(dist <= 0) {
01393               (*misclassified)++;  /* does not work due to deactivation of var */
01394           }
01395           if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
01396               if((dist-target)>(*maxdiff))  /* largest violation */
01397                   (*maxdiff)=dist-target;
01398           }
01399           else if((a[i]<ex_c) && (dist < target)) {
01400               if((target-dist)>(*maxdiff))  /* largest violation */
01401                   (*maxdiff)=target-dist;
01402           }
01403           /* Count how int32_t a variable was at lower/upper bound (and optimal).*/
01404           /* Variables, which were at the bound and optimal for a int32_t */
01405           /* time are unlikely to become support vectors. In case our */
01406           /* cache is filled up, those variables are excluded to save */
01407           /* kernel evaluations. (See chapter 'Shrinking').*/
01408           if((a[i]>(learn_parm->epsilon_a))
01409              && (a[i]<ex_c)) {
01410               last_suboptimal_at[i]=iteration;         /* not at bound */
01411           }
01412           else if((a[i]<=(learn_parm->epsilon_a))
01413                   && (dist < (target+learn_parm->epsilon_shrink))) {
01414               last_suboptimal_at[i]=iteration;         /* not likely optimal */
01415           }
01416           else if((a[i]>=ex_c)
01417                   && (dist > (target-learn_parm->epsilon_shrink)))  {
01418               last_suboptimal_at[i]=iteration;         /* not likely optimal */
01419           }
01420       }
01421   }
01422 
01423   /* termination criterion */
01424   if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
01425       retrain=1;
01426   }
01427   return(retrain);
01428 }
01429 
01430 void CSVMLight::update_linear_component(
01431     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01432     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01433     float64_t *aicache, float64_t* c)
01434      /* keep track of the linear component */
01435      /* lin of the gradient etc. by updating */
01436      /* based on the change of the variables */
01437      /* in the current working set */
01438 {
01439     register int32_t i=0,ii=0,j=0,jj=0;
01440 
01441     if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
01442     {
01443         if (callback)
01444         {
01445             update_linear_component_mkl_linadd(docs, label, active2dnum, a,
01446                     a_old, working2dnum, totdoc, lin, aicache);
01447         }
01448         else
01449         {
01450             kernel->clear_normal();
01451 
01452             int32_t num_working=0;
01453             for (ii=0;(i=working2dnum[ii])>=0;ii++) {
01454                 if(a[i] != a_old[i]) {
01455                     kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
01456                     num_working++;
01457                 }
01458             }
01459 
01460             if (num_working>0)
01461             {
01462                 if (parallel->get_num_threads() < 2)
01463                 {
01464                     for (jj=0;(j=active2dnum[jj])>=0;jj++) {
01465                         lin[j]+=kernel->compute_optimized(docs[j]);
01466                     }
01467                 }
01468 #ifndef WIN32
01469                 else
01470                 {
01471                     int32_t num_elem = 0 ;
01472                     for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;
01473 
01474                     pthread_t* threads = new pthread_t[parallel->get_num_threads()-1] ;
01475                     S_THREAD_PARAM* params = new S_THREAD_PARAM[parallel->get_num_threads()-1] ;
01476                     int32_t start = 0 ;
01477                     int32_t step = num_elem/parallel->get_num_threads();
01478                     int32_t end = step ;
01479 
01480                     for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01481                     {
01482                         params[t].kernel = kernel ;
01483                         params[t].lin = lin ;
01484                         params[t].docs = docs ;
01485                         params[t].active2dnum=active2dnum ;
01486                         params[t].start = start ;
01487                         params[t].end = end ;
01488                         start=end ;
01489                         end+=step ;
01490                         pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
01491                     }
01492 
01493                     for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
01494                         lin[j]+=kernel->compute_optimized(docs[j]);
01495                     }
01496                     void* ret;
01497                     for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01498                         pthread_join(threads[t], &ret) ;
01499 
01500                     delete[] params;
01501                     delete[] threads;
01502                 }
01503 #endif
01504             }
01505         }
01506     }
01507     else
01508     {
01509         if (callback)
01510         {
01511             update_linear_component_mkl(docs, label, active2dnum,
01512                     a, a_old, working2dnum, totdoc, lin, aicache);
01513         }
01514         else {
01515             for (jj=0;(i=working2dnum[jj])>=0;jj++) {
01516                 if(a[i] != a_old[i]) {
01517                     kernel->get_kernel_row(i,active2dnum,aicache);
01518                     for (ii=0;(j=active2dnum[ii])>=0;ii++)
01519                         lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
01520                 }
01521             }
01522         }
01523     }
01524 }
01525 
01526 
01527 void CSVMLight::update_linear_component_mkl(
01528     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01529     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01530     float64_t *aicache)
01531 {
01532     //int inner_iters=0;
01533     int32_t num = kernel->get_num_vec_rhs();
01534     int32_t num_weights = -1;
01535     int32_t num_kernels = kernel->get_num_subkernels() ;
01536     const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
01537     ASSERT(num_weights==num_kernels);
01538 
01539     if ((kernel->get_kernel_type()==K_COMBINED) &&
01540              (!((CCombinedKernel*)kernel)->get_append_subkernel_weights()))// for combined kernel
01541     {
01542         CCombinedKernel* k      = (CCombinedKernel*) kernel;
01543         CKernel* kn = k->get_first_kernel() ;
01544         int32_t n = 0, i, j ;
01545 
01546         while (kn!=NULL)
01547         {
01548             for (i=0;i<num;i++)
01549             {
01550                 if(a[i] != a_old[i])
01551                 {
01552                     kn->get_kernel_row(i,NULL,aicache, true);
01553                     for (j=0;j<num;j++)
01554                         W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
01555                 }
01556             }
01557 
01558             SG_UNREF(kn);
01559             kn = k->get_next_kernel();
01560             n++ ;
01561         }
01562     }
01563     else // hope the kernel is fast ...
01564     {
01565         float64_t* w_backup = new float64_t[num_kernels] ;
01566         float64_t* w1 = new float64_t[num_kernels] ;
01567 
01568         // backup and set to zero
01569         for (int32_t i=0; i<num_kernels; i++)
01570         {
01571             w_backup[i] = old_beta[i] ;
01572             w1[i]=0.0 ;
01573         }
01574         for (int32_t n=0; n<num_kernels; n++)
01575         {
01576             w1[n]=1.0 ;
01577             kernel->set_subkernel_weights(w1, num_weights) ;
01578 
01579             for (int32_t i=0;i<num;i++)
01580             {
01581                 if(a[i] != a_old[i])
01582                 {
01583                     for (int32_t j=0;j<num;j++)
01584                         W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
01585                 }
01586             }
01587             w1[n]=0.0 ;
01588         }
01589 
01590         // restore old weights
01591         kernel->set_subkernel_weights(w_backup,num_weights) ;
01592 
01593         delete[] w_backup ;
01594         delete[] w1 ;
01595     }
01596 
01597     call_mkl_callback(a, label, lin);
01598 }
01599 
01600 
01601 void CSVMLight::update_linear_component_mkl_linadd(
01602     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01603     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01604     float64_t *aicache)
01605 {
01606     //int inner_iters=0;
01607 
01608     // kernel with LP_LINADD property is assumed to have
01609     // compute_by_subkernel functions
01610     int32_t num = kernel->get_num_vec_rhs();
01611     int32_t num_weights = -1;
01612     int32_t num_kernels = kernel->get_num_subkernels() ;
01613     const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
01614     ASSERT(num_weights==num_kernels);
01615 
01616     float64_t* w_backup = new float64_t[num_kernels] ;
01617     float64_t* w1 = new float64_t[num_kernels] ;
01618 
01619     // backup and set to one
01620     for (int32_t i=0; i<num_kernels; i++)
01621     {
01622         w_backup[i] = old_beta[i] ;
01623         w1[i]=1.0 ;
01624     }
01625     // set the kernel weights
01626     kernel->set_subkernel_weights(w1, num_weights) ;
01627 
01628     // create normal update (with changed alphas only)
01629     kernel->clear_normal();
01630     for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
01631         if(a[i] != a_old[i]) {
01632             kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
01633         }
01634     }
01635 
01636     if (parallel->get_num_threads() < 2)
01637     {
01638         // determine contributions of different kernels
01639         for (int32_t i=0; i<num; i++)
01640             kernel->compute_by_subkernel(i,&W[i*num_kernels]);
01641     }
01642 #ifndef WIN32
01643     else
01644     {
01645         pthread_t* threads = new pthread_t[parallel->get_num_threads()-1];
01646         S_THREAD_PARAM* params = new S_THREAD_PARAM[parallel->get_num_threads()-1];
01647         int32_t step= num/parallel->get_num_threads();
01648 
01649         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01650         {
01651             params[t].kernel = kernel;
01652             params[t].W = W;
01653             params[t].start = t*step;
01654             params[t].end = (t+1)*step;
01655             pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
01656         }
01657 
01658         for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
01659             kernel->compute_by_subkernel(i,&W[i*num_kernels]);
01660 
01661         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01662             pthread_join(threads[t], NULL);
01663 
01664         delete[] params;
01665         delete[] threads;
01666     }
01667 #endif
01668 
01669     // restore old weights
01670     kernel->set_subkernel_weights(w_backup,num_weights);
01671 
01672     delete[] w_backup;
01673     delete[] w1;
01674 
01675     call_mkl_callback(a, label, lin);
01676 }
01677 
01678 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
01679 {
01680     S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
01681 
01682     int32_t num_kernels=params->kernel->get_num_subkernels();
01683 
01684     // determine contributions of different kernels
01685     for (int32_t i=params->start; i<params->end; i++)
01686         params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
01687 
01688     return NULL ;
01689 }
01690 
01691 void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
01692 {
01693     int32_t num = kernel->get_num_vec_rhs();
01694     int32_t num_kernels = kernel->get_num_subkernels() ;
01695 
01696     int nk = (int) num_kernels; /* calling external lib */
01697 
01698     float64_t suma=0;
01699     float64_t* sumw=new float64_t[num_kernels];
01700 #ifdef HAVE_LAPACK
01701     double* alphay  = new double[num];
01702 
01703     for (int32_t i=0; i<num; i++)
01704     {
01705         alphay[i]=a[i]*label[i];
01706         suma+=a[i];
01707     }
01708 
01709     for (int32_t i=0; i<num_kernels; i++)
01710         sumw[i]=0;
01711 
01712     cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
01713             num_kernels, alphay, 1, 1.0, (double*) sumw, 1);
01714 
01715     delete[] alphay;
01716 #else
01717     for (int32_t i=0; i<num; i++)
01718         suma += a[i];
01719 
01720     for (int32_t d=0; d<num_kernels; d++)
01721     {
01722         sumw[d]=0;
01723         for (int32_t i=0; i<num; i++)
01724             sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
01725     }
01726 #endif
01727 
01728     if (callback)
01729         mkl_converged=callback(mkl, sumw, suma);
01730 
01731 
01732     const float64_t* new_beta   = kernel->get_subkernel_weights(num_kernels);
01733 
01734     // update lin
01735 #ifdef HAVE_LAPACK
01736     cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
01737         nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
01738 #else
01739     for (int32_t i=0; i<num; i++)
01740         lin[i]=0 ;
01741     for (int32_t d=0; d<num_kernels; d++)
01742         if (new_beta[d]!=0)
01743             for (int32_t i=0; i<num; i++)
01744                 lin[i] += new_beta[d]*W[i*num_kernels+d] ;
01745 #endif
01746 
01747     delete[] sumw;
01748 }
01749 
01750 
01751 /*************************** Working set selection ***************************/
01752 
01753 int32_t CSVMLight::select_next_qp_subproblem_grad(
01754     int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
01755     int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
01756     int32_t *working2dnum, float64_t *selcrit, int32_t *select,
01757     int32_t cache_only, int32_t *key, int32_t *chosen)
01758     /* Use the feasible direction approach to select the next
01759        qp-subproblem (see chapter 'Selecting a good working set'). If
01760        'cache_only' is true, then the variables are selected only among
01761        those for which the kernel evaluations are cached. */
01762 {
01763     int32_t choosenum,i,j,k,activedoc,inum,valid;
01764     float64_t s;
01765 
01766     for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
01767     choosenum=0;
01768     activedoc=0;
01769     for (i=0;(j=active2dnum[i])>=0;i++) {
01770         s=-label[j];
01771         if(cache_only)
01772         {
01773             if (use_kernel_cache)
01774                 valid=(kernel->kernel_cache_check(j));
01775             else
01776                 valid = 1 ;
01777         }
01778         else
01779             valid=1;
01780         if(valid
01781            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01782            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01783                  && (s>0)))
01784            && (!chosen[j])
01785            && (label[j])
01786            && (!inconsistent[j]))
01787         {
01788             selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
01789             key[activedoc]=j;
01790             activedoc++;
01791         }
01792     }
01793     select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01794     for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
01795         i=key[select[k]];
01796         chosen[i]=1;
01797         working2dnum[inum+choosenum]=i;
01798         choosenum+=1;
01799         if (use_kernel_cache)
01800             kernel->kernel_cache_touch(i);
01801         /* make sure it does not get kicked */
01802         /* out of cache */
01803     }
01804 
01805     activedoc=0;
01806     for (i=0;(j=active2dnum[i])>=0;i++) {
01807         s=label[j];
01808         if(cache_only)
01809         {
01810             if (use_kernel_cache)
01811                 valid=(kernel->kernel_cache_check(j));
01812             else
01813                 valid = 1 ;
01814         }
01815         else
01816             valid=1;
01817         if(valid
01818            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01819            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01820                  && (s>0)))
01821            && (!chosen[j])
01822            && (label[j])
01823            && (!inconsistent[j]))
01824         {
01825             selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
01826             /*  selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
01827             key[activedoc]=j;
01828             activedoc++;
01829         }
01830     }
01831     select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01832     for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
01833         i=key[select[k]];
01834         chosen[i]=1;
01835         working2dnum[inum+choosenum]=i;
01836         choosenum+=1;
01837         if (use_kernel_cache)
01838             kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01839         /* out of cache */
01840     }
01841     working2dnum[inum+choosenum]=-1; /* complete index */
01842     return(choosenum);
01843 }
01844 
01845 int32_t CSVMLight::select_next_qp_subproblem_rand(
01846     int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
01847     int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
01848     int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
01849     int32_t *chosen, int32_t iteration)
01850 /* Use the feasible direction approach to select the next
01851    qp-subproblem (see section 'Selecting a good working set'). Chooses
01852    a feasible direction at (pseudo) random to help jump over numerical
01853    problem. */
01854 {
01855   int32_t choosenum,i,j,k,activedoc,inum;
01856   float64_t s;
01857 
01858   for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
01859   choosenum=0;
01860   activedoc=0;
01861   for (i=0;(j=active2dnum[i])>=0;i++) {
01862     s=-label[j];
01863     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01864        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01865          && (s>0)))
01866        && (!inconsistent[j])
01867        && (label[j])
01868        && (!chosen[j])) {
01869       selcrit[activedoc]=(j+iteration) % totdoc;
01870       key[activedoc]=j;
01871       activedoc++;
01872     }
01873   }
01874   select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01875   for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
01876     i=key[select[k]];
01877     chosen[i]=1;
01878     working2dnum[inum+choosenum]=i;
01879     choosenum+=1;
01880     if (use_kernel_cache)
01881         kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01882                                         /* out of cache */
01883   }
01884 
01885   activedoc=0;
01886   for (i=0;(j=active2dnum[i])>=0;i++) {
01887     s=label[j];
01888     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01889        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01890          && (s>0)))
01891        && (!inconsistent[j])
01892        && (label[j])
01893        && (!chosen[j])) {
01894       selcrit[activedoc]=(j+iteration) % totdoc;
01895       key[activedoc]=j;
01896       activedoc++;
01897     }
01898   }
01899   select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01900   for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
01901     i=key[select[k]];
01902     chosen[i]=1;
01903     working2dnum[inum+choosenum]=i;
01904     choosenum+=1;
01905     if (use_kernel_cache)
01906         kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01907                                         /* out of cache */
01908   }
01909   working2dnum[inum+choosenum]=-1; /* complete index */
01910   return(choosenum);
01911 }
01912 
01913 
01914 
01915 void CSVMLight::select_top_n(
01916     float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
01917 {
01918   register int32_t i,j;
01919 
01920   for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
01921     for (j=i;j>=0;j--) {
01922       if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
01923     select[j]=select[j-1];
01924       }
01925       else {
01926     select[j]=i;
01927     j=-1;
01928       }
01929     }
01930   }
01931   if(n>0) {
01932     for (i=n;i<range;i++) {
01933       if(selcrit[i]>selcrit[select[n-1]]) {
01934     for (j=n-1;j>=0;j--) {
01935       if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
01936         select[j]=select[j-1];
01937       }
01938       else {
01939         select[j]=i;
01940         j=-1;
01941       }
01942     }
01943       }
01944     }
01945   }
01946 }
01947 
01948 
01949 /******************************** Shrinking  *********************************/
01950 
01951 void CSVMLight::init_shrink_state(
01952     SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
01953 {
01954   int32_t i;
01955 
01956   shrink_state->deactnum=0;
01957   shrink_state->active = new int32_t[totdoc];
01958   shrink_state->inactive_since = new int32_t[totdoc];
01959   shrink_state->a_history = new float64_t*[maxhistory];
01960   shrink_state->maxhistory=maxhistory;
01961   shrink_state->last_lin = new float64_t[totdoc];
01962   shrink_state->last_a = new float64_t[totdoc];
01963 
01964   for (i=0;i<totdoc;i++) {
01965     shrink_state->active[i]=1;
01966     shrink_state->inactive_since[i]=0;
01967     shrink_state->last_a[i]=0;
01968     shrink_state->last_lin[i]=0;
01969   }
01970 }
01971 
01972 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
01973 {
01974   delete[] shrink_state->active;
01975   delete[] shrink_state->inactive_since;
01976   if(shrink_state->deactnum > 0)
01977     delete[] (shrink_state->a_history[shrink_state->deactnum-1]);
01978   delete[] (shrink_state->a_history);
01979   delete[] (shrink_state->last_a);
01980   delete[] (shrink_state->last_lin);
01981 }
01982 
01983 int32_t CSVMLight::shrink_problem(
01984     SHRINK_STATE *shrink_state, int32_t *active2dnum,
01985     int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
01986     int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
01987     float64_t* lin, int* label)
01988      /* Shrink some variables away.  Do the shrinking only if at least
01989         minshrink variables can be removed. */
01990 {
01991   int32_t i,ii,change,activenum,lastiter;
01992   float64_t *a_old=NULL;
01993 
01994   activenum=0;
01995   change=0;
01996   for (ii=0;active2dnum[ii]>=0;ii++) {
01997       i=active2dnum[ii];
01998       activenum++;
01999       lastiter=last_suboptimal_at[i];
02000 
02001       if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
02002          || (inconsistent[i])) {
02003           change++;
02004       }
02005   }
02006   if((change>=minshrink) /* shrink only if sufficiently many candidates */
02007      && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
02008       /* Shrink problem by removing those variables which are */
02009       /* optimal at a bound for a minimum number of iterations */
02010       if(verbosity>=2) {
02011           SG_INFO( " Shrinking...");
02012       }
02013 
02014       if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /*  non-linear case save alphas */
02015 
02016           a_old=new float64_t[totdoc];
02017           shrink_state->a_history[shrink_state->deactnum]=a_old;
02018           for (i=0;i<totdoc;i++) {
02019               a_old[i]=a[i];
02020           }
02021       }
02022       for (ii=0;active2dnum[ii]>=0;ii++) {
02023           i=active2dnum[ii];
02024           lastiter=last_suboptimal_at[i];
02025           if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
02026              || (inconsistent[i])) {
02027               shrink_state->active[i]=0;
02028               shrink_state->inactive_since[i]=shrink_state->deactnum;
02029           }
02030       }
02031       activenum=compute_index(shrink_state->active,totdoc,active2dnum);
02032       shrink_state->deactnum++;
02033       if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
02034           shrink_state->deactnum=0;
02035 
02036       if(verbosity>=2) {
02037           SG_DONE();
02038           SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum);
02039       }
02040   }
02041   return(activenum);
02042 }
02043 
02044 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
02045 {
02046     S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
02047 
02048     CKernel* k = params->kernel;
02049     float64_t* lin = params->lin;
02050     float64_t* last_lin = params->last_lin;
02051     int32_t* active = params->active;
02052     int32_t* docs = params->docs;
02053     int32_t start = params->start;
02054     int32_t end = params->end;
02055 
02056     for (int32_t i=start;i<end;i++)
02057     {
02058         if (!active[i])
02059             lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
02060 
02061         last_lin[i]=lin[i];
02062     }
02063 
02064     return NULL;
02065 }
02066 
02067 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
02068 {
02069     S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
02070     ASSERT(params);
02071     ASSERT(params->kernel);
02072     ASSERT(params->lin);
02073     ASSERT(params->aicache);
02074     ASSERT(params->a);
02075     ASSERT(params->a_old);
02076     ASSERT(params->changed2dnum);
02077     ASSERT(params->inactive2dnum);
02078     ASSERT(params->label);
02079 
02080     CKernel* k = params->kernel;
02081     float64_t* lin = params->lin;
02082     float64_t* aicache = params->aicache;
02083     float64_t* a= params->a;
02084     float64_t* a_old = params->a_old;
02085     int32_t* changed2dnum = params->changed2dnum;
02086     int32_t* inactive2dnum = params->inactive2dnum;
02087     int32_t* label = params->label;
02088     int32_t start = params->start;
02089     int32_t end = params->end;
02090 
02091     for (int32_t ii=start;ii<end;ii++)
02092     {
02093         int32_t i=changed2dnum[ii];
02094         int32_t j=0;
02095         ASSERT(i>=0);
02096 
02097         k->get_kernel_row(i,inactive2dnum,aicache);
02098         for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
02099             lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02100     }
02101     return NULL;
02102 }
02103 
02104 void CSVMLight::reactivate_inactive_examples(
02105     int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
02106     float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
02107     int32_t* docs, float64_t *aicache, float64_t *maxdiff)
02108      /* Make all variables active again which had been removed by
02109         shrinking. */
02110      /* Computes lin for those variables from scratch. */
02111 {
02112   register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
02113   int32_t *changed,*inactive;
02114   register float64_t *a_old,dist;
02115   float64_t ex_c,target;
02116 
02117   if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
02118       a_old=shrink_state->last_a;
02119 
02120       if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
02121       {
02122           SG_DEBUG( " clear normal - linadd\n");
02123           kernel->clear_normal();
02124 
02125           int32_t num_modified=0;
02126           for (i=0;i<totdoc;i++) {
02127               if(a[i] != a_old[i]) {
02128                   kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
02129                   a_old[i]=a[i];
02130                   num_modified++;
02131               }
02132           }
02133 
02134           if (num_modified>0)
02135           {
02136               int32_t num_threads=parallel->get_num_threads();
02137               ASSERT(num_threads>0);
02138               if (num_threads < 2)
02139               {
02140                   S_THREAD_PARAM_REACTIVATE_LINADD params;
02141                   params.kernel=kernel;
02142                   params.lin=lin;
02143                   params.last_lin=shrink_state->last_lin;
02144                   params.docs=docs;
02145                   params.active=shrink_state->active;
02146                   params.start=0;
02147                   params.end=totdoc;
02148                   reactivate_inactive_examples_linadd_helper((void*) &params);
02149               }
02150 #ifndef WIN32
02151               else
02152               {
02153                   pthread_t* threads = new pthread_t[num_threads-1];
02154                   S_THREAD_PARAM_REACTIVATE_LINADD* params = new S_THREAD_PARAM_REACTIVATE_LINADD[num_threads];
02155                   int32_t step= totdoc/num_threads;
02156 
02157                   for (t=0; t<num_threads-1; t++)
02158                   {
02159                       params[t].kernel=kernel;
02160                       params[t].lin=lin;
02161                       params[t].last_lin=shrink_state->last_lin;
02162                       params[t].docs=docs;
02163                       params[t].active=shrink_state->active;
02164                       params[t].start = t*step;
02165                       params[t].end = (t+1)*step;
02166                       pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
02167                   }
02168 
02169                   params[t].kernel=kernel;
02170                   params[t].lin=lin;
02171                   params[t].last_lin=shrink_state->last_lin;
02172                   params[t].docs=docs;
02173                   params[t].active=shrink_state->active;
02174                   params[t].start = t*step;
02175                   params[t].end = totdoc;
02176                   reactivate_inactive_examples_linadd_helper((void*) &params[t]);
02177 
02178                   for (t=0; t<num_threads-1; t++)
02179                       pthread_join(threads[t], NULL);
02180 
02181                   delete[] threads;
02182                   delete[] params;
02183               }
02184 #endif
02185 
02186           }
02187       }
02188       else
02189       {
02190           float64_t *alphas = new float64_t[totdoc] ;
02191           int32_t *idx = new int32_t[totdoc] ;
02192           int32_t num_suppvec=0 ;
02193 
02194           for (i=0; i<totdoc; i++)
02195           {
02196               if(a[i] != a_old[i])
02197               {
02198                   alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
02199                   idx[num_suppvec] = i ;
02200                   a_old[i] = a[i] ;
02201                   num_suppvec++ ;
02202               }
02203           }
02204 
02205           if (num_suppvec>0)
02206           {
02207               int32_t num_inactive=0;
02208               int32_t* inactive_idx=new int32_t[totdoc]; // infact we only need a subset
02209 
02210               j=0;
02211               for (i=0;i<totdoc;i++)
02212               {
02213                   if(!shrink_state->active[i])
02214                   {
02215                       inactive_idx[j++] = i;
02216                       num_inactive++;
02217                   }
02218               }
02219 
02220               if (num_inactive>0)
02221               {
02222                   float64_t* dest = new float64_t[num_inactive];
02223                   memset(dest, 0, sizeof(float64_t)*num_inactive);
02224 
02225                   kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);
02226 
02227                   j=0;
02228                   for (i=0;i<totdoc;i++) {
02229                       if(!shrink_state->active[i]) {
02230                           lin[i] = shrink_state->last_lin[i] + dest[j++] ;
02231                       }
02232                       shrink_state->last_lin[i]=lin[i];
02233                   }
02234 
02235                   delete[] dest;
02236               }
02237               else
02238               {
02239                   for (i=0;i<totdoc;i++)
02240                       shrink_state->last_lin[i]=lin[i];
02241               }
02242               delete[] inactive_idx;
02243           }
02244           delete[] alphas;
02245           delete[] idx;
02246       }
02247 
02248       kernel->delete_optimization();
02249   }
02250   else
02251   {
02252       changed=new int32_t[totdoc];
02253       changed2dnum=new int32_t[totdoc+11];
02254       inactive=new int32_t[totdoc];
02255       inactive2dnum=new int32_t[totdoc+11];
02256       for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
02257       {
02258           if(verbosity>=2) {
02259               SG_INFO( "%ld..",t);
02260           }
02261           a_old=shrink_state->a_history[t];
02262           for (i=0;i<totdoc;i++) {
02263               inactive[i]=((!shrink_state->active[i])
02264                            && (shrink_state->inactive_since[i] == t));
02265               changed[i]= (a[i] != a_old[i]);
02266           }
02267           compute_index(inactive,totdoc,inactive2dnum);
02268           compute_index(changed,totdoc,changed2dnum);
02269 
02270 
02271           int32_t num_threads=parallel->get_num_threads();
02272           ASSERT(num_threads>0);
02273 
02274           if (num_threads < 2)
02275           {
02276               for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
02277                   kernel->get_kernel_row(i,inactive2dnum,aicache);
02278                   for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02279                       lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02280               }
02281           }
02282 #ifndef WIN32
02283           else
02284           {
02285               //find number of the changed ones
02286               int32_t num_changed=0;
02287               for (ii=0;changed2dnum[ii]>=0;ii++)
02288                   num_changed++;
02289 
02290               if (num_changed>0)
02291               {
02292                   pthread_t* threads= new pthread_t[num_threads-1];
02293                   S_THREAD_PARAM_REACTIVATE_VANILLA* params = new S_THREAD_PARAM_REACTIVATE_VANILLA[num_threads];
02294                   int32_t step= num_changed/num_threads;
02295 
02296                   // alloc num_threads many tmp buffers
02297                   float64_t* tmp_lin=new float64_t[totdoc*num_threads];
02298                   memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02299                   float64_t* tmp_aicache=new float64_t[totdoc*num_threads];
02300                   memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02301 
02302                   int32_t thr;
02303                   for (thr=0; thr<num_threads-1; thr++)
02304                   {
02305                       params[thr].kernel=kernel;
02306                       params[thr].lin=&tmp_lin[thr*totdoc];
02307                       params[thr].aicache=&tmp_aicache[thr*totdoc];
02308                       params[thr].a=a;
02309                       params[thr].a_old=a_old;
02310                       params[thr].changed2dnum=changed2dnum;
02311                       params[thr].inactive2dnum=inactive2dnum;
02312                       params[thr].label=label;
02313                       params[thr].start = thr*step;
02314                       params[thr].end = (thr+1)*step;
02315                       pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
02316                   }
02317 
02318                   params[thr].kernel=kernel;
02319                   params[thr].lin=&tmp_lin[thr*totdoc];
02320                   params[thr].aicache=&tmp_aicache[thr*totdoc];
02321                   params[thr].a=a;
02322                   params[thr].a_old=a_old;
02323                   params[thr].changed2dnum=changed2dnum;
02324                   params[thr].inactive2dnum=inactive2dnum;
02325                   params[thr].label=label;
02326                   params[thr].start = thr*step;
02327                   params[thr].end = num_changed;
02328                   reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);
02329 
02330                   for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02331                       lin[j]+=tmp_lin[totdoc*thr+j];
02332 
02333                   for (thr=0; thr<num_threads-1; thr++)
02334                   {
02335                       pthread_join(threads[thr], NULL);
02336 
02337                       //add up results
02338                       for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02339                           lin[j]+=tmp_lin[totdoc*thr+j];
02340                   }
02341 
02342                   delete[] tmp_lin;
02343                   delete[] tmp_aicache;
02344                   delete[] threads;
02345                   delete[] params;
02346               }
02347           }
02348 #endif
02349       }
02350       delete[] changed;
02351       delete[] changed2dnum;
02352       delete[] inactive;
02353       delete[] inactive2dnum;
02354   }
02355 
02356   (*maxdiff)=0;
02357   for (i=0;i<totdoc;i++) {
02358     shrink_state->inactive_since[i]=shrink_state->deactnum-1;
02359     if(!inconsistent[i]) {
02360       dist=(lin[i]-model->b)*(float64_t)label[i];
02361       target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
02362       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
02363       if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
02364     if((dist-target)>(*maxdiff))  /* largest violation */
02365       (*maxdiff)=dist-target;
02366       }
02367       else if((a[i]<ex_c) && (dist < target)) {
02368     if((target-dist)>(*maxdiff))  /* largest violation */
02369       (*maxdiff)=target-dist;
02370       }
02371       if((a[i]>(0+learn_parm->epsilon_a))
02372      && (a[i]<ex_c)) {
02373     shrink_state->active[i]=1;                         /* not at bound */
02374       }
02375       else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
02376     shrink_state->active[i]=1;
02377       }
02378       else if((a[i]>=ex_c)
02379           && (dist > (target-learn_parm->epsilon_shrink))) {
02380     shrink_state->active[i]=1;
02381       }
02382       else if(learn_parm->sharedslack) { /* make all active when sharedslack */
02383     shrink_state->active[i]=1;
02384       }
02385     }
02386   }
02387 
02388   if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
02389       for (i=0;i<totdoc;i++) {
02390           (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
02391       }
02392       for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
02393           delete[] shrink_state->a_history[t];
02394           shrink_state->a_history[t]=0;
02395       }
02396   }
02397 }
02398 
02399 
02400 
02401 /* start the optimizer and return the optimal values */
02402 float64_t* CSVMLight::optimize_qp(
02403         QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
02404         int32_t& svm_maxqpsize)
02405 {
02406     register int32_t i, j, result;
02407     float64_t margin, obj_before, obj_after;
02408     float64_t sigdig, dist, epsilon_loqo;
02409     int32_t iter;
02410 
02411     if(!primal) { /* allocate memory at first call */
02412         primal=new float64_t[nx*3];
02413         dual=new float64_t[nx*2+1];
02414     }
02415 
02416     obj_before=0; /* calculate objective before optimization */
02417     for(i=0;i<qp->opt_n;i++) {
02418         obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
02419         obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
02420         for(j=0;j<i;j++) {
02421             obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
02422         }
02423     }
02424 
02425     result=STILL_RUNNING;
02426     qp->opt_ce0[0]*=(-1.0);
02427     /* Run pr_loqo. If a run fails, try again with parameters which lead */
02428     /* to a slower, but more robust setting. */
02429     for(margin=init_margin,iter=init_iter;
02430         (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {
02431 
02432         opt_precision=CMath::max(opt_precision, DEF_PRECISION);
02433         sigdig=-log10(opt_precision);
02434 
02435         result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
02436                 (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
02437                 (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
02438                 (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
02439                 (float64_t *)primal,(float64_t *)dual,
02440                 (int32_t)(verbosity-2),
02441                 (float64_t)sigdig,(int32_t)iter,
02442                 (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);
02443 
02444         if(CMath::is_nan(dual[0])) {     /* check for choldc problem */
02445             if(verbosity>=2) {
02446                 SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n");
02447             }
02448             if(init_margin<0.80) { /* become more conservative in general */
02449                 init_margin=(4.0*margin+1.0)/5.0;
02450             }
02451             margin=(margin+1.0)/2.0;
02452             (opt_precision)*=10.0;   /* reduce precision */
02453             if(verbosity>=2) {
02454                 SG_SDEBUG("Reducing precision of PR_LOQO.\n");
02455             }
02456         }
02457         else if(result!=OPTIMAL_SOLUTION) {
02458             iter+=2000;
02459             init_iter+=10;
02460             (opt_precision)*=10.0;   /* reduce precision */
02461             if(verbosity>=2) {
02462                 SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result);
02463             }
02464         }
02465     }
02466 
02467     if(qp->opt_m)         /* Thanks to Alex Smola for this hint */
02468         model_b=dual[0];
02469     else
02470         model_b=0;
02471 
02472     /* Check the precision of the alphas. If results of current optimization */
02473     /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
02474     epsilon_loqo=1E-10;
02475     for(i=0;i<qp->opt_n;i++) {
02476         dist=-model_b*qp->opt_ce[i];
02477         dist+=(qp->opt_g0[i]+1.0);
02478         for(j=0;j<i;j++) {
02479             dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
02480         }
02481         for(j=i;j<qp->opt_n;j++) {
02482             dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
02483         }
02484         /*  SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */
02485         if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
02486             epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
02487         }
02488         else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
02489             epsilon_loqo=primal[i]*2.0;
02490         }
02491     }
02492 
02493     for(i=0;i<qp->opt_n;i++) {  /* clip alphas to bounds */
02494         if(primal[i]<=(0+epsilon_loqo)) {
02495             primal[i]=0;
02496         }
02497         else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
02498             primal[i]=qp->opt_up[i];
02499         }
02500     }
02501 
02502     obj_after=0;  /* calculate objective after optimization */
02503     for(i=0;i<qp->opt_n;i++) {
02504         obj_after+=(qp->opt_g0[i]*primal[i]);
02505         obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
02506         for(j=0;j<i;j++) {
02507             obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
02508         }
02509     }
02510 
02511     /* if optimizer returned NAN values, reset and retry with smaller */
02512     /* working set. */
02513     if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
02514         for(i=0;i<qp->opt_n;i++) {
02515             primal[i]=qp->opt_xinit[i];
02516         }
02517         model_b=0;
02518         if(svm_maxqpsize>2) {
02519             svm_maxqpsize--;  /* decrease size of qp-subproblems */
02520         }
02521     }
02522 
02523     if(obj_after >= obj_before) { /* check whether there was progress */
02524         (opt_precision)/=100.0;
02525         precision_violations++;
02526         if(verbosity>=2) {
02527             SG_SDEBUG("Increasing Precision of PR_LOQO.\n");
02528         }
02529     }
02530 
02531     if(precision_violations > 500) {
02532         (*epsilon_crit)*=10.0;
02533         precision_violations=0;
02534         SG_SINFO("Relaxing epsilon on KT-Conditions.\n");
02535     }
02536 
02537     (*threshold)=model_b;
02538 
02539     if(result!=OPTIMAL_SOLUTION) {
02540         SG_SERROR("PR_LOQO did not converge.\n");
02541         return(qp->opt_xinit);
02542     }
02543     else {
02544         return(primal);
02545     }
02546 }
02547 
02548 
02549 #endif //USE_SVMLIGHT
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation