|
SHOGUN v0.9.3
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2010 Soeren Sonnenburg 00008 * Copyright (c) 2007-2009 The LIBLINEAR Project. 00009 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 #include "lib/config.h" 00012 00013 #ifdef HAVE_LAPACK 00014 #include "lib/io.h" 00015 #include "lib/Signal.h" 00016 #include "lib/Time.h" 00017 #include "classifier/svm/LibLinear.h" 00018 #include "classifier/svm/SVM_linear.h" 00019 #include "classifier/svm/Tron.h" 00020 #include "features/DotFeatures.h" 00021 00022 using namespace shogun; 00023 00024 CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l) 00025 : CLinearClassifier() 00026 { 00027 liblinear_solver_type=l; 00028 use_bias=false; 00029 C1=1; 00030 C2=1; 00031 set_max_iterations(); 00032 } 00033 00034 CLibLinear::CLibLinear( 00035 float64_t C, CDotFeatures* traindat, CLabels* trainlab) 00036 : CLinearClassifier(), C1(C), C2(C), use_bias(true), epsilon(1e-5) 00037 { 00038 set_features(traindat); 00039 set_labels(trainlab); 00040 liblinear_solver_type=L2R_L1LOSS_SVC_DUAL; 00041 set_max_iterations(); 00042 } 00043 00044 00045 CLibLinear::~CLibLinear() 00046 { 00047 } 00048 00049 bool CLibLinear::train(CFeatures* data) 00050 { 00051 CSignal::clear_cancel(); 00052 ASSERT(labels); 00053 if (data) 00054 { 00055 if (!data->has_property(FP_DOT)) 00056 SG_ERROR("Specified features are not of type CDotFeatures\n"); 00057 00058 set_features((CDotFeatures*) data); 00059 } 00060 ASSERT(features); 00061 ASSERT(labels->is_two_class_labeling()); 00062 00063 00064 int32_t num_train_labels=labels->get_num_labels(); 00065 int32_t num_feat=features->get_dim_feature_space(); 00066 int32_t num_vec=features->get_num_vectors(); 00067 00068 if (liblinear_solver_type == L1R_L2LOSS_SVC || 00069 (liblinear_solver_type == L1R_LR) ) 00070 { 00071 if (num_feat!=num_train_labels) 00072 { 00073 SG_ERROR("L1 methods require the data to be transposed: " 00074 "number of features %d does not match number of " 00075 "training labels %d\n", 00076 num_feat, num_train_labels); 00077 } 00078 CMath::swap(num_feat, num_vec); 00079 00080 } 00081 else 00082 { 00083 if (num_vec!=num_train_labels) 00084 { 00085 SG_ERROR("number of vectors %d does not match " 00086 "number of training labels %d\n", 00087 num_vec, num_train_labels); 00088 } 00089 } 00090 delete[] w; 00091 if (use_bias) 00092 w=new float64_t[num_feat+1]; 00093 else 00094 w=new float64_t[num_feat+0]; 00095 w_dim=num_feat; 00096 00097 problem prob; 00098 if (use_bias) 00099 { 00100 prob.n=w_dim+1; 00101 memset(w, 0, sizeof(float64_t)*(w_dim+1)); 00102 } 00103 else 00104 { 00105 prob.n=w_dim; 00106 memset(w, 0, sizeof(float64_t)*(w_dim+0)); 00107 } 00108 prob.l=num_vec; 00109 prob.x=features; 00110 prob.y=new int[prob.l]; 00111 prob.use_bias=use_bias; 00112 00113 for (int32_t i=0; i<prob.l; i++) 00114 prob.y[i]=labels->get_int_label(i); 00115 00116 int pos = 0; 00117 int neg = 0; 00118 for(int i=0;i<prob.l;i++) 00119 { 00120 if(prob.y[i]==+1) 00121 pos++; 00122 } 00123 neg = prob.l - pos; 00124 00125 SG_INFO("%d training points %d dims\n", prob.l, prob.n); 00126 00127 function *fun_obj=NULL; 00128 double Cp=C1; 00129 double Cn=C2; 00130 switch (liblinear_solver_type) 00131 { 00132 case L2R_LR: 00133 { 00134 fun_obj=new l2r_lr_fun(&prob, Cp, Cn); 00135 CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations); 00136 SG_DEBUG("starting L2R_LR training via tron\n"); 00137 tron_obj.tron(w, max_train_time); 00138 SG_DEBUG("done with tron\n"); 00139 delete fun_obj; 00140 break; 00141 } 00142 case L2R_L2LOSS_SVC: 00143 { 00144 fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn); 00145 CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations); 00146 tron_obj.tron(w, max_train_time); 00147 delete fun_obj; 00148 break; 00149 } 00150 case L2R_L2LOSS_SVC_DUAL: 00151 solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL); 00152 break; 00153 case L2R_L1LOSS_SVC_DUAL: 00154 solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL); 00155 break; 00156 case L1R_L2LOSS_SVC: 00157 { 00158 //ASSUME FEATURES ARE TRANSPOSED ALREADY 00159 solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn); 00160 break; 00161 } 00162 case L1R_LR: 00163 { 00164 //ASSUME FEATURES ARE TRANSPOSED ALREADY 00165 solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn); 00166 break; 00167 } 00168 case MCSVM_CS: 00169 { 00170 SG_NOTIMPLEMENTED; 00171 /* TODO... 00172 model_->w=Malloc(double, n*nr_class); 00173 for(i=0;i<nr_class;i++) 00174 for(j=start[i];j<start[i]+count[i];j++) 00175 sub_prob.y[j] = i; 00176 Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps); 00177 Solver.Solve(model_->w); 00178 */ 00179 } 00180 default: 00181 SG_ERROR("Error: unknown solver_type\n"); 00182 break; 00183 } 00184 00185 if (use_bias) 00186 set_bias(w[w_dim]); 00187 else 00188 set_bias(0); 00189 00190 delete[] prob.y; 00191 00192 return true; 00193 } 00194 00195 // A coordinate descent algorithm for 00196 // L1-loss and L2-loss SVM dual problems 00197 // 00198 // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, 00199 // s.t. 0 <= alpha_i <= upper_bound_i, 00200 // 00201 // where Qij = yi yj xi^T xj and 00202 // D is a diagonal matrix 00203 // 00204 // In L1-SVM case: 00205 // upper_bound_i = Cp if y_i = 1 00206 // upper_bound_i = Cn if y_i = -1 00207 // D_ii = 0 00208 // In L2-SVM case: 00209 // upper_bound_i = INF 00210 // D_ii = 1/(2*Cp) if y_i = 1 00211 // D_ii = 1/(2*Cn) if y_i = -1 00212 // 00213 // Given: 00214 // x, y, Cp, Cn 00215 // eps is the stopping tolerance 00216 // 00217 // solution will be put in w 00218 00219 #undef GETI 00220 #define GETI(i) (y[i]+1) 00221 // To support weights for instances, use GETI(i) (i) 00222 00223 void CLibLinear::solve_l2r_l1l2_svc( 00224 const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st) 00225 { 00226 int l = prob->l; 00227 int w_size = prob->n; 00228 int i, s, iter = 0; 00229 double C, d, G; 00230 double *QD = new double[l]; 00231 int *index = new int[l]; 00232 double *alpha = new double[l]; 00233 int32_t *y = new int32_t[l]; 00234 int active_size = l; 00235 00236 // PG: projected gradient, for shrinking and stopping 00237 double PG; 00238 double PGmax_old = CMath::INFTY; 00239 double PGmin_old = -CMath::INFTY; 00240 double PGmax_new, PGmin_new; 00241 00242 // default solver_type: L2R_L2LOSS_SVC_DUAL 00243 double diag[3] = {0.5/Cn, 0, 0.5/Cp}; 00244 double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY}; 00245 if(st == L2R_L1LOSS_SVC_DUAL) 00246 { 00247 diag[0] = 0; 00248 diag[2] = 0; 00249 upper_bound[0] = Cn; 00250 upper_bound[2] = Cp; 00251 } 00252 00253 int n = prob->n; 00254 00255 if (prob->use_bias) 00256 n--; 00257 00258 for(i=0; i<w_size; i++) 00259 w[i] = 0; 00260 for(i=0; i<l; i++) 00261 { 00262 alpha[i] = 0; 00263 if(prob->y[i] > 0) 00264 { 00265 y[i] = +1; 00266 } 00267 else 00268 { 00269 y[i] = -1; 00270 } 00271 QD[i] = diag[GETI(i)]; 00272 00273 QD[i] += prob->x->dot(i,i); 00274 index[i] = i; 00275 } 00276 00277 CTime start_time; 00278 while (iter < max_iterations && !CSignal::cancel_computations()) 00279 { 00280 if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time) 00281 break; 00282 00283 PGmax_new = -CMath::INFTY; 00284 PGmin_new = CMath::INFTY; 00285 00286 for (i=0; i<active_size; i++) 00287 { 00288 int j = i+rand()%(active_size-i); 00289 CMath::swap(index[i], index[j]); 00290 } 00291 00292 for (s=0;s<active_size;s++) 00293 { 00294 i = index[s]; 00295 int32_t yi = y[i]; 00296 00297 G = prob->x->dense_dot(i, w, n); 00298 if (prob->use_bias) 00299 G+=w[n]; 00300 00301 G = G*yi-1; 00302 00303 C = upper_bound[GETI(i)]; 00304 G += alpha[i]*diag[GETI(i)]; 00305 00306 PG = 0; 00307 if (alpha[i] == 0) 00308 { 00309 if (G > PGmax_old) 00310 { 00311 active_size--; 00312 CMath::swap(index[s], index[active_size]); 00313 s--; 00314 continue; 00315 } 00316 else if (G < 0) 00317 PG = G; 00318 } 00319 else if (alpha[i] == C) 00320 { 00321 if (G < PGmin_old) 00322 { 00323 active_size--; 00324 CMath::swap(index[s], index[active_size]); 00325 s--; 00326 continue; 00327 } 00328 else if (G > 0) 00329 PG = G; 00330 } 00331 else 00332 PG = G; 00333 00334 PGmax_new = CMath::max(PGmax_new, PG); 00335 PGmin_new = CMath::min(PGmin_new, PG); 00336 00337 if(fabs(PG) > 1.0e-12) 00338 { 00339 double alpha_old = alpha[i]; 00340 alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C); 00341 d = (alpha[i] - alpha_old)*yi; 00342 00343 prob->x->add_to_dense_vec(d, i, w, n); 00344 00345 if (prob->use_bias) 00346 w[n]+=d; 00347 } 00348 } 00349 00350 iter++; 00351 float64_t gap=PGmax_new - PGmin_new; 00352 SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6); 00353 00354 if(gap <= eps) 00355 { 00356 if(active_size == l) 00357 break; 00358 else 00359 { 00360 active_size = l; 00361 PGmax_old = CMath::INFTY; 00362 PGmin_old = -CMath::INFTY; 00363 continue; 00364 } 00365 } 00366 PGmax_old = PGmax_new; 00367 PGmin_old = PGmin_new; 00368 if (PGmax_old <= 0) 00369 PGmax_old = CMath::INFTY; 00370 if (PGmin_old >= 0) 00371 PGmin_old = -CMath::INFTY; 00372 } 00373 00374 SG_DONE(); 00375 SG_INFO("optimization finished, #iter = %d\n",iter); 00376 if (iter >= max_iterations) 00377 { 00378 SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster" 00379 "(also see liblinear FAQ)\n\n"); 00380 } 00381 00382 // calculate objective value 00383 00384 double v = 0; 00385 int nSV = 0; 00386 for(i=0; i<w_size; i++) 00387 v += w[i]*w[i]; 00388 for(i=0; i<l; i++) 00389 { 00390 v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2); 00391 if(alpha[i] > 0) 00392 ++nSV; 00393 } 00394 SG_INFO("Objective value = %lf\n",v/2); 00395 SG_INFO("nSV = %d\n",nSV); 00396 00397 delete [] QD; 00398 delete [] alpha; 00399 delete [] y; 00400 delete [] index; 00401 } 00402 00403 // A coordinate descent algorithm for 00404 // L1-regularized L2-loss support vector classification 00405 // 00406 // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2, 00407 // 00408 // Given: 00409 // x, y, Cp, Cn 00410 // eps is the stopping tolerance 00411 // 00412 // solution will be put in w 00413 00414 #undef GETI 00415 #define GETI(i) (y[i]+1) 00416 // To support weights for instances, use GETI(i) (i) 00417 00418 void CLibLinear::solve_l1r_l2_svc( 00419 problem *prob_col, double eps, double Cp, double Cn) 00420 { 00421 int l = prob_col->l; 00422 int w_size = prob_col->n; 00423 int j, s, iter = 0; 00424 int active_size = w_size; 00425 int max_num_linesearch = 20; 00426 00427 double sigma = 0.01; 00428 double d, G_loss, G, H; 00429 double Gmax_old = CMath::INFTY; 00430 double Gmax_new; 00431 double Gmax_init=0; 00432 double d_old, d_diff; 00433 double loss_old=0, loss_new; 00434 double appxcond, cond; 00435 00436 int *index = new int[w_size]; 00437 int32_t *y = new int32_t[l]; 00438 double *b = new double[l]; // b = 1-ywTx 00439 double *xj_sq = new double[w_size]; 00440 00441 CDotFeatures* x = (CDotFeatures*) prob_col->x; 00442 void* iterator; 00443 int32_t ind; 00444 float64_t val; 00445 00446 double C[3] = {Cn,0,Cp}; 00447 00448 int n = prob_col->n; 00449 if (prob_col->use_bias) 00450 n--; 00451 00452 for(j=0; j<l; j++) 00453 { 00454 b[j] = 1; 00455 if(prob_col->y[j] > 0) 00456 y[j] = 1; 00457 else 00458 y[j] = -1; 00459 } 00460 00461 for(j=0; j<w_size; j++) 00462 { 00463 w[j] = 0; 00464 index[j] = j; 00465 xj_sq[j] = 0; 00466 00467 if (use_bias && j==n) 00468 { 00469 for (ind=0; ind<l; ind++) 00470 xj_sq[n] += C[GETI(ind)]; 00471 } 00472 else 00473 { 00474 iterator=x->get_feature_iterator(j); 00475 while (x->get_next_feature(ind, val, iterator)) 00476 xj_sq[j] += C[GETI(ind)]*val*val; 00477 x->free_feature_iterator(iterator); 00478 } 00479 } 00480 00481 00482 CTime start_time; 00483 while (iter < max_iterations && !CSignal::cancel_computations()) 00484 { 00485 if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time) 00486 break; 00487 00488 Gmax_new = 0; 00489 00490 for(j=0; j<active_size; j++) 00491 { 00492 int i = j+rand()%(active_size-j); 00493 CMath::swap(index[i], index[j]); 00494 } 00495 00496 for(s=0; s<active_size; s++) 00497 { 00498 j = index[s]; 00499 G_loss = 0; 00500 H = 0; 00501 00502 if (use_bias && j==n) 00503 { 00504 for (ind=0; ind<l; ind++) 00505 { 00506 if(b[ind] > 0) 00507 { 00508 double tmp = C[GETI(ind)]*y[ind]; 00509 G_loss -= tmp*b[ind]; 00510 H += tmp*y[ind]; 00511 } 00512 } 00513 } 00514 else 00515 { 00516 iterator=x->get_feature_iterator(j); 00517 00518 while (x->get_next_feature(ind, val, iterator)) 00519 { 00520 if(b[ind] > 0) 00521 { 00522 double tmp = C[GETI(ind)]*val*y[ind]; 00523 G_loss -= tmp*b[ind]; 00524 H += tmp*val*y[ind]; 00525 } 00526 } 00527 x->free_feature_iterator(iterator); 00528 } 00529 00530 G_loss *= 2; 00531 00532 G = G_loss; 00533 H *= 2; 00534 H = CMath::max(H, 1e-12); 00535 00536 double Gp = G+1; 00537 double Gn = G-1; 00538 double violation = 0; 00539 if(w[j] == 0) 00540 { 00541 if(Gp < 0) 00542 violation = -Gp; 00543 else if(Gn > 0) 00544 violation = Gn; 00545 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) 00546 { 00547 active_size--; 00548 CMath::swap(index[s], index[active_size]); 00549 s--; 00550 continue; 00551 } 00552 } 00553 else if(w[j] > 0) 00554 violation = fabs(Gp); 00555 else 00556 violation = fabs(Gn); 00557 00558 Gmax_new = CMath::max(Gmax_new, violation); 00559 00560 // obtain Newton direction d 00561 if(Gp <= H*w[j]) 00562 d = -Gp/H; 00563 else if(Gn >= H*w[j]) 00564 d = -Gn/H; 00565 else 00566 d = -w[j]; 00567 00568 if(fabs(d) < 1.0e-12) 00569 continue; 00570 00571 double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; 00572 d_old = 0; 00573 int num_linesearch; 00574 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) 00575 { 00576 d_diff = d_old - d; 00577 cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; 00578 00579 appxcond = xj_sq[j]*d*d + G_loss*d + cond; 00580 if(appxcond <= 0) 00581 { 00582 if (use_bias && j==n) 00583 { 00584 for (ind=0; ind<l; ind++) 00585 b[ind] += d_diff*y[ind]; 00586 break; 00587 } 00588 else 00589 { 00590 iterator=x->get_feature_iterator(j); 00591 while (x->get_next_feature(ind, val, iterator)) 00592 b[ind] += d_diff*val*y[ind]; 00593 00594 x->free_feature_iterator(iterator); 00595 break; 00596 } 00597 } 00598 00599 if(num_linesearch == 0) 00600 { 00601 loss_old = 0; 00602 loss_new = 0; 00603 00604 if (use_bias && j==n) 00605 { 00606 for (ind=0; ind<l; ind++) 00607 { 00608 if(b[ind] > 0) 00609 loss_old += C[GETI(ind)]*b[ind]*b[ind]; 00610 double b_new = b[ind] + d_diff*y[ind]; 00611 b[ind] = b_new; 00612 if(b_new > 0) 00613 loss_new += C[GETI(ind)]*b_new*b_new; 00614 } 00615 } 00616 else 00617 { 00618 iterator=x->get_feature_iterator(j); 00619 while (x->get_next_feature(ind, val, iterator)) 00620 { 00621 if(b[ind] > 0) 00622 loss_old += C[GETI(ind)]*b[ind]*b[ind]; 00623 double b_new = b[ind] + d_diff*val*y[ind]; 00624 b[ind] = b_new; 00625 if(b_new > 0) 00626 loss_new += C[GETI(ind)]*b_new*b_new; 00627 } 00628 x->free_feature_iterator(iterator); 00629 } 00630 } 00631 else 00632 { 00633 loss_new = 0; 00634 if (use_bias && j==n) 00635 { 00636 for (ind=0; ind<l; ind++) 00637 { 00638 double b_new = b[ind] + d_diff*y[ind]; 00639 b[ind] = b_new; 00640 if(b_new > 0) 00641 loss_new += C[GETI(ind)]*b_new*b_new; 00642 } 00643 } 00644 else 00645 { 00646 iterator=x->get_feature_iterator(j); 00647 while (x->get_next_feature(ind, val, iterator)) 00648 { 00649 double b_new = b[ind] + d_diff*val*y[ind]; 00650 b[ind] = b_new; 00651 if(b_new > 0) 00652 loss_new += C[GETI(ind)]*b_new*b_new; 00653 } 00654 x->free_feature_iterator(iterator); 00655 } 00656 } 00657 00658 cond = cond + loss_new - loss_old; 00659 if(cond <= 0) 00660 break; 00661 else 00662 { 00663 d_old = d; 00664 d *= 0.5; 00665 delta *= 0.5; 00666 } 00667 } 00668 00669 w[j] += d; 00670 00671 // recompute b[] if line search takes too many steps 00672 if(num_linesearch >= max_num_linesearch) 00673 { 00674 SG_INFO("#"); 00675 for(int i=0; i<l; i++) 00676 b[i] = 1; 00677 00678 for(int i=0; i<n; i++) 00679 { 00680 if(w[i]==0) 00681 continue; 00682 00683 iterator=x->get_feature_iterator(i); 00684 while (x->get_next_feature(ind, val, iterator)) 00685 b[ind] -= w[i]*val*y[ind]; 00686 x->free_feature_iterator(iterator); 00687 } 00688 00689 if (use_bias && w[n]) 00690 { 00691 for (ind=0; ind<l; ind++) 00692 b[ind] -= w[n]*y[ind]; 00693 } 00694 } 00695 } 00696 00697 if(iter == 0) 00698 Gmax_init = Gmax_new; 00699 iter++; 00700 00701 SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6); 00702 00703 if(Gmax_new <= eps*Gmax_init) 00704 { 00705 if(active_size == w_size) 00706 break; 00707 else 00708 { 00709 active_size = w_size; 00710 Gmax_old = CMath::INFTY; 00711 continue; 00712 } 00713 } 00714 00715 Gmax_old = Gmax_new; 00716 } 00717 00718 SG_DONE(); 00719 SG_INFO("optimization finished, #iter = %d\n", iter); 00720 if(iter >= max_iterations) 00721 SG_WARNING("\nWARNING: reaching max number of iterations\n"); 00722 00723 // calculate objective value 00724 00725 double v = 0; 00726 int nnz = 0; 00727 for(j=0; j<w_size; j++) 00728 { 00729 if(w[j] != 0) 00730 { 00731 v += fabs(w[j]); 00732 nnz++; 00733 } 00734 } 00735 for(j=0; j<l; j++) 00736 if(b[j] > 0) 00737 v += C[GETI(j)]*b[j]*b[j]; 00738 00739 SG_INFO("Objective value = %lf\n", v); 00740 SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size); 00741 00742 delete [] index; 00743 delete [] y; 00744 delete [] b; 00745 delete [] xj_sq; 00746 } 00747 00748 // A coordinate descent algorithm for 00749 // L1-regularized logistic regression problems 00750 // 00751 // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)), 00752 // 00753 // Given: 00754 // x, y, Cp, Cn 00755 // eps is the stopping tolerance 00756 // 00757 // solution will be put in w 00758 00759 #undef GETI 00760 #define GETI(i) (y[i]+1) 00761 // To support weights for instances, use GETI(i) (i) 00762 00763 void CLibLinear::solve_l1r_lr( 00764 const problem *prob_col, double eps, 00765 double Cp, double Cn) 00766 { 00767 int l = prob_col->l; 00768 int w_size = prob_col->n; 00769 int j, s, iter = 0; 00770 int active_size = w_size; 00771 int max_num_linesearch = 20; 00772 00773 double x_min = 0; 00774 double sigma = 0.01; 00775 double d, G, H; 00776 double Gmax_old = CMath::INFTY; 00777 double Gmax_new; 00778 double Gmax_init=0; 00779 double sum1, appxcond1; 00780 double sum2, appxcond2; 00781 double cond; 00782 00783 int *index = new int[w_size]; 00784 int32_t *y = new int32_t[l]; 00785 double *exp_wTx = new double[l]; 00786 double *exp_wTx_new = new double[l]; 00787 double *xj_max = new double[w_size]; 00788 double *C_sum = new double[w_size]; 00789 double *xjneg_sum = new double[w_size]; 00790 double *xjpos_sum = new double[w_size]; 00791 00792 CDotFeatures* x = prob_col->x; 00793 void* iterator; 00794 int ind; 00795 double val; 00796 00797 double C[3] = {Cn,0,Cp}; 00798 00799 int n = prob_col->n; 00800 if (prob_col->use_bias) 00801 n--; 00802 00803 for(j=0; j<l; j++) 00804 { 00805 exp_wTx[j] = 1; 00806 if(prob_col->y[j] > 0) 00807 y[j] = 1; 00808 else 00809 y[j] = -1; 00810 } 00811 for(j=0; j<w_size; j++) 00812 { 00813 w[j] = 0; 00814 index[j] = j; 00815 xj_max[j] = 0; 00816 C_sum[j] = 0; 00817 xjneg_sum[j] = 0; 00818 xjpos_sum[j] = 0; 00819 00820 if (use_bias && j==n) 00821 { 00822 for (ind=0; ind<l; ind++) 00823 { 00824 x_min = CMath::min(x_min, 1.0); 00825 xj_max[j] = CMath::max(xj_max[j], 1.0); 00826 C_sum[j] += C[GETI(ind)]; 00827 if(y[ind] == -1) 00828 xjneg_sum[j] += C[GETI(ind)]; 00829 else 00830 xjpos_sum[j] += C[GETI(ind)]; 00831 } 00832 } 00833 else 00834 { 00835 iterator=x->get_feature_iterator(j); 00836 while (x->get_next_feature(ind, val, iterator)) 00837 { 00838 x_min = CMath::min(x_min, val); 00839 xj_max[j] = CMath::max(xj_max[j], val); 00840 C_sum[j] += C[GETI(ind)]; 00841 if(y[ind] == -1) 00842 xjneg_sum[j] += C[GETI(ind)]*val; 00843 else 00844 xjpos_sum[j] += C[GETI(ind)]*val; 00845 } 00846 x->free_feature_iterator(iterator); 00847 } 00848 } 00849 00850 CTime start_time; 00851 while (iter < max_iterations && !CSignal::cancel_computations()) 00852 { 00853 if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time) 00854 break; 00855 00856 Gmax_new = 0; 00857 00858 for(j=0; j<active_size; j++) 00859 { 00860 int i = j+rand()%(active_size-j); 00861 CMath::swap(index[i], index[j]); 00862 } 00863 00864 for(s=0; s<active_size; s++) 00865 { 00866 j = index[s]; 00867 sum1 = 0; 00868 sum2 = 0; 00869 H = 0; 00870 00871 if (use_bias && j==n) 00872 { 00873 for (ind=0; ind<l; ind++) 00874 { 00875 double exp_wTxind = exp_wTx[ind]; 00876 double tmp1 = 1.0/(1+exp_wTxind); 00877 double tmp2 = C[GETI(ind)]*tmp1; 00878 double tmp3 = tmp2*exp_wTxind; 00879 sum2 += tmp2; 00880 sum1 += tmp3; 00881 H += tmp1*tmp3; 00882 } 00883 } 00884 else 00885 { 00886 iterator=x->get_feature_iterator(j); 00887 while (x->get_next_feature(ind, val, iterator)) 00888 { 00889 double exp_wTxind = exp_wTx[ind]; 00890 double tmp1 = val/(1+exp_wTxind); 00891 double tmp2 = C[GETI(ind)]*tmp1; 00892 double tmp3 = tmp2*exp_wTxind; 00893 sum2 += tmp2; 00894 sum1 += tmp3; 00895 H += tmp1*tmp3; 00896 } 00897 x->free_feature_iterator(iterator); 00898 } 00899 00900 G = -sum2 + xjneg_sum[j]; 00901 00902 double Gp = G+1; 00903 double Gn = G-1; 00904 double violation = 0; 00905 if(w[j] == 0) 00906 { 00907 if(Gp < 0) 00908 violation = -Gp; 00909 else if(Gn > 0) 00910 violation = Gn; 00911 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) 00912 { 00913 active_size--; 00914 CMath::swap(index[s], index[active_size]); 00915 s--; 00916 continue; 00917 } 00918 } 00919 else if(w[j] > 0) 00920 violation = fabs(Gp); 00921 else 00922 violation = fabs(Gn); 00923 00924 Gmax_new = CMath::max(Gmax_new, violation); 00925 00926 // obtain Newton direction d 00927 if(Gp <= H*w[j]) 00928 d = -Gp/H; 00929 else if(Gn >= H*w[j]) 00930 d = -Gn/H; 00931 else 00932 d = -w[j]; 00933 00934 if(fabs(d) < 1.0e-12) 00935 continue; 00936 00937 d = CMath::min(CMath::max(d,-10.0),10.0); 00938 00939 double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; 00940 int num_linesearch; 00941 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) 00942 { 00943 cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; 00944 00945 if(x_min >= 0) 00946 { 00947 double tmp = exp(d*xj_max[j]); 00948 appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j]; 00949 appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j]; 00950 if(CMath::min(appxcond1,appxcond2) <= 0) 00951 { 00952 if (use_bias && j==n) 00953 { 00954 for (ind=0; ind<l; ind++) 00955 exp_wTx[ind] *= exp(d); 00956 } 00957 00958 else 00959 { 00960 iterator=x->get_feature_iterator(j); 00961 while (x->get_next_feature(ind, val, iterator)) 00962 exp_wTx[ind] *= exp(d*val); 00963 x->free_feature_iterator(iterator); 00964 } 00965 break; 00966 } 00967 } 00968 00969 cond += d*xjneg_sum[j]; 00970 00971 int i = 0; 00972 00973 if (use_bias && j==n) 00974 { 00975 for (ind=0; ind<l; ind++) 00976 { 00977 double exp_dx = exp(d); 00978 exp_wTx_new[i] = exp_wTx[ind]*exp_dx; 00979 cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); 00980 i++; 00981 } 00982 } 00983 else 00984 { 00985 00986 iterator=x->get_feature_iterator(j); 00987 while (x->get_next_feature(ind, val, iterator)) 00988 { 00989 double exp_dx = exp(d*val); 00990 exp_wTx_new[i] = exp_wTx[ind]*exp_dx; 00991 cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); 00992 i++; 00993 } 00994 x->free_feature_iterator(iterator); 00995 } 00996 00997 if(cond <= 0) 00998 { 00999 i = 0; 01000 if (use_bias && j==n) 01001 { 01002 for (ind=0; ind<l; ind++) 01003 { 01004 exp_wTx[ind] = exp_wTx_new[i]; 01005 i++; 01006 } 01007 } 01008 else 01009 { 01010 iterator=x->get_feature_iterator(j); 01011 while (x->get_next_feature(ind, val, iterator)) 01012 { 01013 exp_wTx[ind] = exp_wTx_new[i]; 01014 i++; 01015 } 01016 x->free_feature_iterator(iterator); 01017 } 01018 break; 01019 } 01020 else 01021 { 01022 d *= 0.5; 01023 delta *= 0.5; 01024 } 01025 } 01026 01027 w[j] += d; 01028 01029 // recompute exp_wTx[] if line search takes too many steps 01030 if(num_linesearch >= max_num_linesearch) 01031 { 01032 SG_INFO("#"); 01033 for(int i=0; i<l; i++) 01034 exp_wTx[i] = 0; 01035 01036 for(int i=0; i<w_size; i++) 01037 { 01038 if(w[i]==0) continue; 01039 01040 if (use_bias && i==n) 01041 { 01042 for (ind=0; ind<l; ind++) 01043 exp_wTx[ind] += w[i]; 01044 } 01045 else 01046 { 01047 iterator=x->get_feature_iterator(i); 01048 while (x->get_next_feature(ind, val, iterator)) 01049 exp_wTx[ind] += w[i]*val; 01050 x->free_feature_iterator(iterator); 01051 } 01052 } 01053 01054 for(int i=0; i<l; i++) 01055 exp_wTx[i] = exp(exp_wTx[i]); 01056 } 01057 } 01058 01059 if(iter == 0) 01060 Gmax_init = Gmax_new; 01061 iter++; 01062 SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6); 01063 01064 if(Gmax_new <= eps*Gmax_init) 01065 { 01066 if(active_size == w_size) 01067 break; 01068 else 01069 { 01070 active_size = w_size; 01071 Gmax_old = CMath::INFTY; 01072 continue; 01073 } 01074 } 01075 01076 Gmax_old = Gmax_new; 01077 } 01078 01079 SG_DONE(); 01080 SG_INFO("optimization finished, #iter = %d\n", iter); 01081 if(iter >= max_iterations) 01082 SG_WARNING("\nWARNING: reaching max number of iterations\n"); 01083 01084 // calculate objective value 01085 01086 double v = 0; 01087 int nnz = 0; 01088 for(j=0; j<w_size; j++) 01089 if(w[j] != 0) 01090 { 01091 v += fabs(w[j]); 01092 nnz++; 01093 } 01094 for(j=0; j<l; j++) 01095 if(y[j] == 1) 01096 v += C[GETI(j)]*log(1+1/exp_wTx[j]); 01097 else 01098 v += C[GETI(j)]*log(1+exp_wTx[j]); 01099 01100 SG_INFO("Objective value = %lf\n", v); 01101 SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size); 01102 01103 delete [] index; 01104 delete [] y; 01105 delete [] exp_wTx; 01106 delete [] exp_wTx_new; 01107 delete [] xj_max; 01108 delete [] C_sum; 01109 delete [] xjneg_sum; 01110 delete [] xjpos_sum; 01111 } 01112 01113 01114 #endif //HAVE_LAPACK