SHOGUN v0.9.3
SVM_light.h
Go to the documentation of this file.
00001 /***********************************************************************/
00002 /*                                                                     */
00003 /*   SVM_light.h                                                       */
00004 /*                                                                     */
00005 /*   Author: Thorsten Joachims                                         */
00006 /*   Date: 19.07.99                                                    */
00007 /*                                                                     */
00008 /*   Copyright (c) 1999  Universitaet Dortmund - All rights reserved   */
00009 /*                                                                     */
00010 /*   This software is available for non-commercial use only. It must   */
00011 /*   not be modified and distributed without prior permission of the   */
00012 /*   author. The author is not responsible for implications from the   */
00013 /*   use of this software.                                             */
00014 /*                                                                     */
00015 /*   THIS INCLUDES THE FOLLOWING ADDITIONS                             */
00016 /*   Generic Kernel Interfacing: Soeren Sonnenburg                     */
00017 /*   Parallelizations: Soeren Sonnenburg                               */
00018 /*   Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg       */
00019 /*   Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg                 */
00020 /*                                                                     */
00021 /***********************************************************************/
00022 #ifndef _SVMLight_H___
00023 #define _SVMLight_H___
00024 
00025 #include "lib/config.h"
00026 
00027 #ifdef USE_SVMLIGHT
00028 #include "classifier/svm/SVM.h"
00029 #include "kernel/Kernel.h"
00030 #include "lib/Mathematics.h"
00031 #include "lib/common.h"
00032 
00033 #include <stdio.h>
00034 #include <ctype.h>
00035 #include <string.h>
00036 #include <stdlib.h>
00037 #include <time.h>
00038 
00039 namespace shogun
00040 {
00041 # define VERSION       "V3.50 -- correct??" /* version string; the "-- correct??" suffix is part of the literal, i.e. the value is flagged as unconfirmed. NOTE: #define is not scoped by the enclosing namespace, so this name is visible to every includer */
00042 # define VERSION_DATE  "01.11.00 -- correct??" /* date string paired with VERSION; likewise flagged as unconfirmed inside the literal */
00043 
00044 # define DEF_PRECISION 1E-14 /* presumably the default solver precision -- not referenced in this header, TODO confirm in SVM_light.cpp */
00045 # define MAXSHRINK 50000 /* presumably an upper limit used by the shrinking heuristic -- not referenced in this header, TODO confirm */
00046 
00047 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00048 
00049 struct MODEL { /* presumably the trained-model record of SVM-light; this header shows only the field types, so the per-field notes below are inferred from names / upstream SVM-light -- TODO confirm in SVM_light.cpp */
00051 int32_t    sv_num; /**< presumably the number of support vectors */
00053 int32_t    at_upper_bound; /**< presumably how many alphas sit at their upper bound */
00055 float64_t b; /**< presumably the bias / threshold term */
00057 int32_t*   supvec; /**< raw pointer, presumably support-vector indices; ownership is not visible here */
00059 float64_t *alpha; /**< raw pointer, presumably the alpha coefficients; ownership is not visible here */
00061 int32_t    *index; /**< raw pointer, presumably a map from document number to position in the model */
00063 int32_t    totdoc; /**< presumably the total number of training examples */
00065 CKernel* kernel; /**< kernel object pointer; whether MODEL holds a reference on it is not visible here */
00066 
00067 /* the following values are not written to file */
00069 float64_t loo_error; /**< presumably the leave-one-out error estimate */
00071 float64_t loo_recall; /**< presumably the leave-one-out recall estimate */
00073 float64_t loo_precision; /**< presumably the leave-one-out precision estimate */
00074 
00076 float64_t xa_error; /**< presumably the xi/alpha error estimate */
00078 float64_t xa_recall; /**< presumably the xi/alpha recall estimate */
00080 float64_t xa_precision; /**< presumably the xi/alpha precision estimate */
00081 };
00082 
00084 typedef struct quadratic_program { /* description of one QP sub-problem, passed as QP* to optimize_svm(), compute_matrices_for_optimization*() and optimize_qp(); field notes are inferred from names / upstream SVM-light -- TODO confirm */
00086   int32_t   opt_n; /**< presumably the number of variables */
00088   int32_t   opt_m; /**< presumably the number of linear equality constraints */
00090   float64_t *opt_ce; /**< presumably the equality-constraint coefficients */
00092   float64_t *opt_ce0; /**< presumably the constant term of the equality constraints */
00094   float64_t *opt_g; /**< presumably the Hessian of the objective; storage layout not visible here */
00096   float64_t *opt_g0; /**< presumably the linear part of the objective */
00098   float64_t *opt_xinit; /**< presumably the starting values of the variables */
00100   float64_t *opt_low; /**< presumably the lower box bounds */
00102   float64_t *opt_up; /**< presumably the upper box bounds */
00103 } QP;
00104 
00106 typedef int32_t FNUM; /**< alias of int32_t; presumably a feature-number type kept from SVM-light -- not used anywhere else in this header */
00107 
00109 typedef float64_t FVAL; /**< alias of float64_t; presumably a feature-value type kept from SVM-light -- not used anywhere else in this header */
00110 
00112 struct LEARN_PARM { /* learning-parameter record (CSVMLight keeps a LEARN_PARM* member); this header shows only types, so the per-field notes are inferred from names / upstream SVM-light -- TODO confirm in SVM_light.cpp */
00114   int32_t   type; /**< presumably selects the learning task (e.g. classification vs. regression) */
00116   float64_t svm_c; /**< presumably the upper bound C on the alphas */
00118   float64_t* eps; /**< pointer, not a scalar; presumably per-example epsilon values */
00120   float64_t svm_costratio; /**< presumably the factor applied to C for positive examples */
00122   float64_t transduction_posratio; /**< presumably the fraction of unlabeled examples to be ... (sentence continues in the comment on the next line) */
00123   /* classified as positives */
00125   int32_t   biased_hyperplane; /**< presumably non-zero to use a hyperplane with bias term */
00130   int32_t   sharedslack; /**< presumably enables shared slack variables -- TODO confirm */
00132   int32_t   svm_maxqpsize; /**< presumably the maximum working-set size */
00134   int32_t   svm_newvarsinqp; /**< presumably the number of new variables entering the working set per iteration */
00136   int32_t   kernel_cache_size; /**< presumably the kernel cache size; unit not visible here */
00138   float64_t epsilon_crit; /**< presumably the tolerance used in the stopping criterion */
00140   float64_t epsilon_shrink; /**< presumably the margin above zero a multiplier needs for shrinking */
00142   int32_t   svm_iter_to_shrink; /**< presumably the iteration count after which an example may be shrunk away */
00146   int32_t   maxiter; /**< presumably an iteration limit */
00148   int32_t   remove_inconsistent; /**< presumably a flag for removing inconsistent examples */
00152   int32_t   skip_final_opt_check; /**< presumably a flag that skips the final optimality check */
00154   int32_t   compute_loo; /**< presumably a flag that requests leave-one-out estimates */
00158   float64_t rho; /**< presumably the parameter of the xi/alpha estimators */
00162   int32_t   xa_depth; /**< presumably a search depth for the xi/alpha estimators */
00164   char predfile[200]; /**< fixed 200-byte buffer; presumably a file name for predictions */
00168   char alphafile[200]; /**< fixed 200-byte buffer; presumably a file name for stored alphas */
00169 
00170   /* you probably do not want to touch the following */
00172   float64_t epsilon_const; /**< presumably the tolerance on the equality constraint */
00174   float64_t epsilon_a; /**< presumably the tolerance for alphas at their bounds */
00176   float64_t opt_precision; /**< presumably the QP solver precision; CSVMLight also declares a member of the same name */
00177 
00178   /* the following are only for internal use */
00180   int32_t   svm_c_steps; /**< presumably the number of steps when searching for C */
00182   float64_t svm_c_factor; /**< presumably the factor by which C changes per step */
00184   float64_t svm_costratio_unlab; /**< presumably the cost ratio applied to unlabeled examples */
00186   float64_t svm_unlabbound; /**< presumably a bound applied to unlabeled examples */
00188   float64_t *svm_cost; /**< raw pointer; presumably individual upper bounds per variable */
00189 };
00190 
00192 struct TIMING { /* seven int32_t counters, handed to optimize_to_convergence() as TIMING*; presumably accumulated time per solver phase -- the unit is not visible in this header, TODO confirm */
00194   int32_t   time_kernel; /**< presumably time spent evaluating kernels */
00196   int32_t   time_opti; /**< presumably time spent in the QP optimizer */
00198   int32_t   time_shrink; /**< presumably time spent shrinking */
00200   int32_t   time_update; /**< presumably time spent updating the linear component */
00202   int32_t   time_model; /**< presumably time spent computing the model */
00204   int32_t   time_check; /**< presumably time spent in optimality checks */
00206   int32_t   time_select; /**< presumably time spent selecting the working set */
00207 };
00208 
00209 
00211 struct SHRINK_STATE /* bookkeeping record passed as SHRINK_STATE* to init_shrink_state(), shrink_state_cleanup(), shrink_problem() and reactivate_inactive_examples(); field notes are inferred from names / upstream SVM-light -- TODO confirm */
00212 {
00214   int32_t   *active; /**< raw pointer; presumably a per-example "still active" flag array */
00216   int32_t   *inactive_since; /**< raw pointer; presumably the iteration at which each example became inactive */
00218   int32_t   deactnum; /**< presumably the number of deactivation rounds so far */
00220   float64_t **a_history; /**< pointer to pointers; presumably saved alpha vectors, one per deactivation round */
00222   int32_t   maxhistory; /**< presumably the capacity of a_history; init_shrink_state() takes a parameter of the same name */
00224   float64_t *last_a; /**< raw pointer; presumably the alpha values at the last update */
00226   float64_t *last_lin; /**< raw pointer; presumably the lin values at the last update */
00227 };
00228 #endif // DOXYGEN_SHOULD_SKIP_THIS
00229 
00231 class CSVMLight : public CSVM /* SVM-light solver exposed as a CSVM subclass; the whole declaration is compiled only when USE_SVMLIGHT is defined. Notes marked "presumably" are inferred from names / upstream SVM-light, not from code visible in this header -- TODO confirm in SVM_light.cpp */
00232 {
00233  public:
00235   CSVMLight(); /**< default constructor */
00236 
00243   CSVMLight(float64_t C, CKernel* k, CLabels* lab); /**< construct from a float64_t C, a kernel and labels; presumably C is the regularization constant */
00244   virtual ~CSVMLight();
00245 
00247   void init(); /**< (re)initialization routine; also called by fromFile() right after deserializing */
00248 
00257   virtual bool train(CFeatures* data=NULL); /**< train the classifier; data defaults to NULL; the bool presumably signals success */
00258 
00263   virtual inline EClassifierType get_classifier_type() { return CT_LIGHT; } /**< always returns CT_LIGHT */
00264 
00269   int32_t   get_runtime(); /**< presumably the elapsed run time; unit not visible here */
00270 
00271 
00273   void   svm_learn(); /**< presumably the top-level learning routine */
00274 
00291   int32_t optimize_to_convergence( /* presumably the main optimization loop; writes through maxdiff and timing_profile are suggested by the pointer parameters only */
00292     int32_t* docs, int32_t* label, int32_t totdoc, SHRINK_STATE *shrink_state,
00293     int32_t *inconsistent, float64_t *a, float64_t *lin, float64_t *c,
00294     TIMING *timing_profile, float64_t *maxdiff, int32_t heldout,
00295     int32_t retrain);
00296 
00307   virtual float64_t compute_objective_function( /* virtual; presumably returns the current objective value */
00308     float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
00309     int32_t totdoc);
00310 
00315   void   clear_index(int32_t *index); /**< presumably resets an index array to empty */
00316 
00322   void   add_to_index(int32_t *index, int32_t elem); /**< presumably appends elem to an index array */
00323 
00331   int32_t   compute_index(int32_t *binfeature, int32_t range, int32_t *index); /**< presumably builds an index of the set entries of binfeature and returns their count */
00332 
00351   void optimize_svm( /* presumably solves the QP on the current working set */
00352     int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
00353     float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
00354     int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
00355     float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target);
00356 
00374   void compute_matrices_for_optimization( /* presumably fills *qp for the working set */
00375     int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
00376     float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
00377     float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
00378     float64_t *aicache, QP *qp);
00379 
00397   void compute_matrices_for_optimization_parallel( /* same parameter list as compute_matrices_for_optimization(); presumably a multi-threaded variant */
00398     int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
00399     float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
00400     float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
00401     float64_t *aicache, QP *qp);
00402 
00415   int32_t   calculate_svm_model( /* presumably updates the model from the current alphas */
00416     int32_t* docs, int32_t *label,float64_t *lin, float64_t *a,
00417     float64_t* a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum);
00418 
00435   int32_t   check_optimality( /* presumably tests the optimality conditions; meaning of the int32_t result not visible here */
00436     int32_t *label, float64_t *a, float64_t* lin, float64_t *c, int32_t totdoc,
00437     float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
00438     int32_t *inconsistent,int32_t* active2dnum, int32_t *last_suboptimal_at,
00439     int32_t iteration);
00440 
00454   virtual void update_linear_component( /* virtual; presumably refreshes lin after the alphas changed */
00455     int32_t* docs, int32_t *label, int32_t *active2dnum, float64_t *a,
00456     float64_t* a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
00457     float64_t *aicache, float64_t* c);
00458 
00463   static void* update_linear_component_mkl_linadd_helper(void* p); /**< static void*(void*) helper; presumably a thread entry point */
00464 
00477   void update_linear_component_mkl( /* presumably the multiple-kernel-learning variant of update_linear_component(); note it takes no c parameter */
00478           int32_t* docs, int32_t *label, int32_t *active2dnum, float64_t *a,
00479           float64_t* a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
00480           float64_t *aicache);
00481 
00494   void update_linear_component_mkl_linadd( /* same parameter list as update_linear_component_mkl(); presumably the linadd-accelerated variant */
00495           int32_t* docs, int32_t *label, int32_t *active2dnum, float64_t *a,
00496           float64_t* a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
00497           float64_t *aicache);
00498 
00499   void call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin); /**< presumably invokes the MKL callback -- TODO confirm */
00500 
00519   int32_t select_next_qp_subproblem_grad( /* presumably gradient-based working-set selection */
00520     int32_t *label, float64_t *a, float64_t* lin, float64_t* c, int32_t totdoc,
00521     int32_t qp_size, int32_t *inconsistent, int32_t* active2dnum,
00522     int32_t* working2dnum, float64_t *selcrit, int32_t *select,
00523     int32_t cache_only, int32_t *key, int32_t *chosen);
00524 
00543   int32_t select_next_qp_subproblem_rand( /* presumably randomized working-set selection */
00544     int32_t* label, float64_t *a, float64_t *lin, float64_t *c,
00545     int32_t totdoc, int32_t qp_size, int32_t *inconsistent,
00546     int32_t *active2dnum, int32_t *working2dnum, float64_t *selcrit,
00547     int32_t *select, int32_t *key, int32_t *chosen, int32_t iteration);
00548 
00556   void   select_top_n( /* presumably writes the indices of the n largest selcrit entries into select */
00557     float64_t *selcrit, int32_t range, int32_t *select, int32_t n);
00558 
00565   void   init_shrink_state( /* presumably sets up *shrink_state for totdoc examples */
00566     SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory);
00567 
00572   void   shrink_state_cleanup(SHRINK_STATE *shrink_state); /**< presumably releases what init_shrink_state() set up */
00573 
00589   int32_t shrink_problem( /* presumably deactivates variables unlikely to change */
00590     SHRINK_STATE *shrink_state, int32_t *active2dnum,
00591     int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
00592     int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
00593     float64_t* lin, int* label); /* NOTE(review): label is int* here but int32_t* in every other declaration of this class */
00594 
00609   virtual void   reactivate_inactive_examples( /* virtual; presumably re-activates the variables removed by shrinking */
00610     int32_t *label,float64_t *a,SHRINK_STATE *shrink_state, float64_t *lin,
00611     float64_t *c, int32_t totdoc,int32_t iteration, int32_t *inconsistent,
00612     int32_t *docs,float64_t *aicache, float64_t* maxdiff);
00613 
00614 protected:
00621     inline virtual float64_t compute_kernel(int32_t i, int32_t j) /* virtual hook: forwards (i, j) to kernel->kernel() and returns its result */
00622     {
00623         return kernel->kernel(i, j);
00624     }
00625 
00630     static void* compute_kernel_helper(void* p); /**< static void*(void*) helper; presumably a thread entry point */
00631 
00636     static void* update_linear_component_linadd_helper(void* p); /**< static void*(void*) helper; presumably a thread entry point */
00637 
00642     static void* reactivate_inactive_examples_vanilla_helper(void* p); /**< static void*(void*) helper; presumably a thread entry point */
00643 
00648     static void* reactivate_inactive_examples_linadd_helper(void* p); /**< static void*(void*) helper; presumably a thread entry point */
00649 
00651     inline virtual const char* get_name() const { return "SVM_light"; } /**< always returns the literal "SVM_light" */
00652 
00653     /* interface to QP-solver */
00654     float64_t *optimize_qp( QP *qp,float64_t *epsilon_crit, int32_t nx, /* svm_maxqpsize is taken by non-const reference, so the callee can modify the caller's value */
00655             float64_t *threshold, int32_t& svm_maxqpsize);
00656 
00657 
00658 #ifdef HAVE_BOOST_SERIALIZATION /* everything up to the matching #endif exists only in Boost.Serialization builds */
00659 private:
00660 
00661     friend class ::boost::serialization::access;
00662     template<class Archive>
00663         void serialize(Archive & ar, const unsigned int archive_version) /* archives only the CSVM base sub-object; archive_version is unused */
00664         {
00665 
00666             SG_DEBUG("archiving CSVMLight\n");
00667 
00668             ar & ::boost::serialization::base_object<CSVM>(*this);
00669 
00670             // ar & svm_cost;
00671 
00672             // ar & set_cost_individually;
00673 
00674             SG_DEBUG("done with CSVMLight\n");
00675 
00676         }
00677 
00678 public:
00679 
00680     virtual std::string toString() const /* serializes *this into a Boost text archive and returns the text. NOTE(review): this header does not include <sstream>/<fstream> itself; presumably they arrive transitively */
00681     {
00682         std::ostringstream s;
00683 
00684         ::boost::archive::text_oarchive oa(s);
00685 
00686         oa << *this;
00687 
00688         return s.str();
00689     }
00690 
00691     virtual void fromString(std::string str) /* restores *this from a Boost text archive held in str; unlike fromFile(), init() is not called afterwards */
00692     {
00693 
00694         std::istringstream is(str);
00695 
00696         ::boost::archive::text_iarchive ia(is);
00697 
00698         ia >> *this;
00699 
00700     }
00701 
00702     virtual void toFile(std::string filename) const /* writes *this as a Boost text archive to filename; the stream is not checked for having opened */
00703     {
00704 
00705         //std::ofstream os(filename.c_str(), std::ios::binary);
00706         //::boost::archive::binary_oarchive oa(os);
00707         std::ofstream os(filename.c_str());
00708         ::boost::archive::text_oarchive oa(os);
00709 
00710         oa << *this;
00711 
00712     }
00713 
00714     virtual void fromFile(std::string filename) /* reads *this from a Boost text archive in filename, then calls init(); the stream is not checked for having opened */
00715     {
00716 
00717         //std::ifstream is(filename.c_str(), std::ios::binary);
00718         //::boost::archive::binary_iarchive ia(is);
00719 
00720         std::ifstream is(filename.c_str());
00721         ::boost::archive::text_iarchive ia(is);
00722 
00723         ia >> *this;
00724 
00725         //TODO get non-default stuff to work!!
00726         this->init();
00727 
00728     }
00729 
00730 #endif //HAVE_BOOST_SERIALIZATION
00731 
00732 
00733  protected:
00735   MODEL* model; /**< raw pointer to the MODEL record; allocation and release are not visible in this header */
00737   LEARN_PARM* learn_parm; /**< raw pointer to the LEARN_PARM record; allocation and release are not visible in this header */
00739   int32_t   verbosity; /**< presumably the logging verbosity level */
00740 
00742   float64_t init_margin; /**< presumably the initial margin used by the shrinking heuristic -- TODO confirm */
00744   int32_t   init_iter; /**< presumably the initial iteration threshold used by the shrinking heuristic -- TODO confirm */
00746   int32_t precision_violations; /**< presumably counts solver precision violations */
00748   float64_t model_b; /**< presumably the model bias / threshold */
00750   float64_t opt_precision; /**< presumably the QP solver precision; LEARN_PARM declares a field of the same name */
00752   float64_t* primal; /**< raw pointer; presumably a buffer for the primal variables of the QP */
00754   float64_t* dual; /**< raw pointer; presumably a buffer for the dual variables of the QP */
00755 
00756   // MKL stuff
00757 
00761   float64_t* W; /**< raw pointer; presumably a per-kernel buffer used for MKL -- TODO confirm */
00763   int32_t count; /**< presumably an iteration / call counter used for MKL */
00765   float64_t mymaxdiff; /**< presumably the current maximum violation (cf. the maxdiff parameters above) */
00767   bool use_kernel_cache; /**< presumably whether the kernel cache is used */
00769   bool mkl_converged; /**< presumably whether the MKL step has converged */
00770 };
00771 }
00772 #endif //USE_SVMLIGHT
00773 #endif //_SVMLight_H___
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation