granicus.if.org Git - liblinear/commitdiff
Squashed commit of the following:
author    boyu <r02222047@ntu.edu.tw>
          Sun, 28 Jun 2015 20:56:32 +0000 (04:56 +0800)
committer boyu <r02222047@ntu.edu.tw>
          Sun, 28 Jun 2015 20:56:32 +0000 (04:56 +0800)
commit 3f867438140182ad5edeb7b5fff11fbe39216052
Author: boyu <r02222047@ntu.edu.tw>
Date:   Mon Jun 29 04:23:22 2015 +0800

    num_unchanged_w

commit d8fce5f7a19fa14bcfe0245be1217741706bb913
Author: boyu <r02222047@ntu.edu.tw>
Date:   Sun Jun 28 14:57:25 2015 +0800

    windows math.h log ambiguous fix

commit 52d5addd8179f247f56df8eb098fa24751b8bf0c
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Sat Jun 27 16:38:04 2015 +0800

    c_double * 0 => None

commit 24e0fac8d23b31de54c28ab4a790d4fe56f5f3b0
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Sat Jun 27 15:04:27 2015 +0800

    default of -C: L2R_LR => L2R_L2LOSS_SVC

commit fd170647d2808ff3ff97e4af7d2e76734a5ffd7a
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Sat Jun 27 00:11:36 2015 +0800

    Fix NULL pointer bug, python README

commit 947cfa1226a774e79675c7280d2de6719545ff29
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 23:50:34 2015 +0800

    fix

commit 225d35ed39d2b9028ae4da1005183ea65a30d5f1
Merge: adf59e5 5d5b3a6
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 23:49:37 2015 +0800

    Merge branch 'parasearch' of svm:liblr into parasearch

commit adf59e5ecc60e5b943bf0083a8d53156463125a9
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 23:49:12 2015 +0800

    fix

commit 5d5b3a60edd41b4585c669f210b0bb67f83900b4
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Fri Jun 26 21:07:31 2015 +0800

    README

commit bcdde0a776b6f7f1023ec65733d689834c9c445d
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 19:52:57 2015 +0800

    num_unchanged_w; README

commit 97b4b7a2af81767effadaaa3549f29e5866320e3
Author: Chih-Jen Lin <cjlin@csie.ntu.edu.tw>
Date:   Thu Jun 25 20:07:24 2015 -0700

    minor changes to ensure consistency

commit b982e1c0fed146c82efc616de49f10323fb9ae8f
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 02:56:29 2015 +0800

    naming and indent

commit a46ba8359de45401f9693ef27c1bfd2ed968118e
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 02:40:07 2015 +0800

    comments except 67 and interface

commit 75b8a547eb742240127f39599f94427d1bf9300e
Merge: 83dc99b 1c9b998
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 23:05:58 2015 +0800

    Merge branch 'parasearch' of svm:liblr into parasearch

commit 83dc99b843569c4fa868ad0c9eb669a7e1e9ac9b
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 23:05:22 2015 +0800

    solver specified

commit 1c9b9989046244ba6b983abffc742f931290c692
Merge: 1be6f38 e25481b
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Thu Jun 25 22:24:21 2015 +0800

    Merge branch 'parasearch' of svm.csie.ntu.edu.tw:liblr into parasearch

commit 1be6f384358d79b86d602dcfbae0d2bd0c0ff951
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Thu Jun 25 22:23:58 2015 +0800

    MATLAB interface README

commit e25481bd5325a6d2fc076af20ed55c3afd02b62b
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 20:56:16 2015 +0800

    unchanged_w undo

commit e41852838bae0acec5e64e20e6de308179456597
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 13:03:41 2015 +0800

    python fix

commit d86ef0693eb18e7b763b3e18581c56b2054283c4
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 13:00:50 2015 +0800

    comments 6/25

commit ffc1831a7a28113925ed4825ad57e67a4f26124b
Merge: fed01f9 a29f7b1
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 12:47:23 2015 +0800

    Merge branch 'parasearch' of svm:liblr into parasearch

commit fed01f95c27b720d9755a1f5a0601c76ae0e75f8
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 12:24:29 2015 +0800

    python

commit a29f7b111a5b464bcd612490da00b4952db95167
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Thu Jun 25 11:22:28 2015 +0800

    comments 20150625: train.c matlab/train.c

commit d25ac8709c095be2406cf3eda6125e34197e5966
Merge: e45c874 4dc5739
Author: Chih-Jen Lin <cjlin@csie.ntu.edu.tw>
Date:   Thu Jun 25 02:30:18 2015 +0800

    Merge branch 'parasearch' of svm.csie.ntu.edu.tw:liblr into parasearch

    Conflicts:
     linear.cpp

commit e45c8742e8ef05a47c7c9613807ca7954ab21925
Author: Chih-Jen Lin <cjlin@csie.ntu.edu.tw>
Date:   Thu Jun 25 02:26:37 2015 +0800

    reorder variables' declaration in find_parameter_C

commit 4dc57397a193faf761cf3602d8daae027a6930de
Author: boyu <r02222047@ntu.edu.tw>
Date:   Wed Jun 24 23:25:31 2015 +0800

    fix

commit a37f6180940470d491fa7db0db88a7801903fbf8
Author: boyu <r02222047@ntu.edu.tw>
Date:   Wed Jun 24 18:32:21 2015 +0800

    fix

commit bf8dcd427e82f52da413406eac84189445ef12a9
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Wed Jun 24 18:09:49 2015 +0800

    MATLAB interface

commit 10ec863e6560e16fa3edb5b45387961efbd3bbbe
Author: boyu <r02222047@ntu.edu.tw>
Date:   Wed Jun 24 15:55:07 2015 +0800

    comments 0618; merge the two new subroutines

commit 069daa543cfd6bb0f2b858a3a4c6ac60d385f32e
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 18 19:08:50 2015 +0800

    CV separate; init_sol in param; remove interactive

commit 64bdcf116bca2ecc2c6956dbe62a8c517c31528f
Author: boyu <r02222047@ntu.edu.tw>
Date:   Sat Jun 13 00:29:30 2015 +0800

    initial check on 2015/6/11

commit ae3fd7104ef587834ab06b5fdadaacaefb757ffe
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 12 18:50:06 2015 +0800

    nolabel

12 files changed:
Makefile
README
linear.cpp
linear.h
matlab/README
matlab/train.c
python/README
python/liblinear.py
python/liblinearutil.py
train.c
tron.cpp
tron.h

index ac6a32a7a156bff6e762194541378315670af0ad..0534f2bdc4d46a18457e5ff5bf05fdbf2bf88a20 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CXX ?= g++
 CC ?= gcc
 CFLAGS = -Wall -Wconversion -O3 -fPIC
 LIBS = blas/blas.a
-SHVER = 2
+SHVER = 3
 OS = $(shell uname)
 #LIBS = -lblas
 
diff --git a/README b/README
index 1b7fca560844e4aa0e9105c0245b13a0deadcda1..6f9e783c717ae181b775adbded74f87c8cbcd80d 100644 (file)
--- a/README
+++ b/README
@@ -131,11 +131,16 @@ options:
 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
 -wi weight: weights adjust the parameter C of different classes (see README for details)
 -v n: n-fold cross validation mode
+-C : find parameter C (only for -s 0 and 2)
 -q : quiet mode (no outputs)
 
 Option -v randomly splits the data into n parts and calculates cross
 validation accuracy on them.
 
+Option -C conducts cross validation under different C values and finds
+the best one. This option is supported only by -s 0 and -s 2. If
+the solver is not specified, -s 2 is used.
+
 Formulations:
 
 For L2-regularized logistic regression (-s 0), we solve
@@ -245,6 +250,12 @@ Do five-fold cross-validation using L2-loss svm.
 Use a smaller stopping tolerance 0.001 than the default
 0.1 if you want more accurate solutions.
 
+> train -C -s 0 data_file
+
+Conduct cross validation many times with logistic regression and
+find the parameter C that achieves the best cross-validation
+accuracy.
+
 > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file
 
 Train four classifiers:
@@ -407,6 +418,22 @@ Library Usage
 
     The format of prob is same as that for train().
 
+- Function: void find_parameter_C(const struct problem *prob, 
+            const struct parameter *param, int nr_fold, double start_C, 
+           double max_C, double *best_C, double *best_rate);
+
+    This function is similar to cross_validation. However, instead of
+    conducting cross validation under a specified parameter C, it 
+    conducts cross validation many times under parameters C = start_C, 
+    2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with
+    the highest cross validation accuracy.
+    
+    If start_C <= 0, then this procedure calculates a small enough C 
+    for prob as the start_C. The procedure stops when the models of 
+    all folds become stable or C reaches max_C. The best C and the 
+    corresponding accuracy are assigned to *best_C and *best_rate,
+    respectively.
+
 - Function: double predict(const model *model_, const feature_node *x);
 
     For a classification model, the predicted class for x is returned.
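
As a quick illustration of the find_parameter_C interface documented above, a
minimal C sketch (prob and param are assumed to be fully initialized elsewhere,
as train.c does; the wrapper name parameter_search_example is hypothetical):

    #include <stdio.h>
    #include "linear.h"

    /* param->solver_type must be L2R_LR or L2R_L2LOSS_SVC */
    void parameter_search_example(const struct problem *prob,
                                  const struct parameter *param)
    {
        double best_C, best_rate;
        /* start_C <= 0 lets the library compute a small enough start;
           5-fold CV, search capped at C = 1024 as in train.c */
        find_parameter_C(prob, param, 5, -1.0, 1024, &best_C, &best_rate);
        printf("Best C = %f  CV accuracy = %g%%\n", best_C, 100.0*best_rate);
    }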
index 230948c31f42db595e3c56e8408899b7d5c09adb..7ad136ff5b92cecb2b81530b0a23eb00809c110d 100644 (file)
@@ -27,6 +27,7 @@ static void print_string_stdout(const char *s)
        fputs(s,stdout);
        fflush(stdout);
 }
+static void print_null(const char *s) {}
 
 static void (*liblinear_print_string) (const char *) = &print_string_stdout;
 
@@ -2180,14 +2181,18 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re
 
 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
 {
-       double eps=param->eps;
+       //inner and outer tolerances for TRON
+       double eps = param->eps;
+       double eps_cg = 0.1;
+       if(param->init_sol != NULL)
+               eps_cg = 0.5;
+
        int pos = 0;
        int neg = 0;
        for(int i=0;i<prob->l;i++)
                if(prob->y[i] > 0)
                        pos++;
        neg = prob->l - pos;
-
        double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
 
        function *fun_obj=NULL;
@@ -2204,7 +2209,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
                                        C[i] = Cn;
                        }
                        fun_obj=new l2r_lr_fun(prob, C);
-                       TRON tron_obj(fun_obj, primal_solver_tol);
+                       TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
                        tron_obj.set_print_string(liblinear_print_string);
                        tron_obj.tron(w);
                        delete fun_obj;
@@ -2222,7 +2227,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
                                        C[i] = Cn;
                        }
                        fun_obj=new l2r_l2_svc_fun(prob, C);
-                       TRON tron_obj(fun_obj, primal_solver_tol);
+                       TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
                        tron_obj.set_print_string(liblinear_print_string);
                        tron_obj.tron(w);
                        delete fun_obj;
@@ -2287,6 +2292,36 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
        }
 }
 
+// Calculate the initial C for parameter selection
+static double calc_start_C(const problem *prob, const parameter *param)
+{
+       int i;
+       double xTx,max_xTx;
+       max_xTx = 0;
+       for(i=0; i<prob->l; i++)
+       {
+               xTx = 0;
+               feature_node *xi=prob->x[i];
+               while(xi->index != -1)
+               {
+                       double val = xi->value;
+                       xTx += val*val;
+                       xi++;
+               }
+               if(xTx > max_xTx)
+                       max_xTx = xTx;
+       }
+
+       double min_C = 1.0;
+       if(param->solver_type == L2R_LR)
+               min_C = 1.0 / (prob->l * max_xTx);
+       else if(param->solver_type == L2R_L2LOSS_SVC)
+               min_C = 1.0 / (2 * prob->l * max_xTx);
+
+       return pow( 2, floor(log(min_C) / log(2.0)) );
+}
+
+
 //
 // Interface functions
 //
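To make the rounding in calc_start_C above concrete (numbers chosen for illustration, not from the patch): for L2R_L2LOSS_SVC with l = 100 instances whose largest squared norm is max_xTx = 4, min_C = 1/(2*100*4) = 0.00125, and pow(2, floor(log2(0.00125))) = 2^-10 = 1/1024, so the search would start at C = 1/1024.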
@@ -2310,7 +2345,7 @@ model* train(const problem *prob, const parameter *param)
                model_->w = Malloc(double, w_size);
                model_->nr_class = 2;
                model_->label = NULL;
-               train_one(prob, param, &model_->w[0], 0, 0);
+               train_one(prob, param, model_->w, 0, 0);
        }
        else
        {
@@ -2380,8 +2415,15 @@ model* train(const problem *prob, const parameter *param)
                                        sub_prob.y[k] = +1;
                                for(; k<sub_prob.l; k++)
                                        sub_prob.y[k] = -1;
+                               
+                               if(param->init_sol != NULL)
+                                       for(i=0;i<w_size;i++)
+                                               model_->w[i] = param->init_sol[i];
+                               else
+                                       for(i=0;i<w_size;i++)
+                                               model_->w[i] = 0;
 
-                               train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+                               train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
                        }
                        else
                        {
@@ -2400,6 +2442,13 @@ model* train(const problem *prob, const parameter *param)
                                        for(; k<sub_prob.l; k++)
                                                sub_prob.y[k] = -1;
 
+                                       if(param->init_sol != NULL)
+                                               for(j=0;j<w_size;j++)
+                                                       w[j] = param->init_sol[j*nr_class+i];
+                                       else
+                                               for(j=0;j<w_size;j++)
+                                                       w[j] = 0;
+
                                        train_one(&sub_prob, param, w, weighted_C[i], param->C);
 
                                        for(int j=0;j<w_size;j++)
@@ -2480,6 +2529,148 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
        free(perm);
 }
 
+void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+{
+       // variables for CV
+       int i;
+       int *fold_start;
+       int l = prob->l;
+       int *perm = Malloc(int, l);
+       double *target = Malloc(double, prob->l);
+       struct problem *subprob = Malloc(problem,nr_fold);
+
+       // variables for warm start
+       double ratio = 2;
+       double **prev_w = Malloc(double*, nr_fold);
+       for(i = 0; i < nr_fold; i++)
+               prev_w[i] = NULL;
+       int num_unchanged_w = 0;
+       struct parameter param1 = *param;
+       void (*default_print_string) (const char *) = liblinear_print_string;
+
+       if (nr_fold > l)
+       {
+               nr_fold = l;
+               fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
+       }
+       fold_start = Malloc(int,nr_fold+1);
+       for(i=0;i<l;i++) perm[i]=i;
+       for(i=0;i<l;i++)
+       {
+               int j = i+rand()%(l-i);
+               swap(perm[i],perm[j]);
+       }
+       for(i=0;i<=nr_fold;i++)
+               fold_start[i]=i*l/nr_fold;
+
+       for(i=0;i<nr_fold;i++)
+       {
+               int begin = fold_start[i];
+               int end = fold_start[i+1];
+               int j,k;
+
+               subprob[i].bias = prob->bias;
+               subprob[i].n = prob->n;
+               subprob[i].l = l-(end-begin);
+               subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
+               subprob[i].y = Malloc(double,subprob[i].l);
+
+               k=0;
+               for(j=0;j<begin;j++)
+               {
+                       subprob[i].x[k] = prob->x[perm[j]];
+                       subprob[i].y[k] = prob->y[perm[j]];
+                       ++k;
+               }
+               for(j=end;j<l;j++)
+               {
+                       subprob[i].x[k] = prob->x[perm[j]];
+                       subprob[i].y[k] = prob->y[perm[j]];
+                       ++k;
+               }
+
+       }
+
+       *best_rate = 0;
+       if(start_C <= 0)
+               start_C = calc_start_C(prob,param);
+       param1.C = start_C;
+
+       while(param1.C <= max_C)
+       {
+               // Output disabled while running CV at a particular C
+               set_print_string_function(&print_null);
+
+               for(i=0; i<nr_fold; i++)
+               {
+                       int j;
+                       int begin = fold_start[i];
+                       int end = fold_start[i+1];
+
+                       param1.init_sol = prev_w[i];
+                       struct model *submodel = train(&subprob[i],&param1);
+
+                       int total_w_size;
+                       if(submodel->nr_class == 2)
+                               total_w_size = subprob[i].n;
+                       else
+                               total_w_size = subprob[i].n * submodel->nr_class;
+
+                       if(prev_w[i] != NULL && num_unchanged_w >= 0)
+                       {
+                               double norm_w_diff = 0;
+                               for(j=0; j<total_w_size; j++)
+                               {
+                                       norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+                                       prev_w[i][j] = submodel->w[j];
+                               }
+                               norm_w_diff = sqrt(norm_w_diff);
+
+                               if(norm_w_diff > 1e-15)
+                                       num_unchanged_w = -1;
+                       }
+                       else
+                       {
+                               prev_w[i] = Malloc(double, total_w_size);
+                               for(j=0; j<total_w_size; j++)
+                                       prev_w[i][j] = submodel->w[j];
+                       }
+
+                       for(j=begin; j<end; j++)
+                               target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+
+                       free_and_destroy_model(&submodel);
+               }
+               set_print_string_function(default_print_string);
+
+               int total_correct = 0;
+               for(i=0; i<prob->l; i++)
+                       if(target[i] == prob->y[i])
+                               ++total_correct;
+               double current_rate = (double)total_correct/prob->l;
+               if(current_rate > *best_rate)
+               {
+                       *best_C = param1.C;
+                       *best_rate = current_rate;
+               }
+
+               info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
+               num_unchanged_w++;
+               if(num_unchanged_w == 3)
+                       break;
+               param1.C = param1.C*ratio;
+       }
+
+       if(param1.C > max_C && max_C > start_C) 
+               info("warning: maximum C reached.\n");
+       free(fold_start);
+       free(perm);
+       free(target);
+       for(i=0; i<nr_fold; i++)
+               free(prev_w[i]);
+       free(prev_w);
+}
+
 double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
 {
        int idx;
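Given the doubling schedule above (ratio = 2), a search that starts at the calc_start_C value 2^-10 and is capped at the callers' default max_C = 1024 = 2^10 evaluates at most 21 values of C; it ends earlier once the weight vectors of all folds stay unchanged for three consecutive rounds (num_unchanged_w == 3).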
@@ -2839,6 +3030,8 @@ void destroy_param(parameter* param)
                free(param->weight_label);
        if(param->weight != NULL)
                free(param->weight);
+       if(param->init_sol != NULL)
+               free(param->init_sol);
 }
 
 const char *check_parameter(const problem *prob, const parameter *param)
@@ -2865,6 +3058,10 @@ const char *check_parameter(const problem *prob, const parameter *param)
                && param->solver_type != L2R_L1LOSS_SVR_DUAL)
                return "unknown solver type";
 
+       if(param->init_sol != NULL 
+               && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
+               return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
+
        return NULL;
 }
 
index 6b07b477dd1de388f9b70e355b8ae395ac700448..bc6aaf8b6e1ec716902c0cb2d587e6ca06fa2ba7 100644 (file)
--- a/linear.h
+++ b/linear.h
@@ -32,6 +32,7 @@ struct parameter
        int *weight_label;
        double* weight;
        double p;
+       double *init_sol;
 };
 
 struct model
@@ -46,6 +47,7 @@ struct model
 
 struct model* train(const struct problem *prob, const struct parameter *param);
 void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
 
 double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
 double predict(const struct model *model_, const struct feature_node *x);
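
A hedged fragment showing how the new init_sol field could warm-start train()
for a two-class problem (prev_w and n are hypothetical names; as check_parameter
in linear.cpp enforces, init_sol is accepted only for L2R_LR and L2R_L2LOSS_SVC):

    /* n = prob.n; prev_w holds an earlier solution of length n */
    param.solver_type = L2R_L2LOSS_SVC;
    param.init_sol = (double *) malloc(sizeof(double) * n);
    for(int j = 0; j < n; j++)
        param.init_sol[j] = prev_w[j];  /* copied: destroy_param frees init_sol */
    struct model *m = train(&prob, &param);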
index f2b02b73a7efe91ead0359dede55b371c5753bee..f53f4350cacf00520a2f1dd3adef363f2ec3318d 100644 (file)
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
 
 If the '-v' option is specified, cross validation is conducted and the
 returned model is just a scalar: cross-validation accuracy for 
-classification and mean-squared error for regression.
+classification and mean-squared error for regression. If the '-C' option
+is specified, the best parameter C is found by cross validation. The 
+returned model is a two-element vector, where the first value is
+the best C and the second value is the corresponding cross-validation
+accuracy. The parameter selection utility is supported only by -s 0
+and -s 2.
 
 Result of Prediction
 ====================
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
 
 matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
 
+Use the best parameter to train (only supported by -s 0 and -s 2):
+
+matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
+matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0 
+
 Additional Information
 ======================
 
index 93e3eb823189203670b59db63285d9bc8681bafd..5c3ef4a79c2bcf8eb5ac40178eac2b0908884968 100644 (file)
@@ -1,4 +1,3 @@
-#include <stdio.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
@@ -60,6 +59,7 @@ void exit_with_help()
        "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
        "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
        "-v n: n-fold cross validation mode\n"
+       "-C : find parameter C (only for -s 0 and 2)\n"
        "-q : quiet mode (no outputs)\n"
        "col:\n"
        "       if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
@@ -71,11 +71,28 @@ struct parameter param;             // set by parse_command_line
 struct problem prob;           // set by read_problem
 struct model *model_;
 struct feature_node *x_space;
-int cross_validation_flag;
+int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int col_format_flag;
 int nr_fold;
 double bias;
 
+
+void do_find_parameter_C(double *best_C, double *best_rate)
+{
+       double start_C;
+       double max_C = 1024;
+       if (flag_C_specified)
+               start_C = param.C;
+       else
+               start_C = -1.0;
+       find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
+       mexPrintf("Best C = %lf  CV accuracy = %g%%\n", *best_C, 100.0**best_rate);     
+}
+
+
 double do_cross_validation()
 {
        int i;
@@ -101,8 +118,8 @@ double do_cross_validation()
                         sumyy += y*y;
                         sumvy += v*y;
                 }
-                printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
-                printf("Cross Validation Squared correlation coefficient = %g\n",
+                mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+                mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
                         ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
                         ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
                         );
@@ -113,7 +130,7 @@ double do_cross_validation()
                for(i=0;i<prob.l;i++)
                        if(target[i] == prob.y[i])
                                ++total_correct;
-               printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+               mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
                retval = 100.0*total_correct/prob.l;
        }
 
@@ -137,8 +154,12 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
        param.nr_weight = 0;
        param.weight_label = NULL;
        param.weight = NULL;
-       cross_validation_flag = 0;
+       param.init_sol = NULL;
+       flag_cross_validation = 0;
        col_format_flag = 0;
+       flag_C_specified = 0;
+       flag_solver_specified = 0;
+       flag_find_C = 0;
        bias = -1;
 
 
@@ -166,15 +187,17 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
        {
                if(argv[i][0] != '-') break;
                ++i;
-               if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
+               if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C') // since options -q and -C have no parameter
                        return 1;
                switch(argv[i-1][1])
                {
                        case 's':
                                param.solver_type = atoi(argv[i]);
+                               flag_solver_specified = 1;
                                break;
                        case 'c':
                                param.C = atof(argv[i]);
+                               flag_C_specified = 1;
                                break;
                        case 'p':
                                param.p = atof(argv[i]);
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
                                bias = atof(argv[i]);
                                break;
                        case 'v':
-                               cross_validation_flag = 1;
+                               flag_cross_validation = 1;
                                nr_fold = atoi(argv[i]);
                                if(nr_fold < 2)
                                {
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
                                print_func = &print_null;
                                i--;
                                break;
+                       case 'C':
+                               flag_find_C = 1;
+                               i--;
+                               break;
                        default:
                                mexPrintf("unknown option\n");
                                return 1;
@@ -213,6 +240,23 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 
        set_print_string_function(print_func);
 
+       // default solver for parameter selection is L2R_L2LOSS_SVC
+       if(flag_find_C)
+       {
+               if(!flag_cross_validation)
+                       nr_fold = 5;
+               if(!flag_solver_specified)
+               {
+                       mexPrintf("Solver not specified. Using -s 2\n");
+                       param.solver_type = L2R_L2LOSS_SVC;
+               }
+               else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+               {
+                       mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
+                       return 1;
+               }
+       }
+
        if(param.eps == INF)
        {
                switch(param.solver_type)
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
                        return;
                }
 
-               if(cross_validation_flag)
+               if (flag_find_C)
+               {
+                       double best_C, best_rate, *ptr;
+                       
+                       do_find_parameter_C(&best_C, &best_rate);       
+                       
+                       plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
+                       ptr = mxGetPr(plhs[0]);
+                       ptr[0] = best_C;
+                       ptr[1] = best_rate;
+               }
+               else if(flag_cross_validation)
                {
                        double *ptr;
                        plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
index e6349cf9fa948560d338c1f135d5b39b51e41356..47e0b4a04dd7f4f334feafbe4cffda3eb2357dc7 100644 (file)
@@ -277,6 +277,11 @@ The above command loads
            structure. If '-v' is specified, cross validation is
            conducted and the returned model is just a scalar: cross-validation
            accuracy for classification and mean-squared error for regression.
+           If the '-C' option is specified, the best parameter C is found
+           by cross validation. The returned model is a tuple of the best C
+           and the corresponding cross-validation accuracy. The parameter
+           selection utility is supported only by -s 0 and -s 2.
+
 
     To train the same data many times with different
     parameters, the second and the third ways should be faster.
@@ -290,6 +295,8 @@ The above command loads
     >>> m = train(prob, '-w1 5 -c 5')
     >>> m = train(prob, param)
     >>> CV_ACC = train(y, x, '-v 3')
+    >>> best_C, best_rate = train(y, x, '-C -s 0')
+    >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
 
 - Function: predict
 
index 9587718d844ffb9bff97f5a19e24fc5027f19456..d6500626f83b867d19a30639a31d489d7317446c 100644 (file)
@@ -16,7 +16,7 @@ try:
        if sys.platform == 'win32':
                liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
        else:
-               liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
+               liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
 except:
 # For unix the prefix 'lib' is not considered.
        if find_library('linear'):
@@ -127,8 +127,8 @@ class problem(Structure):
 
 
 class parameter(Structure):
-       _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
-       _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
+       _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
+       _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
        _fields_ = genFields(_names, _types)
 
        def __init__(self, options = None):
@@ -152,10 +152,14 @@ class parameter(Structure):
                self.C = 1
                self.p = 0.1
                self.nr_weight = 0
-               self.weight_label = (c_int * 0)()
-               self.weight = (c_double * 0)()
+               self.weight_label = None
+               self.weight = None
+               self.init_sol = None
                self.bias = -1
-               self.cross_validation = False
+               self.flag_cross_validation = False
+               self.flag_C_specified = False
+               self.flag_solver_specified = False
+               self.flag_find_C = False
                self.nr_fold = 0
                self.print_func = cast(None, PRINT_STRING_FUN)
 
@@ -176,9 +180,11 @@ class parameter(Structure):
                        if argv[i] == "-s":
                                i = i + 1
                                self.solver_type = int(argv[i])
+                               self.flag_solver_specified = True
                        elif argv[i] == "-c":
                                i = i + 1
                                self.C = float(argv[i])
+                               self.flag_C_specified = True
                        elif argv[i] == "-p":
                                i = i + 1
                                self.p = float(argv[i])
@@ -190,18 +196,20 @@ class parameter(Structure):
                                self.bias = float(argv[i])
                        elif argv[i] == "-v":
                                i = i + 1
-                               self.cross_validation = 1
+                               self.flag_cross_validation = 1
                                self.nr_fold = int(argv[i])
                                if self.nr_fold < 2 :
                                        raise ValueError("n-fold cross validation: n must >= 2")
                        elif argv[i].startswith("-w"):
                                i = i + 1
                                self.nr_weight += 1
-                               nr_weight = self.nr_weight
                                weight_label += [int(argv[i-1][2:])]
                                weight += [float(argv[i])]
                        elif argv[i] == "-q":
                                self.print_func = PRINT_STRING_FUN(print_null)
+                       elif argv[i] == "-C":
+                               self.flag_find_C = True
+
                        else :
                                raise ValueError("Wrong options")
                        i += 1
@@ -213,6 +221,16 @@ class parameter(Structure):
                        self.weight[i] = weight[i]
                        self.weight_label[i] = weight_label[i]
 
+               # default solver for parameter selection is L2R_L2LOSS_SVC
+               if self.flag_find_C:
+                       if not self.flag_cross_validation:
+                               self.nr_fold = 5
+                       if not self.flag_solver_specified:
+                               self.solver_type = L2R_L2LOSS_SVC
+                               self.flag_solver_specified = True
+                       elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
+                               raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
+
                if self.eps == float('inf'):
                        if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
                                self.eps = 0.01
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
        return m
 
 fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
+fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
 fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
 
 fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
index 40de52a2d4011efe644598f91f08747642eea9bb..5ba5efa3e21ac8ecf9a21ada20f8fcefb74ef12d 100644 (file)
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
        if err_msg :
                raise ValueError('Error: %s' % err_msg)
 
-       if param.cross_validation:
+       if param.flag_find_C:
+               nr_fold = param.nr_fold
+               best_C = c_double()
+               best_rate = c_double()          
+               max_C = 1024
+               if param.flag_C_specified:
+                       start_C = param.C
+               else:
+                       start_C = -1.0
+               liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
+               print("Best C = %lf  CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
+               return best_C.value,best_rate.value
+
+
+       elif param.flag_cross_validation:
                l, nr_fold = prob.l, param.nr_fold
                target = (c_double * l)()
                liblinear.cross_validation(prob, param, nr_fold, target)
diff --git a/train.c b/train.c
index 80d9810778b92787513644afa91f9a12d1230bc9..4df8594e5db618d63d2a777d439299e6d4a8d244 100644 (file)
--- a/train.c
+++ b/train.c
@@ -49,6 +49,7 @@ void exit_with_help()
        "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
        "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
        "-v n: n-fold cross validation mode\n"
+       "-C : find parameter C (only for -s 0 and 2)\n"
        "-q : quiet mode (no outputs)\n"
        );
        exit(1);
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
 void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
 void read_problem(const char *filename);
 void do_cross_validation();
+void do_find_parameter_C();
 
 struct feature_node *x_space;
 struct parameter param;
 struct problem prob;
 struct model* model_;
 int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int nr_fold;
 double bias;
 
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
                exit(1);
        }
 
-       if(flag_cross_validation)
+       if (flag_find_C)
+       {
+               do_find_parameter_C();
+       }
+       else if(flag_cross_validation)
        {
                do_cross_validation();
        }
@@ -132,6 +141,18 @@ int main(int argc, char **argv)
        return 0;
 }
 
+void do_find_parameter_C()
+{
+       double start_C, best_C, best_rate;
+       double max_C = 1024;
+       if (flag_C_specified)
+               start_C = param.C;
+       else
+               start_C = -1.0;
+       find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
+       printf("Best C = %lf  CV accuracy = %g%%\n", best_C, 100.0*best_rate);
+}
+
 void do_cross_validation()
 {
        int i;
@@ -186,7 +207,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
        param.nr_weight = 0;
        param.weight_label = NULL;
        param.weight = NULL;
+       param.init_sol = NULL;
        flag_cross_validation = 0;
+       flag_C_specified = 0;
+       flag_solver_specified = 0;
+       flag_find_C = 0;
        bias = -1;
 
        // parse options
@@ -199,10 +224,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
                {
                        case 's':
                                param.solver_type = atoi(argv[i]);
+                               flag_solver_specified = 1;
                                break;
 
                        case 'c':
                                param.C = atof(argv[i]);
+                               flag_C_specified = 1;
                                break;
 
                        case 'p':
@@ -240,6 +267,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
                                i--;
                                break;
 
+                       case 'C':
+                               flag_find_C = 1;
+                               i--;
+                               break;
+
                        default:
                                fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
                                exit_with_help();
@@ -267,6 +299,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
                sprintf(model_file_name,"%s.model",p);
        }
 
+       // default solver for parameter selection is L2R_L2LOSS_SVC
+       if(flag_find_C)
+       {
+               if(!flag_cross_validation)
+                       nr_fold = 5;
+               if(!flag_solver_specified)
+               {
+                       fprintf(stderr, "Solver not specified. Using -s 2\n");
+                       param.solver_type = L2R_L2LOSS_SVC;
+               }
+               else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+               {
+                       fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
+                       exit_with_help();
+               }
+       }
+
        if(param.eps == INF)
        {
                switch(param.solver_type)
index 7d1fd6eb17fd02ad35c491d8cf4ef394a5ca3f95..2cd283405da8229ca70617cddeb140fd38fd24ff 100644 (file)
--- a/tron.cpp
+++ b/tron.cpp
@@ -41,10 +41,11 @@ void TRON::info(const char *fmt,...)
        (*tron_print_string)(buf);
 }
 
-TRON::TRON(const function *fun_obj, double eps, int max_iter)
+TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
 {
        this->fun_obj=const_cast<function *>(fun_obj);
        this->eps=eps;
+       this->eps_cg=eps_cg;
        this->max_iter=max_iter;
        tron_print_string = default_print;
 }
@@ -71,16 +72,21 @@ void TRON::tron(double *w)
        double *w_new = new double[n];
        double *g = new double[n];
 
+       // calculate gradient norm at w=0 for stopping condition.
+       double *w0 = new double[n];
        for (i=0; i<n; i++)
-               w[i] = 0;
+               w0[i] = 0;
+       fun_obj->fun(w0);
+       fun_obj->grad(w0, g);
+       double gnorm0 = dnrm2_(&n, g, &inc);
+       delete [] w0;
 
        f = fun_obj->fun(w);
        fun_obj->grad(w, g);
        delta = dnrm2_(&n, g, &inc);
-       double gnorm1 = delta;
-       double gnorm = gnorm1;
+       double gnorm = delta;
 
-       if (gnorm <= eps*gnorm1)
+       if (gnorm <= eps*gnorm0)
                search = 0;
 
        iter = 1;
@@ -130,7 +136,7 @@ void TRON::tron(double *w)
                        fun_obj->grad(w, g);
 
                        gnorm = dnrm2_(&n, g, &inc);
-                       if (gnorm <= eps*gnorm1)
+                       if (gnorm <= eps*gnorm0)
                                break;
                }
                if (f < -1.0e+32)
@@ -172,7 +178,7 @@ int TRON::trcg(double delta, double *g, double *s, double *r)
                r[i] = -g[i];
                d[i] = r[i];
        }
-       cgtol = 0.1*dnrm2_(&n, g, &inc);
+       cgtol = eps_cg*dnrm2_(&n, g, &inc);
 
        int cg_iter = 0;
        rTr = ddot_(&n, r, &inc, r, &inc);
diff --git a/tron.h b/tron.h
index 3045c2e83a1338eb8ec148ed9bc689ea7d7a71ae..56002dcdbd0224d469196375d1aa9e053ae4addc 100644 (file)
--- a/tron.h
+++ b/tron.h
@@ -15,7 +15,7 @@ public:
 class TRON
 {
 public:
-       TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000);
+       TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
        ~TRON();
 
        void tron(double *w);
@@ -26,6 +26,7 @@ private:
        double norm_inf(int n, double *x);
 
        double eps;
+       double eps_cg;
        int max_iter;
        function *fun_obj;
        void info(const char *fmt,...);
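
A minimal sketch of driving TRON directly with the new eps_cg argument (fun_obj
stands for any concrete function implementation, such as the l2r_l2_svc_fun
built in train_one; the tolerance values here are illustrative):

    /* w must hold the starting point on entry: tron() no longer zeroes it,
       and convergence is now tested against the gradient norm at w = 0 */
    TRON tron_obj(fun_obj, /*eps=*/0.01, /*eps_cg=*/0.5, /*max_iter=*/1000);
    tron_obj.tron(w);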