granicus.if.org Git - liblinear/commitdiff
Squashed commit of the following:
author    boyu <r02222047@ntu.edu.tw>
          Sun, 28 Jun 2015 20:56:32 +0000 (04:56 +0800)
committer boyu <r02222047@ntu.edu.tw>
          Sun, 28 Jun 2015 20:56:32 +0000 (04:56 +0800)
commit 3f867438140182ad5edeb7b5fff11fbe39216052
Author: boyu <r02222047@ntu.edu.tw>
Date:   Mon Jun 29 04:23:22 2015 +0800

    num_unchanged_w

commit d8fce5f7a19fa14bcfe0245be1217741706bb913
Author: boyu <r02222047@ntu.edu.tw>
Date:   Sun Jun 28 14:57:25 2015 +0800

    windows math.h log ambiguous fix

commit 52d5addd8179f247f56df8eb098fa24751b8bf0c
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Sat Jun 27 16:38:04 2015 +0800

    c_double * 0 => None

commit 24e0fac8d23b31de54c28ab4a790d4fe56f5f3b0
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Sat Jun 27 15:04:27 2015 +0800

    default of -C: L2R_LR => L2R_L2LOSS_SVC

commit fd170647d2808ff3ff97e4af7d2e76734a5ffd7a
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Sat Jun 27 00:11:36 2015 +0800

    Fix NULL pointer bug, python README

commit 947cfa1226a774e79675c7280d2de6719545ff29
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 23:50:34 2015 +0800

    fix

commit 225d35ed39d2b9028ae4da1005183ea65a30d5f1
Merge: adf59e5 5d5b3a6
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 23:49:37 2015 +0800

    Merge branch 'parasearch' of svm:liblr into parasearch

commit adf59e5ecc60e5b943bf0083a8d53156463125a9
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 23:49:12 2015 +0800

    fix

commit 5d5b3a60edd41b4585c669f210b0bb67f83900b4
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Fri Jun 26 21:07:31 2015 +0800

    README

commit bcdde0a776b6f7f1023ec65733d689834c9c445d
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 19:52:57 2015 +0800

    num_unchanged_w; README

commit 97b4b7a2af81767effadaaa3549f29e5866320e3
Author: Chih-Jen Lin <cjlin@csie.ntu.edu.tw>
Date:   Thu Jun 25 20:07:24 2015 -0700

    minor changes to ensure consistency

commit b982e1c0fed146c82efc616de49f10323fb9ae8f
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 02:56:29 2015 +0800

    naming and indent

commit a46ba8359de45401f9693ef27c1bfd2ed968118e
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 26 02:40:07 2015 +0800

    comments except 67 and interface

commit 75b8a547eb742240127f39599f94427d1bf9300e
Merge: 83dc99b 1c9b998
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 23:05:58 2015 +0800

    Merge branch 'parasearch' of svm:liblr into parasearch

commit 83dc99b843569c4fa868ad0c9eb669a7e1e9ac9b
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 23:05:22 2015 +0800

    solver specified

commit 1c9b9989046244ba6b983abffc742f931290c692
Merge: 1be6f38 e25481b
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Thu Jun 25 22:24:21 2015 +0800

    Merge branch 'parasearch' of svm.csie.ntu.edu.tw:liblr into parasearch

commit 1be6f384358d79b86d602dcfbae0d2bd0c0ff951
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Thu Jun 25 22:23:58 2015 +0800

    MATLAB interface README

commit e25481bd5325a6d2fc076af20ed55c3afd02b62b
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 20:56:16 2015 +0800

    unchanged_w undo

commit e41852838bae0acec5e64e20e6de308179456597
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 13:03:41 2015 +0800

    python fix

commit d86ef0693eb18e7b763b3e18581c56b2054283c4
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 13:00:50 2015 +0800

    comments 6/25

commit ffc1831a7a28113925ed4825ad57e67a4f26124b
Merge: fed01f9 a29f7b1
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 12:47:23 2015 +0800

    Merge branch 'parasearch' of svm:liblr into parasearch

commit fed01f95c27b720d9755a1f5a0601c76ae0e75f8
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 25 12:24:29 2015 +0800

    python

commit a29f7b111a5b464bcd612490da00b4952db95167
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Thu Jun 25 11:22:28 2015 +0800

    comments 20150625: train.c matlab/train.c

commit d25ac8709c095be2406cf3eda6125e34197e5966
Merge: e45c874 4dc5739
Author: Chih-Jen Lin <cjlin@csie.ntu.edu.tw>
Date:   Thu Jun 25 02:30:18 2015 +0800

    Merge branch 'parasearch' of svm.csie.ntu.edu.tw:liblr into parasearch

    Conflicts:
     linear.cpp

commit e45c8742e8ef05a47c7c9613807ca7954ab21925
Author: Chih-Jen Lin <cjlin@csie.ntu.edu.tw>
Date:   Thu Jun 25 02:26:37 2015 +0800

    reorder variables' declaration in find_parameter_C

commit 4dc57397a193faf761cf3602d8daae027a6930de
Author: boyu <r02222047@ntu.edu.tw>
Date:   Wed Jun 24 23:25:31 2015 +0800

    fix

commit a37f6180940470d491fa7db0db88a7801903fbf8
Author: boyu <r02222047@ntu.edu.tw>
Date:   Wed Jun 24 18:32:21 2015 +0800

    fix

commit bf8dcd427e82f52da413406eac84189445ef12a9
Author: Chia-Hua Ho <b95082@csie.ntu.edu.tw>
Date:   Wed Jun 24 18:09:49 2015 +0800

    MATLAB interface

commit 10ec863e6560e16fa3edb5b45387961efbd3bbbe
Author: boyu <r02222047@ntu.edu.tw>
Date:   Wed Jun 24 15:55:07 2015 +0800

    comments 0618; merge the two new subroutines

commit 069daa543cfd6bb0f2b858a3a4c6ac60d385f32e
Author: boyu <r02222047@ntu.edu.tw>
Date:   Thu Jun 18 19:08:50 2015 +0800

    CV separate; init_sol in param; remove interactive

commit 64bdcf116bca2ecc2c6956dbe62a8c517c31528f
Author: boyu <r02222047@ntu.edu.tw>
Date:   Sat Jun 13 00:29:30 2015 +0800

    initial check on 2015/6/11

commit ae3fd7104ef587834ab06b5fdadaacaefb757ffe
Author: boyu <r02222047@ntu.edu.tw>
Date:   Fri Jun 12 18:50:06 2015 +0800

    nolabel

12 files changed:
Makefile
README
linear.cpp
linear.h
matlab/README
matlab/train.c
python/README
python/liblinear.py
python/liblinearutil.py
train.c
tron.cpp
tron.h

index ac6a32a7a156bff6e762194541378315670af0ad..0534f2bdc4d46a18457e5ff5bf05fdbf2bf88a20 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CXX ?= g++
 CC ?= gcc
 CFLAGS = -Wall -Wconversion -O3 -fPIC
 LIBS = blas/blas.a
-SHVER = 2
+SHVER = 3
 OS = $(shell uname)
 #LIBS = -lblas
 
diff --git a/README b/README
index 1b7fca560844e4aa0e9105c0245b13a0deadcda1..6f9e783c717ae181b775adbded74f87c8cbcd80d 100644 (file)
--- a/README
+++ b/README
@@ -131,11 +131,16 @@ options:
 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
 -wi weight: weights adjust the parameter C of different classes (see README for details)
 -v n: n-fold cross validation mode
+-C : find parameter C (only for -s 0 and 2)
 -q : quiet mode (no outputs)
 
 Option -v randomly splits the data into n parts and calculates cross
 validation accuracy on them.
 
+Option -C conducts cross validation under different C values and finds
+the best one. This option is supported only by -s 0 and -s 2. If
+the solver is not specified, -s 2 is used.
+
 Formulations:
 
 For L2-regularized logistic regression (-s 0), we solve
@@ -245,6 +250,12 @@ Do five-fold cross-validation using L2-loss svm.
 Use a smaller stopping tolerance 0.001 than the default
 0.1 if you want more accurate solutions.
 
+> train -C -s 0 data_file
+
+Conduct cross validation many times with logistic regression and
+find the parameter C that achieves the best cross-validation
+accuracy.
+
 > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file
 
 Train four classifiers:
@@ -407,6 +418,22 @@ Library Usage
 
     The format of prob is same as that for train().
 
+- Function: void find_parameter_C(const struct problem *prob, 
+            const struct parameter *param, int nr_fold, double start_C, 
+           double max_C, double *best_C, double *best_rate);
+
+    This function is similar to cross_validation. However, instead of
+    conducting cross validation under a specified parameter C, it 
+    conducts cross validation many times under parameters C = start_C, 
+    2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with
+    the highest cross validation accuracy.
+    
+    If start_C <= 0, then this procedure calculates a small enough C 
+    for prob as the start_C. The procedure stops when the models of 
+    all folds become stable or C reaches max_C. The best C and the 
+    corresponding accuracy are assigned to *best_C and *best_rate,
+    respectively.
+
 - Function: double predict(const model *model_, const feature_node *x);
 
     For a classification model, the predicted class for x is returned.
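
As a quick illustration of the find_parameter_C interface documented above, a
minimal C sketch (prob and param are assumed to be fully initialized elsewhere,
as train.c does; the wrapper name parameter_search_example is hypothetical):

    #include <stdio.h>
    #include "linear.h"

    /* param->solver_type must be L2R_LR or L2R_L2LOSS_SVC */
    void parameter_search_example(const struct problem *prob,
                                  const struct parameter *param)
    {
        double best_C, best_rate;
        /* start_C <= 0 lets the library compute a small enough start;
           5-fold CV, search capped at C = 1024 as in train.c */
        find_parameter_C(prob, param, 5, -1.0, 1024, &best_C, &best_rate);
        printf("Best C = %f  CV accuracy = %g%%\n", best_C, 100.0*best_rate);
    }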
index 230948c31f42db595e3c56e8408899b7d5c09adb..7ad136ff5b92cecb2b81530b0a23eb00809c110d 100644 (file)
@@ -27,6 +27,7 @@ static void print_string_stdout(const char *s)
        fputs(s,stdout);
        fflush(stdout);
 }
+static void print_null(const char *s) {}
 
 static void (*liblinear_print_string) (const char *) = &print_string_stdout;
 
@@ -2180,14 +2181,18 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re
 
 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
 {
-       double eps=param->eps;
+       //inner and outer tolerances for TRON
+       double eps = param->eps;
+       double eps_cg = 0.1;
+       if(param->init_sol != NULL)
+               eps_cg = 0.5;
+
        int pos = 0;
        int neg = 0;
        for(int i=0;i<prob->l;i++)
                if(prob->y[i] > 0)
                        pos++;
        neg = prob->l - pos;
-
        double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
 
        function *fun_obj=NULL;
@@ -2204,7 +2209,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
                                        C[i] = Cn;
                        }
                        fun_obj=new l2r_lr_fun(prob, C);
-                       TRON tron_obj(fun_obj, primal_solver_tol);
+                       TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
                        tron_obj.set_print_string(liblinear_print_string);
                        tron_obj.tron(w);
                        delete fun_obj;
@@ -2222,7 +2227,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
                                        C[i] = Cn;
                        }
                        fun_obj=new l2r_l2_svc_fun(prob, C);
-                       TRON tron_obj(fun_obj, primal_solver_tol);
+                       TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
                        tron_obj.set_print_string(liblinear_print_string);
                        tron_obj.tron(w);
                        delete fun_obj;
@@ -2287,6 +2292,36 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
        }
 }
 
+// Calculate the initial C for parameter selection
+static double calc_start_C(const problem *prob, const parameter *param)
+{
+       int i;
+       double xTx,max_xTx;
+       max_xTx = 0;
+       for(i=0; i<prob->l; i++)
+       {
+               xTx = 0;
+               feature_node *xi=prob->x[i];
+               while(xi->index != -1)
+               {
+                       double val = xi->value;
+                       xTx += val*val;
+                       xi++;
+               }
+               if(xTx > max_xTx)
+                       max_xTx = xTx;
+       }
+
+       double min_C = 1.0;
+       if(param->solver_type == L2R_LR)
+               min_C = 1.0 / (prob->l * max_xTx);
+       else if(param->solver_type == L2R_L2LOSS_SVC)
+               min_C = 1.0 / (2 * prob->l * max_xTx);
+
+       return pow( 2, floor(log(min_C) / log(2.0)) );
+}
+
+
 //
 // Interface functions
 //
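To make the rounding in calc_start_C above concrete (numbers chosen for illustration, not from the patch): for L2R_L2LOSS_SVC with l = 100 instances whose largest squared norm is max_xTx = 4, min_C = 1/(2*100*4) = 0.00125, and pow(2, floor(log2(0.00125))) = 2^-10 = 1/1024, so the search would start at C = 1/1024.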
@@ -2310,7 +2345,7 @@ model* train(const problem *prob, const parameter *param)
                model_->w = Malloc(double, w_size);
                model_->nr_class = 2;
                model_->label = NULL;
-               train_one(prob, param, &model_->w[0], 0, 0);
+               train_one(prob, param, model_->w, 0, 0);
        }
        else
        {
@@ -2380,8 +2415,15 @@ model* train(const problem *prob, const parameter *param)
                                        sub_prob.y[k] = +1;
                                for(; k<sub_prob.l; k++)
                                        sub_prob.y[k] = -1;
+                               
+                               if(param->init_sol != NULL)
+                                       for(i=0;i<w_size;i++)
+                                               model_->w[i] = param->init_sol[i];
+                               else
+                                       for(i=0;i<w_size;i++)
+                                               model_->w[i] = 0;
 
-                               train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+                               train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
                        }
                        else
                        {
@@ -2400,6 +2442,13 @@ model* train(const problem *prob, const parameter *param)
                                        for(; k<sub_prob.l; k++)
                                                sub_prob.y[k] = -1;
 
+                                       if(param->init_sol != NULL)
+                                               for(j=0;j<w_size;j++)
+                                                       w[j] = param->init_sol[j*nr_class+i];
+                                       else
+                                               for(j=0;j<w_size;j++)
+                                                       w[j] = 0;
+
                                        train_one(&sub_prob, param, w, weighted_C[i], param->C);
 
                                        for(int j=0;j<w_size;j++)
@@ -2480,6 +2529,148 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
        free(perm);
 }
 
+void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+{
+       // variables for CV
+       int i;
+       int *fold_start;
+       int l = prob->l;
+       int *perm = Malloc(int, l);
+       double *target = Malloc(double, prob->l);
+       struct problem *subprob = Malloc(problem,nr_fold);
+
+       // variables for warm start
+       double ratio = 2;
+       double **prev_w = Malloc(double*, nr_fold);
+       for(i = 0; i < nr_fold; i++)
+               prev_w[i] = NULL;
+       int num_unchanged_w = 0;
+       struct parameter param1 = *param;
+       void (*default_print_string) (const char *) = liblinear_print_string;
+
+       if (nr_fold > l)
+       {
+               nr_fold = l;
+               fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
+       }
+       fold_start = Malloc(int,nr_fold+1);
+       for(i=0;i<l;i++) perm[i]=i;
+       for(i=0;i<l;i++)
+       {
+               int j = i+rand()%(l-i);
+               swap(perm[i],perm[j]);
+       }
+       for(i=0;i<=nr_fold;i++)
+               fold_start[i]=i*l/nr_fold;
+
+       for(i=0;i<nr_fold;i++)
+       {
+               int begin = fold_start[i];
+               int end = fold_start[i+1];
+               int j,k;
+
+               subprob[i].bias = prob->bias;
+               subprob[i].n = prob->n;
+               subprob[i].l = l-(end-begin);
+               subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
+               subprob[i].y = Malloc(double,subprob[i].l);
+
+               k=0;
+               for(j=0;j<begin;j++)
+               {
+                       subprob[i].x[k] = prob->x[perm[j]];
+                       subprob[i].y[k] = prob->y[perm[j]];
+                       ++k;
+               }
+               for(j=end;j<l;j++)
+               {
+                       subprob[i].x[k] = prob->x[perm[j]];
+                       subprob[i].y[k] = prob->y[perm[j]];
+                       ++k;
+               }
+
+       }
+
+       *best_rate = 0;
+       if(start_C <= 0)
+               start_C = calc_start_C(prob,param);
+       param1.C = start_C;
+
+       while(param1.C <= max_C)
+       {
+               // Output disabled while running CV at a particular C
+               set_print_string_function(&print_null);
+
+               for(i=0; i<nr_fold; i++)
+               {
+                       int j;
+                       int begin = fold_start[i];
+                       int end = fold_start[i+1];
+
+                       param1.init_sol = prev_w[i];
+                       struct model *submodel = train(&subprob[i],&param1);
+
+                       int total_w_size;
+                       if(submodel->nr_class == 2)
+                               total_w_size = subprob[i].n;
+                       else
+                               total_w_size = subprob[i].n * submodel->nr_class;
+
+                       if(prev_w[i] != NULL && num_unchanged_w >= 0)
+                       {
+                               double norm_w_diff = 0;
+                               for(j=0; j<total_w_size; j++)
+                               {
+                                       norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+                                       prev_w[i][j] = submodel->w[j];
+                               }
+                               norm_w_diff = sqrt(norm_w_diff);
+
+                               if(norm_w_diff > 1e-15)
+                                       num_unchanged_w = -1;
+                       }
+                       else
+                       {
+                               prev_w[i] = Malloc(double, total_w_size);
+                               for(j=0; j<total_w_size; j++)
+                                       prev_w[i][j] = submodel->w[j];
+                       }
+
+                       for(j=begin; j<end; j++)
+                               target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+
+                       free_and_destroy_model(&submodel);
+               }
+               set_print_string_function(default_print_string);
+
+               int total_correct = 0;
+               for(i=0; i<prob->l; i++)
+                       if(target[i] == prob->y[i])
+                               ++total_correct;
+               double current_rate = (double)total_correct/prob->l;
+               if(current_rate > *best_rate)
+               {
+                       *best_C = param1.C;
+                       *best_rate = current_rate;
+               }
+
+               info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
+               num_unchanged_w++;
+               if(num_unchanged_w == 3)
+                       break;
+               param1.C = param1.C*ratio;
+       }
+
+       if(param1.C > max_C && max_C > start_C) 
+               info("warning: maximum C reached.\n");
+       free(fold_start);
+       free(perm);
+       free(target);
+       for(i=0; i<nr_fold; i++)
+               free(prev_w[i]);
+       free(prev_w);
+}
+
 double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
 {
        int idx;
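Given the doubling schedule above (ratio = 2), a search that starts at the calc_start_C value 2^-10 and is capped at the callers' default max_C = 1024 = 2^10 evaluates at most 21 values of C; it ends earlier once the weight vectors of all folds stay unchanged for three consecutive rounds (num_unchanged_w == 3).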
@@ -2839,6 +3030,8 @@ void destroy_param(parameter* param)
                free(param->weight_label);
        if(param->weight != NULL)
                free(param->weight);
+       if(param->init_sol != NULL)
+               free(param->init_sol);
 }
 
 const char *check_parameter(const problem *prob, const parameter *param)
@@ -2865,6 +3058,10 @@ const char *check_parameter(const problem *prob, const parameter *param)
                && param->solver_type != L2R_L1LOSS_SVR_DUAL)
                return "unknown solver type";
 
+       if(param->init_sol != NULL 
+               && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
+               return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
+
        return NULL;
 }
 
index 6b07b477dd1de388f9b70e355b8ae395ac700448..bc6aaf8b6e1ec716902c0cb2d587e6ca06fa2ba7 100644 (file)
--- a/linear.h
+++ b/linear.h
@@ -32,6 +32,7 @@ struct parameter
        int *weight_label;
        double* weight;
        double p;
+       double *init_sol;
 };
 
 struct model
@@ -46,6 +47,7 @@ struct model
 
 struct model* train(const struct problem *prob, const struct parameter *param);
 void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
 
 double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
 double predict(const struct model *model_, const struct feature_node *x);
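
A hedged fragment showing how the new init_sol field could warm-start train()
for a two-class problem (prev_w and n are hypothetical names; as check_parameter
in linear.cpp enforces, init_sol is accepted only for L2R_LR and L2R_L2LOSS_SVC):

    /* n = prob.n; prev_w holds an earlier solution of length n */
    param.solver_type = L2R_L2LOSS_SVC;
    param.init_sol = (double *) malloc(sizeof(double) * n);
    for(int j = 0; j < n; j++)
        param.init_sol[j] = prev_w[j];  /* copied: destroy_param frees init_sol */
    struct model *m = train(&prob, &param);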
index f2b02b73a7efe91ead0359dede55b371c5753bee..f53f4350cacf00520a2f1dd3adef363f2ec3318d 100644 (file)
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
 
 If the '-v' option is specified, cross validation is conducted and the
 returned model is just a scalar: cross-validation accuracy for 
-classification and mean-squared error for regression.
+classification and mean-squared error for regression. If the '-C' option
+is specified, the best parameter C is found by cross validation. The 
+returned model is a two-element vector, where the first value is
+the best C and the second value is the corresponding cross-validation
+accuracy. The parameter selection utility is supported only by -s 0
+and -s 2.
 
 Result of Prediction
 ====================
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
 
 matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
 
+Use the best parameter to train (only supported by -s 0 and -s 2):
+
+matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
+matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0 
+
 Additional Information
 ======================
 
index 93e3eb823189203670b59db63285d9bc8681bafd..5c3ef4a79c2bcf8eb5ac40178eac2b0908884968 100644 (file)
@@ -1,4 +1,3 @@
-#include <stdio.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
@@ -60,6 +59,7 @@ void exit_with_help()
        "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
        "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
        "-v n: n-fold cross validation mode\n"
+       "-C : find parameter C (only for -s 0 and 2)\n"
        "-q : quiet mode (no outputs)\n"
        "col:\n"
        "       if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
@@ -71,11 +71,28 @@ struct parameter param;             // set by parse_command_line
 struct problem prob;           // set by read_problem
 struct model *model_;
 struct feature_node *x_space;
-int cross_validation_flag;
+int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int col_format_flag;
 int nr_fold;
 double bias;
 
+
+void do_find_parameter_C(double *best_C, double *best_rate)
+{
+       double start_C;
+       double max_C = 1024;
+       if (flag_C_specified)
+               start_C = param.C;
+       else
+               start_C = -1.0;
+       find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
+       mexPrintf("Best C = %lf  CV accuracy = %g%%\n", *best_C, 100.0**best_rate);     
+}
+
+
 double do_cross_validation()
 {
        int i;
@@ -101,8 +118,8 @@ double do_cross_validation()
                         sumyy += y*y;
                         sumvy += v*y;
                 }
-                printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
-                printf("Cross Validation Squared correlation coefficient = %g\n",
+                mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+                mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
                         ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
                         ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
                         );
@@ -113,7 +130,7 @@ double do_cross_validation()
                for(i=0;i<prob.l;i++)
                        if(target[i] == prob.y[i])
                                ++total_correct;
-               printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+               mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
                retval = 100.0*total_correct/prob.l;
        }
 
@@ -137,8 +154,12 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
        param.nr_weight = 0;
        param.weight_label = NULL;
        param.weight = NULL;
-       cross_validation_flag = 0;
+       param.init_sol = NULL;
+       flag_cross_validation = 0;
        col_format_flag = 0;
+       flag_C_specified = 0;
+       flag_solver_specified = 0;
+       flag_find_C = 0;
        bias = -1;
 
 
@@ -166,15 +187,17 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
        {
                if(argv[i][0] != '-') break;
                ++i;
-               if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
+               if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C') // since options -q and -C have no parameter
                        return 1;
                switch(argv[i-1][1])
                {
                        case 's':
                                param.solver_type = atoi(argv[i]);
+                               flag_solver_specified = 1;
                                break;
                        case 'c':
                                param.C = atof(argv[i]);
+                               flag_C_specified = 1;
                                break;
                        case 'p':
                                param.p = atof(argv[i]);
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
                                bias = atof(argv[i]);
                                break;
                        case 'v':
-                               cross_validation_flag = 1;
+                               flag_cross_validation = 1;
                                nr_fold = atoi(argv[i]);
                                if(nr_fold < 2)
                                {
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
                                print_func = &print_null;
                                i--;
                                break;
+                       case 'C':
+                               flag_find_C = 1;
+                               i--;
+                               break;
                        default:
                                mexPrintf("unknown option\n");
                                return 1;
@@ -213,6 +240,23 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 
        set_print_string_function(print_func);
 
+       // default solver for parameter selection is L2R_L2LOSS_SVC
+       if(flag_find_C)
+       {
+               if(!flag_cross_validation)
+                       nr_fold = 5;
+               if(!flag_solver_specified)
+               {
+                       mexPrintf("Solver not specified. Using -s 2\n");
+                       param.solver_type = L2R_L2LOSS_SVC;
+               }
+               else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+               {
+                       mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
+                       return 1;
+               }
+       }
+
        if(param.eps == INF)
        {
                switch(param.solver_type)
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
                        return;
                }
 
-               if(cross_validation_flag)
+               if (flag_find_C)
+               {
+                       double best_C, best_rate, *ptr;
+                       
+                       do_find_parameter_C(&best_C, &best_rate);       
+                       
+                       plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
+                       ptr = mxGetPr(plhs[0]);
+                       ptr[0] = best_C;
+                       ptr[1] = best_rate;
+               }
+               else if(flag_cross_validation)
                {
                        double *ptr;
                        plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
index e6349cf9fa948560d338c1f135d5b39b51e41356..47e0b4a04dd7f4f334feafbe4cffda3eb2357dc7 100644 (file)
@@ -277,6 +277,11 @@ The above command loads
            structure. If '-v' is specified, cross validation is
            conducted and the returned model is just a scalar: cross-validation
            accuracy for classification and mean-squared error for regression.
+           If the '-C' option is specified, the best parameter C is found
+           by cross validation. The returned model is a tuple of the best C
+           and the corresponding cross-validation accuracy. The parameter
+           selection utility is supported only by -s 0 and -s 2.
+
 
     To train the same data many times with different
     parameters, the second and the third ways should be faster.
@@ -290,6 +295,8 @@ The above command loads
     >>> m = train(prob, '-w1 5 -c 5')
     >>> m = train(prob, param)
     >>> CV_ACC = train(y, x, '-v 3')
+    >>> best_C, best_rate = train(y, x, '-C -s 0')
+    >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
 
 - Function: predict
 
index 9587718d844ffb9bff97f5a19e24fc5027f19456..d6500626f83b867d19a30639a31d489d7317446c 100644 (file)
@@ -16,7 +16,7 @@ try:
        if sys.platform == 'win32':
                liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
        else:
-               liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
+               liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
 except:
 # For unix the prefix 'lib' is not considered.
        if find_library('linear'):
@@ -127,8 +127,8 @@ class problem(Structure):
 
 
 class parameter(Structure):
-       _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
-       _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
+       _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
+       _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
        _fields_ = genFields(_names, _types)
 
        def __init__(self, options = None):
@@ -152,10 +152,14 @@ class parameter(Structure):
                self.C = 1
                self.p = 0.1
                self.nr_weight = 0
-               self.weight_label = (c_int * 0)()
-               self.weight = (c_double * 0)()
+               self.weight_label = None
+               self.weight = None
+               self.init_sol = None
                self.bias = -1
-               self.cross_validation = False
+               self.flag_cross_validation = False
+               self.flag_C_specified = False
+               self.flag_solver_specified = False
+               self.flag_find_C = False
                self.nr_fold = 0
                self.print_func = cast(None, PRINT_STRING_FUN)
 
@@ -176,9 +180,11 @@ class parameter(Structure):
                        if argv[i] == "-s":
                                i = i + 1
                                self.solver_type = int(argv[i])
+                               self.flag_solver_specified = True
                        elif argv[i] == "-c":
                                i = i + 1
                                self.C = float(argv[i])
+                               self.flag_C_specified = True
                        elif argv[i] == "-p":
                                i = i + 1
                                self.p = float(argv[i])
@@ -190,18 +196,20 @@ class parameter(Structure):
                                self.bias = float(argv[i])
                        elif argv[i] == "-v":
                                i = i + 1
-                               self.cross_validation = 1
+                               self.flag_cross_validation = 1
                                self.nr_fold = int(argv[i])
                                if self.nr_fold < 2 :
                                        raise ValueError("n-fold cross validation: n must >= 2")
                        elif argv[i].startswith("-w"):
                                i = i + 1
                                self.nr_weight += 1
-                               nr_weight = self.nr_weight
                                weight_label += [int(argv[i-1][2:])]
                                weight += [float(argv[i])]
                        elif argv[i] == "-q":
                                self.print_func = PRINT_STRING_FUN(print_null)
+                       elif argv[i] == "-C":
+                               self.flag_find_C = True
+
                        else :
                                raise ValueError("Wrong options")
                        i += 1
@@ -213,6 +221,16 @@ class parameter(Structure):
                        self.weight[i] = weight[i]
                        self.weight_label[i] = weight_label[i]
 
+               # default solver for parameter selection is L2R_L2LOSS_SVC
+               if self.flag_find_C:
+                       if not self.flag_cross_validation:
+                               self.nr_fold = 5
+                       if not self.flag_solver_specified:
+                               self.solver_type = L2R_L2LOSS_SVC
+                               self.flag_solver_specified = True
+                       elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
+                               raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
+
                if self.eps == float('inf'):
                        if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
                                self.eps = 0.01
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
        return m
 
 fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
+fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
 fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
 
 fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
index 40de52a2d4011efe644598f91f08747642eea9bb..5ba5efa3e21ac8ecf9a21ada20f8fcefb74ef12d 100644 (file)
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
        if err_msg :
                raise ValueError('Error: %s' % err_msg)
 
-       if param.cross_validation:
+       if param.flag_find_C:
+               nr_fold = param.nr_fold
+               best_C = c_double()
+               best_rate = c_double()          
+               max_C = 1024
+               if param.flag_C_specified:
+                       start_C = param.C
+               else:
+                       start_C = -1.0
+               liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
+               print("Best C = %lf  CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
+               return best_C.value,best_rate.value
+
+
+       elif param.flag_cross_validation:
                l, nr_fold = prob.l, param.nr_fold
                target = (c_double * l)()
                liblinear.cross_validation(prob, param, nr_fold, target)
diff --git a/train.c b/train.c
index 80d9810778b92787513644afa91f9a12d1230bc9..4df8594e5db618d63d2a777d439299e6d4a8d244 100644 (file)
--- a/train.c
+++ b/train.c
@@ -49,6 +49,7 @@ void exit_with_help()
        "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
        "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
        "-v n: n-fold cross validation mode\n"
+       "-C : find parameter C (only for -s 0 and 2)\n"
        "-q : quiet mode (no outputs)\n"
        );
        exit(1);
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
 void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
 void read_problem(const char *filename);
 void do_cross_validation();
+void do_find_parameter_C();
 
 struct feature_node *x_space;
 struct parameter param;
 struct problem prob;
 struct model* model_;
 int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int nr_fold;
 double bias;
 
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
                exit(1);
        }
 
-       if(flag_cross_validation)
+       if (flag_find_C)
+       {
+               do_find_parameter_C();
+       }
+       else if(flag_cross_validation)
        {
                do_cross_validation();
        }
@@ -132,6 +141,18 @@ int main(int argc, char **argv)
        return 0;
 }
 
+void do_find_parameter_C()
+{
+       double start_C, best_C, best_rate;
+       double max_C = 1024;
+       if (flag_C_specified)
+               start_C = param.C;
+       else
+               start_C = -1.0;
+       find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
+       printf("Best C = %lf  CV accuracy = %g%%\n", best_C, 100.0*best_rate);
+}
+
 void do_cross_validation()
 {
        int i;
@@ -186,7 +207,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
        param.nr_weight = 0;
        param.weight_label = NULL;
        param.weight = NULL;
+       param.init_sol = NULL;
        flag_cross_validation = 0;
+       flag_C_specified = 0;
+       flag_solver_specified = 0;
+       flag_find_C = 0;
        bias = -1;
 
        // parse options
@@ -199,10 +224,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
                {
                        case 's':
                                param.solver_type = atoi(argv[i]);
+                               flag_solver_specified = 1;
                                break;
 
                        case 'c':
                                param.C = atof(argv[i]);
+                               flag_C_specified = 1;
                                break;
 
                        case 'p':
@@ -240,6 +267,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
                                i--;
                                break;
 
+                       case 'C':
+                               flag_find_C = 1;
+                               i--;
+                               break;
+
                        default:
                                fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
                                exit_with_help();
@@ -267,6 +299,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
                sprintf(model_file_name,"%s.model",p);
        }
 
+       // default solver for parameter selection is L2R_L2LOSS_SVC
+       if(flag_find_C)
+       {
+               if(!flag_cross_validation)
+                       nr_fold = 5;
+               if(!flag_solver_specified)
+               {
+                       fprintf(stderr, "Solver not specified. Using -s 2\n");
+                       param.solver_type = L2R_L2LOSS_SVC;
+               }
+               else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+               {
+                       fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
+                       exit_with_help();
+               }
+       }
+
        if(param.eps == INF)
        {
                switch(param.solver_type)
index 7d1fd6eb17fd02ad35c491d8cf4ef394a5ca3f95..2cd283405da8229ca70617cddeb140fd38fd24ff 100644 (file)
--- a/tron.cpp
+++ b/tron.cpp
@@ -41,10 +41,11 @@ void TRON::info(const char *fmt,...)
        (*tron_print_string)(buf);
 }
 
-TRON::TRON(const function *fun_obj, double eps, int max_iter)
+TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
 {
        this->fun_obj=const_cast<function *>(fun_obj);
        this->eps=eps;
+       this->eps_cg=eps_cg;
        this->max_iter=max_iter;
        tron_print_string = default_print;
 }
@@ -71,16 +72,21 @@ void TRON::tron(double *w)
        double *w_new = new double[n];
        double *g = new double[n];
 
+       // calculate gradient norm at w=0 for stopping condition.
+       double *w0 = new double[n];
        for (i=0; i<n; i++)
-               w[i] = 0;
+               w0[i] = 0;
+       fun_obj->fun(w0);
+       fun_obj->grad(w0, g);
+       double gnorm0 = dnrm2_(&n, g, &inc);
+       delete [] w0;
 
        f = fun_obj->fun(w);
        fun_obj->grad(w, g);
        delta = dnrm2_(&n, g, &inc);
-       double gnorm1 = delta;
-       double gnorm = gnorm1;
+       double gnorm = delta;
 
-       if (gnorm <= eps*gnorm1)
+       if (gnorm <= eps*gnorm0)
                search = 0;
 
        iter = 1;
@@ -130,7 +136,7 @@ void TRON::tron(double *w)
                        fun_obj->grad(w, g);
 
                        gnorm = dnrm2_(&n, g, &inc);
-                       if (gnorm <= eps*gnorm1)
+                       if (gnorm <= eps*gnorm0)
                                break;
                }
                if (f < -1.0e+32)
@@ -172,7 +178,7 @@ int TRON::trcg(double delta, double *g, double *s, double *r)
                r[i] = -g[i];
                d[i] = r[i];
        }
-       cgtol = 0.1*dnrm2_(&n, g, &inc);
+       cgtol = eps_cg*dnrm2_(&n, g, &inc);
 
        int cg_iter = 0;
        rTr = ddot_(&n, r, &inc, r, &inc);
diff --git a/tron.h b/tron.h
index 3045c2e83a1338eb8ec148ed9bc689ea7d7a71ae..56002dcdbd0224d469196375d1aa9e053ae4addc 100644 (file)
--- a/tron.h
+++ b/tron.h
@@ -15,7 +15,7 @@ public:
 class TRON
 {
 public:
-       TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000);
+       TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
        ~TRON();
 
        void tron(double *w);
@@ -26,6 +26,7 @@ private:
        double norm_inf(int n, double *x);
 
        double eps;
+       double eps_cg;
        int max_iter;
        function *fun_obj;
        void info(const char *fmt,...);
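
A minimal sketch of driving TRON directly with the new eps_cg argument (fun_obj
stands for any concrete function implementation, such as the l2r_l2_svc_fun
built in train_one; the tolerance values here are illustrative):

    /* w must hold the starting point on entry: tron() no longer zeroes it,
       and convergence is now tested against the gradient norm at w = 0 */
    TRON tron_obj(fun_obj, /*eps=*/0.01, /*eps_cg=*/0.5, /*max_iter=*/1000);
    tron_obj.tron(w);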