Add -s 11, 12, 13 to python and matlab interfaces.

author popo <popo@16e7d947-dcc2-db11-b54a-0017319806e7>

Tue, 17 Apr 2012 04:50:50 +0000 (04:50 +0000)

committer popo <popo@16e7d947-dcc2-db11-b54a-0017319806e7>

Tue, 17 Apr 2012 04:50:50 +0000 (04:50 +0000)
author popo <popo@16e7d947-dcc2-db11-b54a-0017319806e7>
Tue, 17 Apr 2012 04:50:50 +0000 (04:50 +0000)
committer popo <popo@16e7d947-dcc2-db11-b54a-0017319806e7>
Tue, 17 Apr 2012 04:50:50 +0000 (04:50 +0000)
diff --git a/matlab/README b/matlab/README

index 402a1552a87608187d2a9e93c0bb434aef7f6961..d0fc6460431dd8604858b3134e54d704e870a195 100644 (file)
--- a/matlab/README
+++ b/matlab/README
@@ -18,7 +18,7 @@ Introduction
  ============
  
  This tool provides a simple interface to LIBLINEAR, a library for
-large-scale regularized linear classification
+large-scale regularized linear classification and regression
  (http://www.csie.ntu.edu.tw/~cjlin/liblinear).  It is very easy to use
  as the usage and the way of specifying parameters are the same as that
  of LIBLINEAR.
@@ -117,11 +117,11 @@ prediction.  It is a structure and is organized as [Parameters, nr_class,
  nr_feature, bias, Label, w]:
  
          -Parameters: Parameters
-        -nr_class: number of classes
+        -nr_class: number of classes; = 2 for regression
          -nr_feature: number of features in training data (without including the bias term)
          -bias: If >= 0, we assume one additional feature is added to the end
              of each data instance.
-        -Label: label of each class
+        -Label: label of each class; empty for regression
          -w: a nr_w-by-n matrix for the weights, where n is nr_feature
              or nr_feature+1 depending on the existence of the bias term.
              nr_w is 1 if nr_class=2 and -s is not 4 (i.e., not
@@ -129,14 +129,16 @@ nr_feature, bias, Label, w]:
              nr_class otherwise.
  
  If the '-v' option is specified, cross validation is conducted and the
-returned model is just a scalar: cross-validation accuracy.
+returned model is just a scalar: cross-validation accuracy for 
+classification and mean-squared error for regression.
  
  Result of Prediction
  ====================
  
  The function 'predict' has three outputs. The first one,
-predicted_label, is a vector of predicted labels.
-The second output is a scalar meaning accuracy.
+predicted_label, is a vector of predicted labels. The second output,
+accuracy, is a vector including accuracy (for classification), mean
+squared error, and squared correlation coefficient (for regression).
  The third is a matrix containing decision values or probability
  estimates (if '-b 1' is specified). If k is the number of classes
  and k' is the number of classifiers (k'=1 if k=2, otherwise k'=k), for decision values,
diff --git a/matlab/linear_model_matlab.c b/matlab/linear_model_matlab.c

index 2d4076e6792decba352bed2406f82f6cfc5f6d34..7b5129e3b617f4686b0f978f7301989d4c3f21df 100644 (file)
--- a/matlab/linear_model_matlab.c
+++ b/matlab/linear_model_matlab.c
@@ -154,10 +154,14 @@ const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_s
                 n=model_->nr_feature;
         w_size = n;
  
-       ptr = mxGetPr(rhs[id]);
-       model_->label=Malloc(int, model_->nr_class);
-       for(i=0; i<model_->nr_class; i++)
-               model_->label[i]=(int)ptr[i];
+       // Label
+       if(mxIsEmpty(rhs[id]) == 0)
+       {
+               model_->label = Malloc(int, model_->nr_class);
+               ptr = mxGetPr(rhs[id]);
+               for(i=0;i<model_->nr_class;i++)
+                       model_->label[i] = (int)ptr[i];
+       }
         id++;
  
         ptr = mxGetPr(rhs[id]);
diff --git a/matlab/predict.c b/matlab/predict.c

index 972d24918976dffc33f42a504e62f89f3e25adee..84e5dd8ee946c71f7ea4b857a418e629dcf63309 100644 (file)
--- a/matlab/predict.c
+++ b/matlab/predict.c
@@ -58,13 +58,15 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co
         int label_vector_row_num, label_vector_col_num;
         int feature_number, testing_instance_number;
         int instance_index;
-       double *ptr_instance, *ptr_label, *ptr_predict_label;
+       double *ptr_label, *ptr_predict_label;
         double *ptr_prob_estimates, *ptr_dec_values, *ptr;
         struct feature_node *x;
         mxArray *pplhs[1]; // instance sparse matrix in row format
  
         int correct = 0;
         int total = 0;
+       double error = 0;
+       double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
  
         int nr_class=get_nr_class(model_);
         int nr_w;
@@ -100,7 +102,6 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co
                 return;
         }
  
-       ptr_instance = mxGetPr(prhs[1]);
         ptr_label    = mxGetPr(prhs[0]);
  
         // transpose instance matrix
@@ -134,42 +135,63 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co
         for(instance_index=0;instance_index<testing_instance_number;instance_index++)
         {
                 int i;
-               double target,v;
+               double target_label, predict_label;
  
-               target = ptr_label[instance_index];
+               target_label = ptr_label[instance_index];
  
                 // prhs[1] and prhs[1]^T are sparse
                 read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);
  
                 if(predict_probability_flag)
                 {
-                       v = predict_probability(model_, x, prob_estimates);
-                       ptr_predict_label[instance_index] = v;
+                       predict_label = predict_probability(model_, x, prob_estimates);
+                       ptr_predict_label[instance_index] = predict_label;
                         for(i=0;i<nr_class;i++)
                                 ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
                 }
                 else
                 {
                         double *dec_values = Malloc(double, nr_class);
-                       v = predict(model_, x);
-                       ptr_predict_label[instance_index] = v;
+                       predict_label = predict_values(model_, x, dec_values);
+                       ptr_predict_label[instance_index] = predict_label;
  
-                       predict_values(model_, x, dec_values);
                         for(i=0;i<nr_w;i++)
                                 ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
                         free(dec_values);
                 }
  
-               if(v == target)
+               if(predict_label == target_label)
                         ++correct;
+               error += (predict_label-target_label)*(predict_label-target_label);
+               sump += predict_label;
+               sumt += target_label;
+               sumpp += predict_label*predict_label;
+               sumtt += target_label*target_label;
+               sumpt += predict_label*target_label;
+
                 ++total;
         }
-       mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
+       
+       if(model_->param.solver_type==L2R_L2LOSS_SVR || 
+           model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 
+           model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
+        {
+                mexPrintf("Mean squared error = %g (regression)\n",error/total);
+                mexPrintf("Squared correlation coefficient = %g (regression)\n",
+                       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+                       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+                       );
+        }
+       else
+               mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
  
         // return accuracy, mean squared error, squared correlation coefficient
-       plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
+       plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
         ptr = mxGetPr(plhs[1]);
-       ptr[0] = (double) correct/total*100;
+       ptr[0] = (double)correct/total*100;
+       ptr[1] = error/total;
+       ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+                               ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));
  
         free(x);
         if(prob_estimates != NULL)
@@ -182,7 +204,11 @@ void exit_with_help()
                         "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n"
                         "liblinear_options:\n"
                         "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
-                       "col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format"
+                       "col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n"
+                       "Returns:\n"
+                       "  predicted_label: prediction output vector.\n"
+                       "  accuracy: a vector with accuracy, mean squared error, squared correlation coefficient.\n"
+                       "  prob_estimates: If selected, probability estimate vector.\n"
                         );
  }
  
diff --git a/matlab/train.c b/matlab/train.c

index 1094a51dc27c7543308f3f86d14113b8f75523bc..7bab35f17d62fe4771a4eca1a7995d1b91c8227a 100644 (file)
--- a/matlab/train.c
+++ b/matlab/train.c
@@ -27,25 +27,34 @@ void exit_with_help()
         "Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n"
         "liblinear_options:\n"
         "-s type : set type of solver (default 1)\n"
-       "       0 -- L2-regularized logistic regression (primal)\n"
-       "       1 -- L2-regularized L2-loss support vector classification (dual)\n"     
-       "       2 -- L2-regularized L2-loss support vector classification (primal)\n"
-       "       3 -- L2-regularized L1-loss support vector classification (dual)\n"
-       "       4 -- multi-class support vector classification by Crammer and Singer\n"
-       "       5 -- L1-regularized L2-loss support vector classification\n"
-       "       6 -- L1-regularized logistic regression\n"
-       "       7 -- L2-regularized logistic regression (dual)\n"
+       "        0 -- L2-regularized logistic regression (primal)\n"
+       "        1 -- L2-regularized L2-loss support vector classification (dual)\n"    
+       "        2 -- L2-regularized L2-loss support vector classification (primal)\n"
+       "        3 -- L2-regularized L1-loss support vector classification (dual)\n"
+       "        4 -- multi-class support vector classification by Crammer and Singer\n"
+       "        5 -- L1-regularized L2-loss support vector classification\n"
+       "        6 -- L1-regularized logistic regression\n"
+       "        7 -- L2-regularized logistic regression (dual)\n"
+       "       11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n"
+       "       12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n"
+       "       13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n"
         "-c cost : set the parameter C (default 1)\n"
+       "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
         "-e epsilon : set tolerance of termination criterion\n"
         "       -s 0 and 2\n" 
         "               |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" 
         "               where f is the primal function and pos/neg are # of\n" 
         "               positive/negative data (default 0.01)\n"
+       "       -s 11\n"
+       "               |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n" 
         "       -s 1, 3, 4 and 7\n"
         "               Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
         "       -s 5 and 6\n"
         "               |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
         "               where f is the primal function (default 0.01)\n"
+       "       -s 12 and 13\n"
+       "               |f'(alpha)|_1 <= eps |f'(alpha0)|,\n"
+       "               where f is the dual function (default 0.1)\n"
         "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
         "-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
         "-v n: n-fold cross validation mode\n"
@@ -69,16 +78,42 @@ double do_cross_validation()
  {
         int i;
         int total_correct = 0;
-       int *target = Malloc(int,prob.l);
+       double total_error = 0;
+       double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
+       double *target = Malloc(double, prob.l);
         double retval = 0.0;
  
         cross_validation(&prob,&param,nr_fold,target);
-
-       for(i=0;i<prob.l;i++)
-               if(target[i] == prob.y[i])
-                       ++total_correct;
-       mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
-       retval = 100.0*total_correct/prob.l;
+       if(param.solver_type == L2R_L2LOSS_SVR || 
+          param.solver_type == L2R_L1LOSS_SVR_DUAL || 
+          param.solver_type == L2R_L2LOSS_SVR_DUAL)
+       {
+               for(i=0;i<prob.l;i++)
+                {
+                        double y = prob.y[i];
+                        double v = target[i];
+                        total_error += (v-y)*(v-y);
+                        sumv += v;
+                        sumy += y;
+                        sumvv += v*v;
+                        sumyy += y*y;
+                        sumvy += v*y;
+                }
+                printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+                printf("Cross Validation Squared correlation coefficient = %g\n",
+                        ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
+                        ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
+                        );
+               retval = total_error/prob.l;
+       }
+       else
+       {
+               for(i=0;i<prob.l;i++)
+                       if(target[i] == prob.y[i])
+                               ++total_correct;
+               printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+               retval = 100.0*total_correct/prob.l;
+       }
  
         free(target);
         return retval;
@@ -96,6 +131,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
         param.solver_type = L2R_L2LOSS_SVC_DUAL;
         param.C = 1;
         param.eps = INF; // see setting below
+       param.p = 0.1;
         param.nr_weight = 0;
         param.weight_label = NULL;
         param.weight = NULL;
@@ -138,6 +174,9 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
                         case 'c':
                                 param.C = atof(argv[i]);
                                 break;
+                       case 'p':
+                               param.p = atof(argv[i]);
+                               break;
                         case 'e':
                                 param.eps = atof(argv[i]);
                                 break;
@@ -174,12 +213,30 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
  
         if(param.eps == INF)
         {
-               if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC)
-                       param.eps = 0.01;
-               else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS || param.solver_type == L2R_LR_DUAL)
-                       param.eps = 0.1;
-               else if(param.solver_type == L1R_L2LOSS_SVC || param.solver_type == L1R_LR)
-                       param.eps = 0.01;
+               switch(param.solver_type)
+               {
+                       case L2R_LR: 
+                       case L2R_L2LOSS_SVC:
+                               param.eps = 0.01;
+                               break;
+                       case L2R_L2LOSS_SVR:
+                               param.eps = 0.001;
+                               break;
+                       case L2R_L2LOSS_SVC_DUAL: 
+                       case L2R_L1LOSS_SVC_DUAL: 
+                       case MCSVM_CS: 
+                       case L2R_LR_DUAL: 
+                               param.eps = 0.1;
+                               break;
+                       case L1R_L2LOSS_SVC: 
+                       case L1R_LR:
+                               param.eps = 0.01;
+                               break;
+                       case L2R_L1LOSS_SVR_DUAL:
+                       case L2R_L2LOSS_SVR_DUAL:
+                               param.eps = 0.1;
+                               break;
+               }
         }
         return 0;
  }
@@ -238,7 +295,7 @@ int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat)
         elements = num_samples + prob.l*2;
         max_index = (int) mxGetM(instance_mat_col);
  
-       prob.y = Malloc(int, prob.l);
+       prob.y = Malloc(double, prob.l);
         prob.x = Malloc(struct feature_node*, prob.l);
         x_space = Malloc(struct feature_node, elements);
  
@@ -248,7 +305,7 @@ int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat)
         for(i=0;i<prob.l;i++)
         {
                 prob.x[i] = &x_space[j];
-               prob.y[i] = (int) labels[i];
+               prob.y[i] = labels[i];
                 low = (int) jc[i], high = (int) jc[i+1];
                 for(k=low;k<high;k++)
                 {
diff --git a/python/README b/python/README

index 7eca2a700ebc1e9cf7c6cb5b51e328706c362901..e856e74326b2b3070434b1a41bcd84d0d2add376 100644 (file)
--- a/python/README
+++ b/python/README
@@ -63,7 +63,7 @@ in liblinearutil.py and the usage is the same as the LIBLINEAR MATLAB interface.
  >>> save_model('heart_scale.model', m)
  >>> m = load_model('heart_scale.model')
  >>> p_label, p_acc, p_val = predict(y, x, m, '-b 1')
->>> ACC = evaluations(y, p_label)
+>>> ACC, MSE, SCC = evaluations(y, p_label)
  
  # Getting online help
  >>> help(train)
@@ -278,7 +278,9 @@ The above command loads
  
      p_labels: a list of predicted labels
  
-    p_acc: testing accuracy
+    p_acc: a tuple including accuracy (for classification), mean
+           squared error, and squared correlation coefficient (for
+           regression).
  
      p_vals: a list of decision values or probability estimates (if '-b 1' 
              is specified). If k is the number of classes, for decision values,
@@ -307,7 +309,7 @@ The above command loads
      Calculate some evaluations using the true values (ty) and predicted
      values (pv):
  
-    >>> ACC = evaluations(ty, pv)
+    >>> (ACC, MSE, SCC) = evaluations(ty, pv)
  
      ty: a list of true values.
  
@@ -315,6 +317,10 @@ The above command loads
  
      ACC: accuracy.
  
+    MSE: mean squared error.
+
+    SCC: squared correlation coefficient.
+
  
  Additional Information
  ======================
diff --git a/python/liblinear.py b/python/liblinear.py

index 5872a4a3974c9de68e3db566fdd4fe6cee186767..890e18a03ca94e1019fecc03b9e4dfa8c6dcf75d 100644 (file)
--- a/python/liblinear.py
+++ b/python/liblinear.py
@@ -20,8 +20,11 @@ else:
  
  # Construct constants
  SOLVER_TYPE = ['L2R_LR', 'L2R_L2LOSS_SVC_DUAL', 'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL',\
-               'MCSVM_CS', 'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL']
-for i, s in enumerate(SOLVER_TYPE): exec("%s = %d" % (s , i))
+               'MCSVM_CS', 'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', \
+               None, None, None, \
+               'L2R_L2LOSS_SVR', 'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL']
+for i, s in enumerate(SOLVER_TYPE): 
+       if s is not None: exec("%s = %d" % (s , i))
  
  PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
  def print_null(s): 
@@ -68,7 +71,7 @@ def gen_feature_nodearray(xi, feature_max=None, issparse=True):
  
  class problem(Structure):
         _names = ["l", "n", "y", "x", "bias"]
-       _types = [c_int, c_int, POINTER(c_int), POINTER(POINTER(feature_node)), c_double]
+       _types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double]
         _fields_ = genFields(_names, _types)
  
         def __init__(self, y, x, bias = -1):
@@ -85,7 +88,7 @@ class problem(Structure):
                         max_idx = max(max_idx, tmp_idx)
                 self.n = max_idx
  
-               self.y = (c_int * l)()
+               self.y = (c_double * l)()
                 for i, yi in enumerate(y): self.y[i] = y[i]
  
                 self.x = (POINTER(feature_node) * l)() 
@@ -109,8 +112,8 @@ class problem(Structure):
  
  
  class parameter(Structure):
-       _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight"]
-       _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double)]
+       _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
+       _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
         _fields_ = genFields(_names, _types)
  
         def __init__(self, options = None):
@@ -128,6 +131,7 @@ class parameter(Structure):
                 self.solver_type = L2R_L2LOSS_SVC_DUAL
                 self.eps = float('inf')
                 self.C = 1
+               self.p = 0.1
                 self.nr_weight = 0
                 self.weight_label = (c_int * 0)()
                 self.weight = (c_double * 0)()
@@ -151,6 +155,9 @@ class parameter(Structure):
                         elif argv[i] == "-c":
                                 i = i + 1
                                 self.C = float(argv[i])
+                       elif argv[i] == "-p":
+                               i = i + 1
+                               self.p = float(argv[i])
                         elif argv[i] == "-e":
                                 i = i + 1
                                 self.eps = float(argv[i])
@@ -185,11 +192,14 @@ class parameter(Structure):
                 if self.eps == float('inf'):
                         if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
                                 self.eps = 0.01
+                       elif self.solver_type in [L2R_L2LOSS_SVR]:
+                               self.eps = 0.001
                         elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]:
                                 self.eps = 0.1
                         elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]:
                                 self.eps = 0.01
-
+                       elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
+                               self.eps = 0.1
  
  class model(Structure):
         _names = ["param", "nr_class", "nr_feature", "w", "label", "bias"]
@@ -212,7 +222,7 @@ class model(Structure):
  
         def get_labels(self):
                 nr_class = self.get_nr_class()
-               labels = (c_int * nr_class)()
+               labels = (c_double * nr_class)()
                 liblinear.get_labels(self, labels)
                 return labels[:nr_class]
  
@@ -232,11 +242,11 @@ def toPyModel(model_ptr):
         return m
  
  fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
-fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_int)])
+fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
  
-fillprototype(liblinear.predict_values, c_int, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
-fillprototype(liblinear.predict, c_int, [POINTER(model), POINTER(feature_node)])
-fillprototype(liblinear.predict_probability, c_int, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
+fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
+fillprototype(liblinear.predict, c_double, [POINTER(model), POINTER(feature_node)])
+fillprototype(liblinear.predict_probability, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
  
  fillprototype(liblinear.save_model, c_int, [c_char_p, POINTER(model)])
  fillprototype(liblinear.load_model, POINTER(model), [c_char_p])
diff --git a/python/liblinearutil.py b/python/liblinearutil.py

index 42f7352cb35a76794a44408f4cdd247e82e5c303..445dfaef84b95f10350c3ee9be6c129e9301b49b 100644 (file)
--- a/python/liblinearutil.py
+++ b/python/liblinearutil.py
@@ -30,7 +30,7 @@ def load_model(model_file_name):
         
         Load a LIBLINEAR model from model_file_name and return.
         """
-       model = liblinear.load_model(model_file_name.encode())
+       model = liblinear.load_model(model_file_name)
         if not model: 
                 print("can't open model file %s" % model_file_name)
                 return None
@@ -47,19 +47,32 @@ def save_model(model_file_name, model):
  
  def evaluations(ty, pv):
         """
-       evaluations(ty, pv) -> ACC
+       evaluations(ty, pv) -> (ACC, MSE, SCC)
  
-       Calculate accuracy using the true values (ty) and predicted values (pv).
+       Calculate accuracy, mean squared error and squared correlation coefficient
+       using the true values (ty) and predicted values (pv).
         """
         if len(ty) != len(pv):
                 raise ValueError("len(ty) must equal to len(pv)")
         total_correct = total_error = 0
+       sumv = sumy = sumvv = sumyy = sumvy = 0
         for v, y in zip(pv, ty):
                 if y == v: 
                         total_correct += 1
+               total_error += (v-y)*(v-y)
+               sumv += v
+               sumy += y
+               sumvv += v*v
+               sumyy += y*y
+               sumvy += v*y 
         l = len(ty)
         ACC = 100.0*total_correct/l
-       return ACC
+       MSE = total_error/l
+       try:
+               SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
+       except:
+               SCC = float('nan')
+       return (ACC, MSE, SCC)
  
  def train(arg1, arg2=None, arg3=None):
         """
@@ -70,28 +83,37 @@ def train(arg1, arg2=None, arg3=None):
         Train a model from data (y, x) or a problem prob using
         'options' or a parameter param. 
         If '-v' is specified in 'options' (i.e., cross validation)
-       accuracy (ACC) is returned.
+       either accuracy (ACC) or mean-squared error (MSE) is returned.
  
         'options':
                 -s type : set type of solver (default 1)
-                       0 -- L2-regularized logistic regression (primal)
-                       1 -- L2-regularized L2-loss support vector classification (dual)        
-                       2 -- L2-regularized L2-loss support vector classification (primal)
-                       3 -- L2-regularized L1-loss support vector classification (dual)
-                       4 -- multi-class support vector classification by Crammer and Singer
-                       5 -- L1-regularized L2-loss support vector classification
-                       6 -- L1-regularized logistic regression
-                       7 -- L2-regularized logistic regression (dual)
+                        0 -- L2-regularized logistic regression (primal)
+                        1 -- L2-regularized L2-loss support vector classification (dual)       
+                        2 -- L2-regularized L2-loss support vector classification (primal)
+                        3 -- L2-regularized L1-loss support vector classification (dual)
+                        4 -- multi-class support vector classification by Crammer and Singer
+                        5 -- L1-regularized L2-loss support vector classification
+                        6 -- L1-regularized logistic regression
+                        7 -- L2-regularized logistic regression (dual)
+                       11 -- L2-regularized L2-loss epsilon support vector regression (primal)
+                       12 -- L2-regularized L2-loss epsilon support vector regression (dual)
+                       13 -- L2-regularized L1-loss epsilon support vector regression (dual)
                 -c cost : set the parameter C (default 1)
+               -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
                 -e epsilon : set tolerance of termination criterion
                         -s 0 and 2 
                                 |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 
                                 where f is the primal function, (default 0.01)
+                       -s 11
+                               |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 
                         -s 1, 3, 4, and 7
                                 Dual maximal violation <= eps; similar to liblinear (default 0.1)
                         -s 5 and 6
                                 |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
                                 where f is the primal function (default 0.01)
+                       -s 12 and 13
+                               |f'(alpha)|_1 <= eps |f'(alpha0)|,
+                               where f is the dual function (default 0.1)
                 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
                 -wi weight: weights adjust the parameter C of different classes (see README for details)
                 -v n: n-fold cross validation mode
@@ -120,11 +142,16 @@ def train(arg1, arg2=None, arg3=None):
  
         if param.cross_validation:
                 l, nr_fold = prob.l, param.nr_fold
-               target = (c_int * l)()
+               target = (c_double * l)()
                 liblinear.cross_validation(prob, param, nr_fold, target)
-               ACC = evaluations(prob.y[:l], target[:l])
-               print("Cross Validation Accuracy = %g%%" % ACC)
-               return ACC
+               ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
+               if param.solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
+                       print("Cross Validation Mean squared error = %g" % MSE)
+                       print("Cross Validation Squared correlation coefficient = %g" % SCC)
+                       return MSE
+               else:
+                       print("Cross Validation Accuracy = %g%%" % ACC)
+                       return ACC
         else :
                 m = liblinear.train(prob, param)
                 m = toPyModel(m)
@@ -143,7 +170,8 @@ def predict(y, x, m, options=""):
  
         The return tuple contains
         p_labels: a list of predicted labels
-       p_acc: testing accuracy. 
+       p_acc: a tuple including  accuracy (for classification), mean-squared 
+              error, and squared correlation coefficient (for regression).
         p_vals: a list of decision values or probability estimates (if '-b 1' 
                 is specified). If k is the number of classes, for decision values,
                 each element includes results of predicting k binary-class
@@ -164,6 +192,7 @@ def predict(y, x, m, options=""):
                         raise ValueError("Wrong options")
                 i+=1
  
+       solver_type = m.param.solver_type
         nr_class = m.get_nr_class()
         nr_feature = m.get_nr_feature()
         is_prob_model = m.is_probability_model()
@@ -201,9 +230,12 @@ def predict(y, x, m, options=""):
                         pred_values += [values]
         if len(y) == 0:
                 y = [0] * len(x)
-       ACC = evaluations(y, pred_labels)
+       ACC, MSE, SCC = evaluations(y, pred_labels)
         l = len(y)
-       print("Accuracy = %g%% (%d/%d)" % (ACC, int(l*ACC//100), l))
+       if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
+               print("Mean squared error = %g (regression)" % MSE)
+               print("Squared correlation coefficient = %g (regression)" % SCC)
+       else:
+               print("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
  
-       return pred_labels, ACC, pred_values
-       
+       return pred_labels, (ACC, MSE, SCC), pred_values
author	popo <popo@16e7d947-dcc2-db11-b54a-0017319806e7>
	Tue, 17 Apr 2012 04:50:50 +0000 (04:50 +0000)
committer	popo <popo@16e7d947-dcc2-db11-b54a-0017319806e7>
	Tue, 17 Apr 2012 04:50:50 +0000 (04:50 +0000)
matlab/README		patch \| blob \| history
matlab/linear_model_matlab.c		patch \| blob \| history
matlab/predict.c		patch \| blob \| history
matlab/train.c		patch \| blob \| history
python/README		patch \| blob \| history
python/liblinear.py		patch \| blob \| history
python/liblinearutil.py		patch \| blob \| history