]> granicus.if.org Git - liblinear/commitdiff
Added -q option for predict and modified the solver type usage message
authorleepei <leepei@16e7d947-dcc2-db11-b54a-0017319806e7>
Fri, 20 Jul 2012 07:59:36 +0000 (07:59 +0000)
committerleepei <leepei@16e7d947-dcc2-db11-b54a-0017319806e7>
Fri, 20 Jul 2012 07:59:36 +0000 (07:59 +0000)
README
matlab/predict.c
matlab/train.c
predict.c
python/liblinearutil.py
train.c

diff --git a/README b/README
index cb01674cf706aa7e3227618c4d865c0042af5c85..ded695eef3bbe1393d6b714bacde0f7ad5269e93 100644 (file)
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-LIBLINEAR is a simple package for solving large-scale regularized linear 
+LIBLINEAR is a simple package for solving large-scale regularized linear
 classification and regression. It currently supports 
 - L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification
 - L1-regularized L2-loss support vector classification/L1-regularized logistic regression
@@ -98,17 +98,19 @@ and mark
 Usage: train [options] training_set_file [model_file]
 options:
 -s type : set type of solver (default 1)
+  for multi-class classification
         0 -- L2-regularized logistic regression (primal)
         1 -- L2-regularized L2-loss support vector classification (dual)
         2 -- L2-regularized L2-loss support vector classification (primal)
         3 -- L2-regularized L1-loss support vector classification (dual)
-        4 -- multi-class support vector classification by Crammer and Singer
+        4 -- support vector classification by Crammer and Singer
         5 -- L1-regularized L2-loss support vector classification
         6 -- L1-regularized logistic regression
         7 -- L2-regularized logistic regression (dual)
-       11 -- L2-regularized L2-loss epsilon support vector regression (primal)
-       12 -- L2-regularized L2-loss epsilon support vector regression (dual)
-       13 -- L2-regularized L1-loss epsilon support vector regression (dual)
+  for regression
+       11 -- L2-regularized L2-loss support vector regression (primal)
+       12 -- L2-regularized L2-loss support vector regression (dual)
+       13 -- L2-regularized L1-loss support vector regression (dual)
 -c cost : set the parameter C (default 1)
 -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
 -e epsilon : set tolerance of termination criterion
@@ -221,6 +223,7 @@ and C^m_i = C if m  = y_i,
 Usage: predict [options] test_file model_file output_file
 options:
 -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
+-q : quiet mode (no outputs)
 
 Note that -b is only needed in the prediction phase. This is different
 from the setting of LIBSVM.
@@ -328,15 +331,16 @@ Library Usage
         };
 
     solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL.
-
+  for classification
     L2R_LR                L2-regularized logistic regression (primal)
     L2R_L2LOSS_SVC_DUAL   L2-regularized L2-loss support vector classification (dual)
     L2R_L2LOSS_SVC        L2-regularized L2-loss support vector classification (primal)
     L2R_L1LOSS_SVC_DUAL   L2-regularized L1-loss support vector classification (dual)
-    MCSVM_CS              multi-class support vector classification by Crammer and Singer
+    MCSVM_CS              support vector classification by Crammer and Singer
     L1R_L2LOSS_SVC        L1-regularized L2-loss support vector classification
     L1R_LR                L1-regularized logistic regression
     L2R_LR_DUAL           L2-regularized logistic regression (dual)
+  for regression
     L2R_L2LOSS_SVR        L2-regularized L2-loss support vector regression (primal)
     L2R_L2LOSS_SVR_DUAL   L2-regularized L2-loss support vector regression (dual)
     L2R_L1LOSS_SVR_DUAL   L2-regularized L1-loss support vector regression (dual)
index 84e5dd8ee946c71f7ea4b857a418e629dcf63309..f60b54d2b2dafce9c2b656d5b66bab7fb68b211a 100644 (file)
@@ -16,6 +16,9 @@ typedef int mwIndex;
 
 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 
+int print_null(const char *s,...) {}
+int (*info)(const char *fmt,...) = &mexPrintf;
+
 int col_format_flag;
 
 void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias)
@@ -171,19 +174,19 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co
 
                ++total;
        }
-       
-       if(model_->param.solver_type==L2R_L2LOSS_SVR || 
-           model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 
-           model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
-        {
-                mexPrintf("Mean squared error = %g (regression)\n",error/total);
-                mexPrintf("Squared correlation coefficient = %g (regression)\n",
-                       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
-                       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
-                       );
-        }
+
+       if(model_->param.solver_type==L2R_L2LOSS_SVR ||
+          model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
+          model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
+       {
+               info("Mean squared error = %g (regression)\n",error/total);
+               info("Squared correlation coefficient = %g (regression)\n",
+                       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+                       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+                       );
+       }
        else
-               mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
+               info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
 
        // return accuracy, mean squared error, squared correlation coefficient
        plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
@@ -204,6 +207,7 @@ void exit_with_help()
                        "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n"
                        "liblinear_options:\n"
                        "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
+                       "-q quiet mode (no outputs)\n"
                        "col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n"
                        "Returns:\n"
                        "  predicted_label: prediction output vector.\n"
@@ -230,7 +234,7 @@ void mexFunction( int nlhs, mxArray *plhs[],
        {
                mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1);
                if(strcmp(cmd, "col") == 0)
-               {                       
+               {
                        col_format_flag = 1;
                }
        }
@@ -260,7 +264,8 @@ void mexFunction( int nlhs, mxArray *plhs[],
                        for(i=1;i<argc;i++)
                        {
                                if(argv[i][0] != '-') break;
-                               if(++i>=argc)
+                               ++i;
+                               if(i>=argc && argv[i-1][1] != 'q')
                                {
                                        exit_with_help();
                                        fake_answer(plhs);
@@ -271,6 +276,10 @@ void mexFunction( int nlhs, mxArray *plhs[],
                                        case 'b':
                                                prob_estimate_flag = atoi(argv[i]);
                                                break;
+                                       case 'q':
+                                               info = &print_null;
+                                               i--;
+                                               break;
                                        default:
                                                mexPrintf("unknown option\n");
                                                exit_with_help();
index 7bab35f17d62fe4771a4eca1a7995d1b91c8227a..6351dab3e6c6530806f10f6c4557717719d2c706 100644 (file)
@@ -27,19 +27,21 @@ void exit_with_help()
        "Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n"
        "liblinear_options:\n"
        "-s type : set type of solver (default 1)\n"
+       "  for multi-class classification\n"
        "        0 -- L2-regularized logistic regression (primal)\n"
        "        1 -- L2-regularized L2-loss support vector classification (dual)\n"    
        "        2 -- L2-regularized L2-loss support vector classification (primal)\n"
        "        3 -- L2-regularized L1-loss support vector classification (dual)\n"
-       "        4 -- multi-class support vector classification by Crammer and Singer\n"
+       "        4 -- support vector classification by Crammer and Singer\n"
        "        5 -- L1-regularized L2-loss support vector classification\n"
        "        6 -- L1-regularized logistic regression\n"
        "        7 -- L2-regularized logistic regression (dual)\n"
-       "       11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n"
-       "       12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n"
-       "       13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n"
+       "  for regression\n"
+       "       11 -- L2-regularized L2-loss support vector regression (primal)\n"
+       "       12 -- L2-regularized L2-loss support vector regression (dual)\n"
+       "       13 -- L2-regularized L1-loss support vector regression (dual)\n"
        "-c cost : set the parameter C (default 1)\n"
-       "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
+       "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
        "-e epsilon : set tolerance of termination criterion\n"
        "       -s 0 and 2\n" 
        "               |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" 
index a8f9f833d1e8b956a0c112d5c2712bd872d847b6..c0b635f7299660fc35f75c076865ec68d3723a70 100644 (file)
--- a/predict.c
+++ b/predict.c
@@ -5,6 +5,10 @@
 #include <errno.h>
 #include "linear.h"
 
+int print_null(const char *s,...) {}
+
+static int (*info)(const char *fmt,...) = &printf;
+
 struct feature_node *x;
 int max_nr_attr = 64;
 
@@ -23,7 +27,7 @@ static int max_line_len;
 static char* readline(FILE *input)
 {
        int len;
-       
+
        if(fgets(line,max_line_len,input) == NULL)
                return NULL;
 
@@ -67,7 +71,7 @@ void do_predict(FILE *input, FILE *output)
                labels=(int *) malloc(nr_class*sizeof(int));
                get_labels(model_,labels);
                prob_estimates = (double *) malloc(nr_class*sizeof(double));
-               fprintf(output,"labels");               
+               fprintf(output,"labels");
                for(j=0;j<nr_class;j++)
                        fprintf(output," %d",labels[j]);
                fprintf(output,"\n");
@@ -147,25 +151,25 @@ void do_predict(FILE *input, FILE *output)
                if(predict_label == target_label)
                        ++correct;
                error += (predict_label-target_label)*(predict_label-target_label);
-                sump += predict_label;
-                sumt += target_label;
-                sumpp += predict_label*predict_label;
-                sumtt += target_label*target_label;
-                sumpt += predict_label*target_label;
-                ++total;
-        }
-        if(model_->param.solver_type==L2R_L2LOSS_SVR || 
-           model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 
-           model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
-        {
-                printf("Mean squared error = %g (regression)\n",error/total);
-                printf("Squared correlation coefficient = %g (regression)\n",
-                       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
-                       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
-                       );
+               sump += predict_label;
+               sumt += target_label;
+               sumpp += predict_label*predict_label;
+               sumtt += target_label*target_label;
+               sumpt += predict_label*target_label;
+               ++total;
+       }
+       if(model_->param.solver_type==L2R_L2LOSS_SVR ||
+          model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
+          model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
+       {
+               info("Mean squared error = %g (regression)\n",error/total);
+               info("Squared correlation coefficient = %g (regression)\n",
+                       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+                       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+                       );
         }
        else
-               printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
+               info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
        if(flag_predict_probability)
                free(prob_estimates);
 }
@@ -176,6 +180,7 @@ void exit_with_help()
        "Usage: predict [options] test_file model_file output_file\n"
        "options:\n"
        "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
+       "-q : quiet mode (no outputs)\n"
        );
        exit(1);
 }
@@ -195,7 +200,10 @@ int main(int argc, char **argv)
                        case 'b':
                                flag_predict_probability = atoi(argv[i]);
                                break;
-
+                       case 'q':
+                               info = &print_null;
+                               i--;
+                               break;
                        default:
                                fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
                                exit_with_help();
index 70dbd8a718b684aebdb88cf4b0b4343b90de1f33..1940abe7351fb61a6e444e17d963f868f98f4e79 100644 (file)
@@ -27,11 +27,11 @@ def svm_read_problem(data_file_name):
 def load_model(model_file_name):
        """
        load_model(model_file_name) -> model
-       
+
        Load a LIBLINEAR model from model_file_name and return.
        """
        model = liblinear.load_model(model_file_name.encode())
-       if not model: 
+       if not model:
                print("can't open model file %s" % model_file_name)
                return None
        model = toPyModel(model)
@@ -57,14 +57,14 @@ def evaluations(ty, pv):
        total_correct = total_error = 0
        sumv = sumy = sumvv = sumyy = sumvy = 0
        for v, y in zip(pv, ty):
-               if y == v: 
+               if y == v:
                        total_correct += 1
                total_error += (v-y)*(v-y)
                sumv += v
                sumy += y
                sumvv += v*v
                sumyy += y*y
-               sumvy += v*y 
+               sumvy += v*y
        l = len(ty)
        ACC = 100.0*total_correct/l
        MSE = total_error/l
@@ -76,38 +76,40 @@ def evaluations(ty, pv):
 
 def train(arg1, arg2=None, arg3=None):
        """
-       train(y, x [, 'options']) -> model | ACC 
+       train(y, x [, 'options']) -> model | ACC
        train(prob, [, 'options']) -> model | ACC
        train(prob, param) -> model | ACC
 
        Train a model from data (y, x) or a problem prob using
-       'options' or a parameter param. 
+       'options' or a parameter param.
        If '-v' is specified in 'options' (i.e., cross validation)
        either accuracy (ACC) or mean-squared error (MSE) is returned.
 
        'options':
                -s type : set type of solver (default 1)
+                 for multi-class classification
                         0 -- L2-regularized logistic regression (primal)
-                        1 -- L2-regularized L2-loss support vector classification (dual)       
+                        1 -- L2-regularized L2-loss support vector classification (dual)
                         2 -- L2-regularized L2-loss support vector classification (primal)
                         3 -- L2-regularized L1-loss support vector classification (dual)
-                        4 -- multi-class support vector classification by Crammer and Singer
+                        4 -- support vector classification by Crammer and Singer
                         5 -- L1-regularized L2-loss support vector classification
                         6 -- L1-regularized logistic regression
                         7 -- L2-regularized logistic regression (dual)
-                       11 -- L2-regularized L2-loss epsilon support vector regression (primal)
-                       12 -- L2-regularized L2-loss epsilon support vector regression (dual)
-                       13 -- L2-regularized L1-loss epsilon support vector regression (dual)
+                 for regression
+                       11 -- L2-regularized L2-loss support vector regression (primal)
+                       12 -- L2-regularized L2-loss support vector regression (dual)
+                       13 -- L2-regularized L1-loss support vector regression (dual)
                -c cost : set the parameter C (default 1)
-               -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
+               -p epsilon : set the epsilon in loss function of SVR (default 0.1)
                -e epsilon : set tolerance of termination criterion
-                       -s 0 and 2 
-                               |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 
+                       -s 0 and 2
+                               |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
                                where f is the primal function, (default 0.01)
                        -s 11
-                               |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 
+                               |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
                        -s 1, 3, 4, and 7
-                               Dual maximal violation <= eps; similar to liblinear (default 0.1)
+                               Dual maximal violation <= eps; similar to liblinear (default 0.1)
                        -s 5 and 6
                                |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
                                where f is the primal function (default 0.01)
@@ -164,23 +166,28 @@ def predict(y, x, m, options=""):
        """
        predict(y, x, m [, "options"]) -> (p_labels, p_acc, p_vals)
 
-       Predict data (y, x) with the SVM model m. 
-       "options": 
+       Predict data (y, x) with the SVM model m.
+       "options":
            -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
+           -q quiet mode (no outputs)
 
        The return tuple contains
        p_labels: a list of predicted labels
-       p_acc: a tuple including  accuracy (for classification), mean-squared 
+       p_acc: a tuple including  accuracy (for classification), mean-squared
               error, and squared correlation coefficient (for regression).
-       p_vals: a list of decision values or probability estimates (if '-b 1' 
+       p_vals: a list of decision values or probability estimates (if '-b 1'
                is specified). If k is the number of classes, for decision values,
                each element includes results of predicting k binary-class
-               SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value 
-               is returned. For probabilities, each element contains k values 
+               SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value
+               is returned. For probabilities, each element contains k values
                indicating the probability that the testing instance is in each class.
                Note that the order of classes here is the same as 'model.label'
                field in the model structure.
        """
+
+       def info(s):
+               print(s)
+
        predict_probability = 0
        argv = options.split()
        i = 0
@@ -188,6 +195,8 @@ def predict(y, x, m, options=""):
                if argv[i] == '-b':
                        i += 1
                        predict_probability = int(argv[i])
+               elif argv[i] == '-q':
+                       info = print_null
                else:
                        raise ValueError("Wrong options")
                i+=1
@@ -233,9 +242,9 @@ def predict(y, x, m, options=""):
        ACC, MSE, SCC = evaluations(y, pred_labels)
        l = len(y)
        if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
-               print("Mean squared error = %g (regression)" % MSE)
-               print("Squared correlation coefficient = %g (regression)" % SCC)
+               info("Mean squared error = %g (regression)" % MSE)
+               info("Squared correlation coefficient = %g (regression)" % SCC)
        else:
-               print("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
+               info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
 
        return pred_labels, (ACC, MSE, SCC), pred_values
diff --git a/train.c b/train.c
index 79fbc50ac26e692c5c4dd3c8ea98203a10b618f9..c145c4b13b0aeef18402f8a8f97e63c1f7f38b0d 100644 (file)
--- a/train.c
+++ b/train.c
@@ -16,19 +16,21 @@ void exit_with_help()
        "Usage: train [options] training_set_file [model_file]\n"
        "options:\n"
        "-s type : set type of solver (default 1)\n"
+       "  for multi-class classification\n"
        "        0 -- L2-regularized logistic regression (primal)\n"
        "        1 -- L2-regularized L2-loss support vector classification (dual)\n"    
        "        2 -- L2-regularized L2-loss support vector classification (primal)\n"
        "        3 -- L2-regularized L1-loss support vector classification (dual)\n"
-       "        4 -- multi-class support vector classification by Crammer and Singer\n"
+       "        4 -- support vector classification by Crammer and Singer\n"
        "        5 -- L1-regularized L2-loss support vector classification\n"
        "        6 -- L1-regularized logistic regression\n"
        "        7 -- L2-regularized logistic regression (dual)\n"
-       "       11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n"
-       "       12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n"
-       "       13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n"
+       "  for regression\n"
+       "       11 -- L2-regularized L2-loss support vector regression (primal)\n"
+       "       12 -- L2-regularized L2-loss support vector regression (dual)\n"
+       "       13 -- L2-regularized L1-loss support vector regression (dual)\n"
        "-c cost : set the parameter C (default 1)\n"
-       "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
+       "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
        "-e epsilon : set tolerance of termination criterion\n"
        "       -s 0 and 2\n" 
        "               |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"