From: leepei Date: Fri, 20 Jul 2012 07:59:36 +0000 (+0000) Subject: Added -q option for predict and modified the solver type usage message X-Git-Tag: v192~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b4eb8f78b2fb3147f9c1e9b63b89b47a6f250571;p=liblinear Added -q option for predict and modified the solver type usage message --- diff --git a/README b/README index cb01674..ded695e 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -LIBLINEAR is a simple package for solving large-scale regularized linear +LIBLINEAR is a simple package for solving large-scale regularized linear classification and regression. It currently supports - L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification - L1-regularized L2-loss support vector classification/L1-regularized logistic regression @@ -98,17 +98,19 @@ and mark Usage: train [options] training_set_file [model_file] options: -s type : set type of solver (default 1) + for multi-class classification 0 -- L2-regularized logistic regression (primal) 1 -- L2-regularized L2-loss support vector classification (dual) 2 -- L2-regularized L2-loss support vector classification (primal) 3 -- L2-regularized L1-loss support vector classification (dual) - 4 -- multi-class support vector classification by Crammer and Singer + 4 -- support vector classification by Crammer and Singer 5 -- L1-regularized L2-loss support vector classification 6 -- L1-regularized logistic regression 7 -- L2-regularized logistic regression (dual) - 11 -- L2-regularized L2-loss epsilon support vector regression (primal) - 12 -- L2-regularized L2-loss epsilon support vector regression (dual) - 13 -- L2-regularized L1-loss epsilon support vector regression (dual) + for regression + 11 -- L2-regularized L2-loss support vector regression (primal) + 12 -- L2-regularized L2-loss support vector regression (dual) + 13 -- L2-regularized L1-loss support vector regression (dual) -c cost : set the
parameter C (default 1) -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1) -e epsilon : set tolerance of termination criterion @@ -221,6 +223,7 @@ and C^m_i = C if m = y_i, Usage: predict [options] test_file model_file output_file options: -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only +-q : quiet mode (no outputs) Note that -b is only needed in the prediction phase. This is different from the setting of LIBSVM. @@ -328,15 +331,16 @@ Library Usage }; solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL. - + for classification L2R_LR L2-regularized logistic regression (primal) L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual) L2R_L2LOSS_SVC L2-regularized L2-loss support vector classification (primal) L2R_L1LOSS_SVC_DUAL L2-regularized L1-loss support vector classification (dual) - MCSVM_CS multi-class support vector classification by Crammer and Singer + MCSVM_CS support vector classification by Crammer and Singer L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification L1R_LR L1-regularized logistic regression L2R_LR_DUAL L2-regularized logistic regression (dual) + for regression L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal) L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual) L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual) diff --git a/matlab/predict.c b/matlab/predict.c index 84e5dd8..f60b54d 100644 --- a/matlab/predict.c +++ b/matlab/predict.c @@ -16,6 +16,9 @@ typedef int mwIndex; #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) +int print_null(const char *s,...) {return 0;} +int (*info)(const char *fmt,...)
= &mexPrintf; + int col_format_flag; void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias) @@ -171,19 +174,19 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co ++total; } - - if(model_->param.solver_type==L2R_L2LOSS_SVR || - model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || - model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) - { - mexPrintf("Mean squared error = %g (regression)\n",error/total); - mexPrintf("Squared correlation coefficient = %g (regression)\n", - ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ - ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) - ); - } + + if(model_->param.solver_type==L2R_L2LOSS_SVR || + model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || + model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) + { + info("Mean squared error = %g (regression)\n",error/total); + info("Squared correlation coefficient = %g (regression)\n", + ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ + ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) + ); + } else - mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total); + info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total); // return accuracy, mean squared error, squared correlation coefficient plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL); @@ -204,6 +207,7 @@ void exit_with_help() "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n" "liblinear_options:\n" "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" + "-q quiet mode (no outputs)\n" "col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n" "Returns:\n" " predicted_label: prediction output vector.\n" @@ -230,7 +234,7 @@ void mexFunction( int nlhs, mxArray *plhs[], { mxGetString(prhs[4], 
cmd, mxGetN(prhs[4])+1); if(strcmp(cmd, "col") == 0) - { + { col_format_flag = 1; } } @@ -260,7 +264,8 @@ void mexFunction( int nlhs, mxArray *plhs[], for(i=1;i<argc;i++) { if(argv[i][0] != '-') break; - if(++i>=argc) + ++i; + if(i>=argc && argv[i-1][1] != 'q') { exit_with_help(); fake_answer(plhs); return; } @@ -271,6 +276,10 @@ void mexFunction( int nlhs, mxArray *plhs[], case 'b': prob_estimate_flag = atoi(argv[i]); break; + case 'q': + info = &print_null; + i--; + break; default: mexPrintf("unknown option\n"); exit_with_help(); diff --git a/matlab/train.c b/matlab/train.c index 7bab35f..6351dab 100644 --- a/matlab/train.c +++ b/matlab/train.c @@ -27,19 +27,21 @@ void exit_with_help() "Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n" "liblinear_options:\n" "-s type : set type of solver (default 1)\n" + " for multi-class classification\n" " 0 -- L2-regularized logistic regression (primal)\n" " 1 -- L2-regularized L2-loss support vector classification (dual)\n" " 2 -- L2-regularized L2-loss support vector classification (primal)\n" " 3 -- L2-regularized L1-loss support vector classification (dual)\n" - " 4 -- multi-class support vector classification by Crammer and Singer\n" + " 4 -- support vector classification by Crammer and Singer\n" " 5 -- L1-regularized L2-loss support vector classification\n" " 6 -- L1-regularized logistic regression\n" " 7 -- L2-regularized logistic regression (dual)\n" - " 11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n" - " 12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n" - " 13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n" + " for regression\n" + " 11 -- L2-regularized L2-loss support vector regression (primal)\n" + " 12 -- L2-regularized L2-loss support vector regression (dual)\n" + " 13 -- L2-regularized L1-loss support vector regression (dual)\n" "-c cost : set the parameter C (default 1)\n" - "-p epsilon : set the epsilon in loss function of epsilon-SVR (default
0.1)\n" + "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n" "-e epsilon : set tolerance of termination criterion\n" " -s 0 and 2\n" " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" diff --git a/predict.c b/predict.c index a8f9f83..c0b635f 100644 --- a/predict.c +++ b/predict.c @@ -5,6 +5,10 @@ #include #include "linear.h" +int print_null(const char *s,...) {return 0;} + +static int (*info)(const char *fmt,...) = &printf; + struct feature_node *x; int max_nr_attr = 64; @@ -23,7 +27,7 @@ static int max_line_len; static char* readline(FILE *input) { int len; - + if(fgets(line,max_line_len,input) == NULL) return NULL; @@ -67,7 +71,7 @@ void do_predict(FILE *input, FILE *output) labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); - fprintf(output,"labels"); + fprintf(output,"labels"); for(j=0;jparam.solver_type==L2R_L2LOSS_SVR || - model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || - model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) - { - printf("Mean squared error = %g (regression)\n",error/total); - printf("Squared correlation coefficient = %g (regression)\n", - ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ - ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) - ); + sump += predict_label; + sumt += target_label; + sumpp += predict_label*predict_label; + sumtt += target_label*target_label; + sumpt += predict_label*target_label; + ++total; + } + if(model_->param.solver_type==L2R_L2LOSS_SVR || + model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || + model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) + { + info("Mean squared error = %g (regression)\n",error/total); + info("Squared correlation coefficient = %g (regression)\n", + ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ + ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) + ); } else - printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); + info("Accuracy = %g%% (%d/%d)\n",(double)
correct/total*100,correct,total); if(flag_predict_probability) free(prob_estimates); } @@ -176,6 +180,7 @@ void exit_with_help() "Usage: predict [options] test_file model_file output_file\n" "options:\n" "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" + "-q : quiet mode (no outputs)\n" ); exit(1); } @@ -195,7 +200,10 @@ int main(int argc, char **argv) case 'b': flag_predict_probability = atoi(argv[i]); break; - + case 'q': + info = &print_null; + i--; + break; default: fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); exit_with_help(); diff --git a/python/liblinearutil.py b/python/liblinearutil.py index 70dbd8a..1940abe 100644 --- a/python/liblinearutil.py +++ b/python/liblinearutil.py @@ -27,11 +27,11 @@ def svm_read_problem(data_file_name): def load_model(model_file_name): """ load_model(model_file_name) -> model - + Load a LIBLINEAR model from model_file_name and return. """ model = liblinear.load_model(model_file_name.encode()) - if not model: + if not model: print("can't open model file %s" % model_file_name) return None model = toPyModel(model) @@ -57,14 +57,14 @@ def evaluations(ty, pv): total_correct = total_error = 0 sumv = sumy = sumvv = sumyy = sumvy = 0 for v, y in zip(pv, ty): - if y == v: + if y == v: total_correct += 1 total_error += (v-y)*(v-y) sumv += v sumy += y sumvv += v*v sumyy += y*y - sumvy += v*y + sumvy += v*y l = len(ty) ACC = 100.0*total_correct/l MSE = total_error/l @@ -76,38 +76,40 @@ def evaluations(ty, pv): def train(arg1, arg2=None, arg3=None): """ - train(y, x [, 'options']) -> model | ACC + train(y, x [, 'options']) -> model | ACC train(prob, [, 'options']) -> model | ACC train(prob, param) -> model | ACC Train a model from data (y, x) or a problem prob using - 'options' or a parameter param. + 'options' or a parameter param. If '-v' is specified in 'options' (i.e., cross validation) either accuracy (ACC) or mean-squared error (MSE) is returned. 
'options': -s type : set type of solver (default 1) + for multi-class classification 0 -- L2-regularized logistic regression (primal) - 1 -- L2-regularized L2-loss support vector classification (dual) + 1 -- L2-regularized L2-loss support vector classification (dual) 2 -- L2-regularized L2-loss support vector classification (primal) 3 -- L2-regularized L1-loss support vector classification (dual) - 4 -- multi-class support vector classification by Crammer and Singer + 4 -- support vector classification by Crammer and Singer 5 -- L1-regularized L2-loss support vector classification 6 -- L1-regularized logistic regression 7 -- L2-regularized logistic regression (dual) - 11 -- L2-regularized L2-loss epsilon support vector regression (primal) - 12 -- L2-regularized L2-loss epsilon support vector regression (dual) - 13 -- L2-regularized L1-loss epsilon support vector regression (dual) + for regression + 11 -- L2-regularized L2-loss support vector regression (primal) + 12 -- L2-regularized L2-loss support vector regression (dual) + 13 -- L2-regularized L1-loss support vector regression (dual) -c cost : set the parameter C (default 1) - -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1) + -p epsilon : set the epsilon in loss function of SVR (default 0.1) -e epsilon : set tolerance of termination criterion - -s 0 and 2 - |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, + -s 0 and 2 + |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, where f is the primal function, (default 0.01) -s 11 - |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) + |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) -s 1, 3, 4, and 7 - Dual maximal violation <= eps; similar to liblinear (default 0.1) + Dual maximal violation <= eps; similar to liblinear (default 0.1)
-s 5 and 6 |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf, where f is the primal function (default 0.01) @@ -164,23 +166,28 @@ def predict(y, x, m, options=""): """ predict(y, x, m [, "options"]) -> (p_labels, p_acc, p_vals) - Predict data (y, x) with the SVM model m. - "options": + Predict data (y, x) with the SVM model m. + "options": -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only + -q quiet mode (no outputs) The return tuple contains p_labels: a list of predicted labels - p_acc: a tuple including accuracy (for classification), mean-squared + p_acc: a tuple including accuracy (for classification), mean-squared error, and squared correlation coefficient (for regression). - p_vals: a list of decision values or probability estimates (if '-b 1' + p_vals: a list of decision values or probability estimates (if '-b 1' is specified). If k is the number of classes, for decision values, each element includes results of predicting k binary-class - SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value - is returned. For probabilities, each element contains k values + SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value + is returned. For probabilities, each element contains k values indicating the probability that the testing instance is in each class. Note that the order of classes here is the same as 'model.label' field in the model structure. 
""" + + def info(s): + print(s) + predict_probability = 0 argv = options.split() i = 0 @@ -188,6 +195,8 @@ def predict(y, x, m, options=""): if argv[i] == '-b': i += 1 predict_probability = int(argv[i]) + elif argv[i] == '-q': + info = print_null else: raise ValueError("Wrong options") i+=1 @@ -233,9 +242,9 @@ def predict(y, x, m, options=""): ACC, MSE, SCC = evaluations(y, pred_labels) l = len(y) if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: - print("Mean squared error = %g (regression)" % MSE) - print("Squared correlation coefficient = %g (regression)" % SCC) + info("Mean squared error = %g (regression)" % MSE) + info("Squared correlation coefficient = %g (regression)" % SCC) else: - print("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l)) + info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l)) return pred_labels, (ACC, MSE, SCC), pred_values diff --git a/train.c b/train.c index 79fbc50..c145c4b 100644 --- a/train.c +++ b/train.c @@ -16,19 +16,21 @@ void exit_with_help() "Usage: train [options] training_set_file [model_file]\n" "options:\n" "-s type : set type of solver (default 1)\n" + " for multi-class classification\n" " 0 -- L2-regularized logistic regression (primal)\n" " 1 -- L2-regularized L2-loss support vector classification (dual)\n" " 2 -- L2-regularized L2-loss support vector classification (primal)\n" " 3 -- L2-regularized L1-loss support vector classification (dual)\n" - " 4 -- multi-class support vector classification by Crammer and Singer\n" + " 4 -- support vector classification by Crammer and Singer\n" " 5 -- L1-regularized L2-loss support vector classification\n" " 6 -- L1-regularized logistic regression\n" " 7 -- L2-regularized logistic regression (dual)\n" - " 11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n" - " 12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n" - " 13 -- L2-regularized L1-loss epsilon support vector 
regression (dual)\n" + " for regression\n" + " 11 -- L2-regularized L2-loss support vector regression (primal)\n" + " 12 -- L2-regularized L2-loss support vector regression (dual)\n" + " 13 -- L2-regularized L1-loss support vector regression (dual)\n" "-c cost : set the parameter C (default 1)\n" - "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" + "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n" "-e epsilon : set tolerance of termination criterion\n" " -s 0 and 2\n" " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"