-LIBLINEAR is a simple package for solving large-scale regularized linear
+LIBLINEAR is a simple package for solving large-scale regularized linear
classification and regression. It currently supports
- L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification
- L1-regularized L2-loss support vector classification/L1-regularized logistic regression
Usage: train [options] training_set_file [model_file]
options:
-s type : set type of solver (default 1)
+ for multi-class classification
0 -- L2-regularized logistic regression (primal)
1 -- L2-regularized L2-loss support vector classification (dual)
2 -- L2-regularized L2-loss support vector classification (primal)
3 -- L2-regularized L1-loss support vector classification (dual)
- 4 -- multi-class support vector classification by Crammer and Singer
+ 4 -- support vector classification by Crammer and Singer
5 -- L1-regularized L2-loss support vector classification
6 -- L1-regularized logistic regression
7 -- L2-regularized logistic regression (dual)
- 11 -- L2-regularized L2-loss epsilon support vector regression (primal)
- 12 -- L2-regularized L2-loss epsilon support vector regression (dual)
- 13 -- L2-regularized L1-loss epsilon support vector regression (dual)
+ for regression
+ 11 -- L2-regularized L2-loss support vector regression (primal)
+ 12 -- L2-regularized L2-loss support vector regression (dual)
+ 13 -- L2-regularized L1-loss support vector regression (dual)
-c cost : set the parameter C (default 1)
-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
-e epsilon : set tolerance of termination criterion
Usage: predict [options] test_file model_file output_file
options:
-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
+-q : quiet mode (no outputs)
Note that -b is only needed in the prediction phase. This is different
from the setting of LIBSVM.
};
solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL.
-
+ for classification
L2R_LR L2-regularized logistic regression (primal)
L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual)
L2R_L2LOSS_SVC L2-regularized L2-loss support vector classification (primal)
L2R_L1LOSS_SVC_DUAL L2-regularized L1-loss support vector classification (dual)
- MCSVM_CS multi-class support vector classification by Crammer and Singer
+ MCSVM_CS support vector classification by Crammer and Singer
L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification
L1R_LR L1-regularized logistic regression
L2R_LR_DUAL L2-regularized logistic regression (dual)
+ for regression
L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal)
L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual)
L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual)
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+int print_null(const char *s,...) {return 0;}
+int (*info)(const char *fmt,...) = &mexPrintf;
+
int col_format_flag;
void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias)
++total;
}
-
- if(model_->param.solver_type==L2R_L2LOSS_SVR ||
- model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
- model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
- {
- mexPrintf("Mean squared error = %g (regression)\n",error/total);
- mexPrintf("Squared correlation coefficient = %g (regression)\n",
- ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
- ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
- );
- }
+
+ if(model_->param.solver_type==L2R_L2LOSS_SVR ||
+ model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
+ model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
+ {
+ info("Mean squared error = %g (regression)\n",error/total);
+ info("Squared correlation coefficient = %g (regression)\n",
+ ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+ ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+ );
+ }
else
- mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
+ info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
// return accuracy, mean squared error, squared correlation coefficient
plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
"Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n"
"liblinear_options:\n"
"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
+	"-q : quiet mode (no outputs)\n"
"col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n"
"Returns:\n"
" predicted_label: prediction output vector.\n"
{
mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1);
if(strcmp(cmd, "col") == 0)
- {
+ {
col_format_flag = 1;
}
}
for(i=1;i<argc;i++)
{
if(argv[i][0] != '-') break;
- if(++i>=argc)
+ ++i;
+ if(i>=argc && argv[i-1][1] != 'q')
{
exit_with_help();
fake_answer(plhs);
case 'b':
prob_estimate_flag = atoi(argv[i]);
break;
+ case 'q':
+ info = &print_null;
+ i--;
+ break;
default:
mexPrintf("unknown option\n");
exit_with_help();
"Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n"
"liblinear_options:\n"
"-s type : set type of solver (default 1)\n"
+ " for multi-class classification\n"
" 0 -- L2-regularized logistic regression (primal)\n"
" 1 -- L2-regularized L2-loss support vector classification (dual)\n"
" 2 -- L2-regularized L2-loss support vector classification (primal)\n"
" 3 -- L2-regularized L1-loss support vector classification (dual)\n"
- " 4 -- multi-class support vector classification by Crammer and Singer\n"
+ " 4 -- support vector classification by Crammer and Singer\n"
" 5 -- L1-regularized L2-loss support vector classification\n"
" 6 -- L1-regularized logistic regression\n"
" 7 -- L2-regularized logistic regression (dual)\n"
- " 11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n"
- " 12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n"
- " 13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n"
+ " for regression\n"
+ " 11 -- L2-regularized L2-loss support vector regression (primal)\n"
+ " 12 -- L2-regularized L2-loss support vector regression (dual)\n"
+ " 13 -- L2-regularized L1-loss support vector regression (dual)\n"
"-c cost : set the parameter C (default 1)\n"
- "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
+ "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
"-e epsilon : set tolerance of termination criterion\n"
" -s 0 and 2\n"
" |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
#include <errno.h>
#include "linear.h"
+int print_null(const char *s,...) {return 0;}
+
+static int (*info)(const char *fmt,...) = &printf;
+
struct feature_node *x;
int max_nr_attr = 64;
static char* readline(FILE *input)
{
int len;
-
+
if(fgets(line,max_line_len,input) == NULL)
return NULL;
labels=(int *) malloc(nr_class*sizeof(int));
get_labels(model_,labels);
prob_estimates = (double *) malloc(nr_class*sizeof(double));
- fprintf(output,"labels");
+ fprintf(output,"labels");
for(j=0;j<nr_class;j++)
fprintf(output," %d",labels[j]);
fprintf(output,"\n");
if(predict_label == target_label)
++correct;
error += (predict_label-target_label)*(predict_label-target_label);
- sump += predict_label;
- sumt += target_label;
- sumpp += predict_label*predict_label;
- sumtt += target_label*target_label;
- sumpt += predict_label*target_label;
- ++total;
- }
- if(model_->param.solver_type==L2R_L2LOSS_SVR ||
- model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
- model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
- {
- printf("Mean squared error = %g (regression)\n",error/total);
- printf("Squared correlation coefficient = %g (regression)\n",
- ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
- ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
- );
+ sump += predict_label;
+ sumt += target_label;
+ sumpp += predict_label*predict_label;
+ sumtt += target_label*target_label;
+ sumpt += predict_label*target_label;
+ ++total;
+ }
+ if(model_->param.solver_type==L2R_L2LOSS_SVR ||
+ model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
+ model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
+ {
+ info("Mean squared error = %g (regression)\n",error/total);
+ info("Squared correlation coefficient = %g (regression)\n",
+ ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+ ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+ );
}
else
- printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
+ info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
if(flag_predict_probability)
free(prob_estimates);
}
"Usage: predict [options] test_file model_file output_file\n"
"options:\n"
"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
+ "-q : quiet mode (no outputs)\n"
);
exit(1);
}
case 'b':
flag_predict_probability = atoi(argv[i]);
break;
-
+ case 'q':
+ info = &print_null;
+ i--;
+ break;
default:
fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
exit_with_help();
def load_model(model_file_name):
"""
load_model(model_file_name) -> model
-
+
Load a LIBLINEAR model from model_file_name and return.
"""
model = liblinear.load_model(model_file_name.encode())
- if not model:
+ if not model:
print("can't open model file %s" % model_file_name)
return None
model = toPyModel(model)
total_correct = total_error = 0
sumv = sumy = sumvv = sumyy = sumvy = 0
for v, y in zip(pv, ty):
- if y == v:
+ if y == v:
total_correct += 1
total_error += (v-y)*(v-y)
sumv += v
sumy += y
sumvv += v*v
sumyy += y*y
- sumvy += v*y
+ sumvy += v*y
l = len(ty)
ACC = 100.0*total_correct/l
MSE = total_error/l
def train(arg1, arg2=None, arg3=None):
"""
- train(y, x [, 'options']) -> model | ACC
+ train(y, x [, 'options']) -> model | ACC
train(prob, [, 'options']) -> model | ACC
train(prob, param) -> model | ACC
Train a model from data (y, x) or a problem prob using
- 'options' or a parameter param.
+ 'options' or a parameter param.
If '-v' is specified in 'options' (i.e., cross validation)
either accuracy (ACC) or mean-squared error (MSE) is returned.
'options':
-s type : set type of solver (default 1)
+ for multi-class classification
0 -- L2-regularized logistic regression (primal)
- 1 -- L2-regularized L2-loss support vector classification (dual)
+ 1 -- L2-regularized L2-loss support vector classification (dual)
2 -- L2-regularized L2-loss support vector classification (primal)
3 -- L2-regularized L1-loss support vector classification (dual)
- 4 -- multi-class support vector classification by Crammer and Singer
+ 4 -- support vector classification by Crammer and Singer
5 -- L1-regularized L2-loss support vector classification
6 -- L1-regularized logistic regression
7 -- L2-regularized logistic regression (dual)
- 11 -- L2-regularized L2-loss epsilon support vector regression (primal)
- 12 -- L2-regularized L2-loss epsilon support vector regression (dual)
- 13 -- L2-regularized L1-loss epsilon support vector regression (dual)
+ for regression
+ 11 -- L2-regularized L2-loss support vector regression (primal)
+ 12 -- L2-regularized L2-loss support vector regression (dual)
+ 13 -- L2-regularized L1-loss support vector regression (dual)
-c cost : set the parameter C (default 1)
- -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
+ -p epsilon : set the epsilon in loss function of SVR (default 0.1)
-e epsilon : set tolerance of termination criterion
- -s 0 and 2
- |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
+ -s 0 and 2
+ |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
where f is the primal function, (default 0.01)
-s 11
- |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
+ |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
-s 1, 3, 4, and 7
- Dual maximal violation <= eps; similar to liblinear (default 0.1)
+			Dual maximal violation <= eps; similar to liblinear (default 0.1)
-s 5 and 6
|f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
where f is the primal function (default 0.01)
"""
predict(y, x, m [, "options"]) -> (p_labels, p_acc, p_vals)
- Predict data (y, x) with the SVM model m.
- "options":
+ Predict data (y, x) with the SVM model m.
+ "options":
-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
+	    -q : quiet mode (no outputs)
The return tuple contains
p_labels: a list of predicted labels
- p_acc: a tuple including accuracy (for classification), mean-squared
+ p_acc: a tuple including accuracy (for classification), mean-squared
error, and squared correlation coefficient (for regression).
- p_vals: a list of decision values or probability estimates (if '-b 1'
+ p_vals: a list of decision values or probability estimates (if '-b 1'
is specified). If k is the number of classes, for decision values,
each element includes results of predicting k binary-class
- SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value
- is returned. For probabilities, each element contains k values
+ SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value
+ is returned. For probabilities, each element contains k values
indicating the probability that the testing instance is in each class.
Note that the order of classes here is the same as 'model.label'
field in the model structure.
"""
+
+ def info(s):
+ print(s)
+
predict_probability = 0
argv = options.split()
i = 0
if argv[i] == '-b':
i += 1
predict_probability = int(argv[i])
+ elif argv[i] == '-q':
+ info = print_null
else:
raise ValueError("Wrong options")
i+=1
ACC, MSE, SCC = evaluations(y, pred_labels)
l = len(y)
if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
- print("Mean squared error = %g (regression)" % MSE)
- print("Squared correlation coefficient = %g (regression)" % SCC)
+ info("Mean squared error = %g (regression)" % MSE)
+ info("Squared correlation coefficient = %g (regression)" % SCC)
else:
- print("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
+ info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
return pred_labels, (ACC, MSE, SCC), pred_values
"Usage: train [options] training_set_file [model_file]\n"
"options:\n"
"-s type : set type of solver (default 1)\n"
+ " for multi-class classification\n"
" 0 -- L2-regularized logistic regression (primal)\n"
" 1 -- L2-regularized L2-loss support vector classification (dual)\n"
" 2 -- L2-regularized L2-loss support vector classification (primal)\n"
" 3 -- L2-regularized L1-loss support vector classification (dual)\n"
- " 4 -- multi-class support vector classification by Crammer and Singer\n"
+ " 4 -- support vector classification by Crammer and Singer\n"
" 5 -- L1-regularized L2-loss support vector classification\n"
" 6 -- L1-regularized logistic regression\n"
" 7 -- L2-regularized logistic regression (dual)\n"
- " 11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n"
- " 12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n"
- " 13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n"
+ " for regression\n"
+ " 11 -- L2-regularized L2-loss support vector regression (primal)\n"
+ " 12 -- L2-regularized L2-loss support vector regression (dual)\n"
+ " 13 -- L2-regularized L1-loss support vector regression (dual)\n"
"-c cost : set the parameter C (default 1)\n"
- "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
+ "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
"-e epsilon : set tolerance of termination criterion\n"
" -s 0 and 2\n"
" |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"