From b98b69961bd0e3f25f7d85d216fc58c92f7f5d73 Mon Sep 17 00:00:00 2001
From: popo <popo@16e7d947-dcc2-db11-b54a-0017319806e7>
Date: Tue, 17 Apr 2012 04:50:50 +0000
Subject: [PATCH] Add -s 11, 12, 13 to python and matlab interfaces.

---
 matlab/README                |  14 +++--
 matlab/linear_model_matlab.c |  12 ++--
 matlab/predict.c             |  54 +++++++++++++-----
 matlab/train.c               | 103 +++++++++++++++++++++++++++--------
 python/README                |  12 +++-
 python/liblinear.py          |  34 ++++++++----
 python/liblinearutil.py      |  76 ++++++++++++++++++--------
 7 files changed, 221 insertions(+), 84 deletions(-)

diff --git a/matlab/README b/matlab/README
index 402a155..d0fc646 100644
--- a/matlab/README
+++ b/matlab/README
@@ -18,7 +18,7 @@ Introduction
 ============
 
 This tool provides a simple interface to LIBLINEAR, a library for
-large-scale regularized linear classification
+large-scale regularized linear classification and regression
 (http://www.csie.ntu.edu.tw/~cjlin/liblinear).  It is very easy to use
 as the usage and the way of specifying parameters are the same as that
 of LIBLINEAR.
@@ -117,11 +117,11 @@ prediction.  It is a structure and is organized as [Parameters, nr_class,
 nr_feature, bias, Label, w]:
 
         -Parameters: Parameters
-        -nr_class: number of classes
+        -nr_class: number of classes; = 2 for regression
         -nr_feature: number of features in training data (without including the bias term)
         -bias: If >= 0, we assume one additional feature is added to the end
             of each data instance.
-        -Label: label of each class
+        -Label: label of each class; empty for regression
         -w: a nr_w-by-n matrix for the weights, where n is nr_feature
             or nr_feature+1 depending on the existence of the bias term.
             nr_w is 1 if nr_class=2 and -s is not 4 (i.e., not
@@ -129,14 +129,16 @@ nr_feature, bias, Label, w]:
             nr_class otherwise.
 
 If the '-v' option is specified, cross validation is conducted and the
-returned model is just a scalar: cross-validation accuracy.
+returned model is just a scalar: cross-validation accuracy for 
+classification and mean-squared error for regression.
 
 Result of Prediction
 ====================
 
 The function 'predict' has three outputs. The first one,
-predicted_label, is a vector of predicted labels.
-The second output is a scalar meaning accuracy.
+predicted_label, is a vector of predicted labels. The second output,
+accuracy, is a vector including accuracy (for classification), mean
+squared error, and squared correlation coefficient (for regression).
 The third is a matrix containing decision values or probability
 estimates (if '-b 1' is specified). If k is the number of classes
 and k' is the number of classifiers (k'=1 if k=2, otherwise k'=k), for decision values,
diff --git a/matlab/linear_model_matlab.c b/matlab/linear_model_matlab.c
index 2d4076e..7b5129e 100644
--- a/matlab/linear_model_matlab.c
+++ b/matlab/linear_model_matlab.c
@@ -154,10 +154,14 @@ const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_s
 		n=model_->nr_feature;
 	w_size = n;
 
-	ptr = mxGetPr(rhs[id]);
-	model_->label=Malloc(int, model_->nr_class);
-	for(i=0; i<model_->nr_class; i++)
-		model_->label[i]=(int)ptr[i];
+	// Label
+	if(mxIsEmpty(rhs[id]) == 0)
+	{
+		model_->label = Malloc(int, model_->nr_class);
+		ptr = mxGetPr(rhs[id]);
+		for(i=0;i<model_->nr_class;i++)
+			model_->label[i] = (int)ptr[i];
+	}
 	id++;
 
 	ptr = mxGetPr(rhs[id]);
diff --git a/matlab/predict.c b/matlab/predict.c
index 972d249..84e5dd8 100644
--- a/matlab/predict.c
+++ b/matlab/predict.c
@@ -58,13 +58,15 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co
 	int label_vector_row_num, label_vector_col_num;
 	int feature_number, testing_instance_number;
 	int instance_index;
-	double *ptr_instance, *ptr_label, *ptr_predict_label;
+	double *ptr_label, *ptr_predict_label;
 	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
 	struct feature_node *x;
 	mxArray *pplhs[1]; // instance sparse matrix in row format
 
 	int correct = 0;
 	int total = 0;
+	double error = 0;
+	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
 
 	int nr_class=get_nr_class(model_);
 	int nr_w;
@@ -100,7 +102,6 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co
 		return;
 	}
 
-	ptr_instance = mxGetPr(prhs[1]);
 	ptr_label    = mxGetPr(prhs[0]);
 
 	// transpose instance matrix
@@ -134,42 +135,63 @@ void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, co
 	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
 	{
 		int i;
-		double target,v;
+		double target_label, predict_label;
 
-		target = ptr_label[instance_index];
+		target_label = ptr_label[instance_index];
 
 		// prhs[1] and prhs[1]^T are sparse
 		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);
 
 		if(predict_probability_flag)
 		{
-			v = predict_probability(model_, x, prob_estimates);
-			ptr_predict_label[instance_index] = v;
+			predict_label = predict_probability(model_, x, prob_estimates);
+			ptr_predict_label[instance_index] = predict_label;
 			for(i=0;i<nr_class;i++)
 				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
 		}
 		else
 		{
 			double *dec_values = Malloc(double, nr_class);
-			v = predict(model_, x);
-			ptr_predict_label[instance_index] = v;
+			predict_label = predict_values(model_, x, dec_values);
+			ptr_predict_label[instance_index] = predict_label;
 
-			predict_values(model_, x, dec_values);
 			for(i=0;i<nr_w;i++)
 				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
 			free(dec_values);
 		}
 
-		if(v == target)
+		if(predict_label == target_label)
 			++correct;
+		error += (predict_label-target_label)*(predict_label-target_label);
+		sump += predict_label;
+		sumt += target_label;
+		sumpp += predict_label*predict_label;
+		sumtt += target_label*target_label;
+		sumpt += predict_label*target_label;
+
 		++total;
 	}
-	mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
+	
+	if(model_->param.solver_type==L2R_L2LOSS_SVR || 
+           model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 
+           model_->param.solver_type==L2R_L2LOSS_SVR_DUAL)
+        {
+                mexPrintf("Mean squared error = %g (regression)\n",error/total);
+                mexPrintf("Squared correlation coefficient = %g (regression)\n",
+                       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+                       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+                       );
+        }
+	else
+		mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
 
 	// return accuracy, mean squared error, squared correlation coefficient
-	plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
+	plhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
 	ptr = mxGetPr(plhs[1]);
-	ptr[0] = (double) correct/total*100;
+	ptr[0] = (double)correct/total*100;
+	ptr[1] = error/total;
+	ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+				((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));
 
 	free(x);
 	if(prob_estimates != NULL)
@@ -182,7 +204,11 @@ void exit_with_help()
 			"Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n"
 			"liblinear_options:\n"
 			"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
-			"col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format"
+			"col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n"
+			"Returns:\n"
+			"  predicted_label: prediction output vector.\n"
+			"  accuracy: a vector with accuracy, mean squared error, squared correlation coefficient.\n"
+			"  prob_estimates: If selected, probability estimate vector.\n"
 			);
 }
 
diff --git a/matlab/train.c b/matlab/train.c
index 1094a51..7bab35f 100644
--- a/matlab/train.c
+++ b/matlab/train.c
@@ -27,25 +27,34 @@ void exit_with_help()
 	"Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n"
 	"liblinear_options:\n"
 	"-s type : set type of solver (default 1)\n"
-	"	0 -- L2-regularized logistic regression (primal)\n"
-	"	1 -- L2-regularized L2-loss support vector classification (dual)\n"	
-	"	2 -- L2-regularized L2-loss support vector classification (primal)\n"
-	"	3 -- L2-regularized L1-loss support vector classification (dual)\n"
-	"	4 -- multi-class support vector classification by Crammer and Singer\n"
-	"	5 -- L1-regularized L2-loss support vector classification\n"
-	"	6 -- L1-regularized logistic regression\n"
-	"	7 -- L2-regularized logistic regression (dual)\n"
+	"	 0 -- L2-regularized logistic regression (primal)\n"
+	"	 1 -- L2-regularized L2-loss support vector classification (dual)\n"	
+	"	 2 -- L2-regularized L2-loss support vector classification (primal)\n"
+	"	 3 -- L2-regularized L1-loss support vector classification (dual)\n"
+	"	 4 -- multi-class support vector classification by Crammer and Singer\n"
+	"	 5 -- L1-regularized L2-loss support vector classification\n"
+	"	 6 -- L1-regularized logistic regression\n"
+	"	 7 -- L2-regularized logistic regression (dual)\n"
+	"	11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n"
+	"	12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n"
+	"	13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n"
 	"-c cost : set the parameter C (default 1)\n"
+	"-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
 	"-e epsilon : set tolerance of termination criterion\n"
 	"	-s 0 and 2\n" 
 	"		|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" 
 	"		where f is the primal function and pos/neg are # of\n" 
 	"		positive/negative data (default 0.01)\n"
+	"	-s 11\n"
+	"		|f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n" 
 	"	-s 1, 3, 4 and 7\n"
 	"		Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
 	"	-s 5 and 6\n"
 	"		|f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
 	"		where f is the primal function (default 0.01)\n"
+	"	-s 12 and 13\n"
+	"		|f'(alpha)|_1 <= eps |f'(alpha0)|,\n"
+	"		where f is the dual function (default 0.1)\n"
 	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
 	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
 	"-v n: n-fold cross validation mode\n"
@@ -69,16 +78,42 @@ double do_cross_validation()
 {
 	int i;
 	int total_correct = 0;
-	int *target = Malloc(int,prob.l);
+	double total_error = 0;
+	double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
+	double *target = Malloc(double, prob.l);
 	double retval = 0.0;
 
 	cross_validation(&prob,&param,nr_fold,target);
-
-	for(i=0;i<prob.l;i++)
-		if(target[i] == prob.y[i])
-			++total_correct;
-	mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
-	retval = 100.0*total_correct/prob.l;
+	if(param.solver_type == L2R_L2LOSS_SVR ||
+	   param.solver_type == L2R_L1LOSS_SVR_DUAL ||
+	   param.solver_type == L2R_L2LOSS_SVR_DUAL)
+	{
+		// regression: report mean squared error and squared correlation coefficient
+		for(i=0;i<prob.l;i++)
+		{
+			double y = prob.y[i];
+			double v = target[i];
+			total_error += (v-y)*(v-y);
+			sumv += v;
+			sumy += y;
+			sumvv += v*v;
+			sumyy += y*y;
+			sumvy += v*y;
+		}
+		mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+		mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
+			((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
+			((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy)));
+		retval = total_error/prob.l;
+	}
+	else
+	{
+		for(i=0;i<prob.l;i++)
+			if(target[i] == prob.y[i])
+				++total_correct;
+		mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+		retval = 100.0*total_correct/prob.l;
+	}
 
 	free(target);
 	return retval;
@@ -96,6 +131,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 	param.solver_type = L2R_L2LOSS_SVC_DUAL;
 	param.C = 1;
 	param.eps = INF; // see setting below
+	param.p = 0.1;
 	param.nr_weight = 0;
 	param.weight_label = NULL;
 	param.weight = NULL;
@@ -138,6 +174,9 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 			case 'c':
 				param.C = atof(argv[i]);
 				break;
+			case 'p':
+				param.p = atof(argv[i]);
+				break;
 			case 'e':
 				param.eps = atof(argv[i]);
 				break;
@@ -174,12 +213,30 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 
 	if(param.eps == INF)
 	{
-		if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC)
-			param.eps = 0.01;
-		else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS || param.solver_type == L2R_LR_DUAL)
-			param.eps = 0.1;
-		else if(param.solver_type == L1R_L2LOSS_SVC || param.solver_type == L1R_LR)
-			param.eps = 0.01;
+		switch(param.solver_type)
+		{
+			case L2R_LR: 
+			case L2R_L2LOSS_SVC:
+				param.eps = 0.01;
+				break;
+			case L2R_L2LOSS_SVR:
+				param.eps = 0.001;
+				break;
+			case L2R_L2LOSS_SVC_DUAL: 
+			case L2R_L1LOSS_SVC_DUAL: 
+			case MCSVM_CS: 
+			case L2R_LR_DUAL: 
+				param.eps = 0.1;
+				break;
+			case L1R_L2LOSS_SVC: 
+			case L1R_LR:
+				param.eps = 0.01;
+				break;
+			case L2R_L1LOSS_SVR_DUAL:
+			case L2R_L2LOSS_SVR_DUAL:
+				param.eps = 0.1;
+				break;
+		}
 	}
 	return 0;
 }
@@ -238,7 +295,7 @@ int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat)
 	elements = num_samples + prob.l*2;
 	max_index = (int) mxGetM(instance_mat_col);
 
-	prob.y = Malloc(int, prob.l);
+	prob.y = Malloc(double, prob.l);
 	prob.x = Malloc(struct feature_node*, prob.l);
 	x_space = Malloc(struct feature_node, elements);
 
@@ -248,7 +305,7 @@ int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat)
 	for(i=0;i<prob.l;i++)
 	{
 		prob.x[i] = &x_space[j];
-		prob.y[i] = (int) labels[i];
+		prob.y[i] = labels[i];
 		low = (int) jc[i], high = (int) jc[i+1];
 		for(k=low;k<high;k++)
 		{
diff --git a/python/README b/python/README
index 7eca2a7..e856e74 100644
--- a/python/README
+++ b/python/README
@@ -63,7 +63,7 @@ in liblinearutil.py and the usage is the same as the LIBLINEAR MATLAB interface.
 >>> save_model('heart_scale.model', m)
 >>> m = load_model('heart_scale.model')
 >>> p_label, p_acc, p_val = predict(y, x, m, '-b 1')
->>> ACC = evaluations(y, p_label)
+>>> ACC, MSE, SCC = evaluations(y, p_label)
 
 # Getting online help
 >>> help(train)
@@ -278,7 +278,9 @@ The above command loads
 
     p_labels: a list of predicted labels
 
-    p_acc: testing accuracy
+    p_acc: a tuple including accuracy (for classification), mean
+           squared error, and squared correlation coefficient (for
+           regression).
 
     p_vals: a list of decision values or probability estimates (if '-b 1' 
             is specified). If k is the number of classes, for decision values,
@@ -307,7 +309,7 @@ The above command loads
     Calculate some evaluations using the true values (ty) and predicted
     values (pv):
 
-    >>> ACC = evaluations(ty, pv)
+    >>> (ACC, MSE, SCC) = evaluations(ty, pv)
 
     ty: a list of true values.
 
@@ -315,6 +317,10 @@ The above command loads
 
     ACC: accuracy.
 
+    MSE: mean squared error.
+
+    SCC: squared correlation coefficient.
+
 
 Additional Information
 ======================
diff --git a/python/liblinear.py b/python/liblinear.py
index 5872a4a..890e18a 100644
--- a/python/liblinear.py
+++ b/python/liblinear.py
@@ -20,8 +20,11 @@ else:
 
 # Construct constants
 SOLVER_TYPE = ['L2R_LR', 'L2R_L2LOSS_SVC_DUAL', 'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL',\
-		'MCSVM_CS', 'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL']
-for i, s in enumerate(SOLVER_TYPE): exec("%s = %d" % (s , i))
+		'MCSVM_CS', 'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', \
+		None, None, None, \
+		'L2R_L2LOSS_SVR', 'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL']
+for i, s in enumerate(SOLVER_TYPE): 
+	if s is not None: exec("%s = %d" % (s , i))
 
 PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
 def print_null(s): 
@@ -68,7 +71,7 @@ def gen_feature_nodearray(xi, feature_max=None, issparse=True):
 
 class problem(Structure):
 	_names = ["l", "n", "y", "x", "bias"]
-	_types = [c_int, c_int, POINTER(c_int), POINTER(POINTER(feature_node)), c_double]
+	_types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double]
 	_fields_ = genFields(_names, _types)
 
 	def __init__(self, y, x, bias = -1):
@@ -85,7 +88,7 @@ class problem(Structure):
 			max_idx = max(max_idx, tmp_idx)
 		self.n = max_idx
 
-		self.y = (c_int * l)()
+		self.y = (c_double * l)()
 		for i, yi in enumerate(y): self.y[i] = y[i]
 
 		self.x = (POINTER(feature_node) * l)() 
@@ -109,8 +112,8 @@ class problem(Structure):
 
 
 class parameter(Structure):
-	_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight"]
-	_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double)]
+	_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
+	_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
 	_fields_ = genFields(_names, _types)
 
 	def __init__(self, options = None):
@@ -128,6 +131,7 @@ class parameter(Structure):
 		self.solver_type = L2R_L2LOSS_SVC_DUAL
 		self.eps = float('inf')
 		self.C = 1
+		self.p = 0.1
 		self.nr_weight = 0
 		self.weight_label = (c_int * 0)()
 		self.weight = (c_double * 0)()
@@ -151,6 +155,9 @@ class parameter(Structure):
 			elif argv[i] == "-c":
 				i = i + 1
 				self.C = float(argv[i])
+			elif argv[i] == "-p":
+				i = i + 1
+				self.p = float(argv[i])
 			elif argv[i] == "-e":
 				i = i + 1
 				self.eps = float(argv[i])
@@ -185,11 +192,14 @@ class parameter(Structure):
 		if self.eps == float('inf'):
 			if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
 				self.eps = 0.01
+			elif self.solver_type in [L2R_L2LOSS_SVR]:
+				self.eps = 0.001
 			elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]:
 				self.eps = 0.1
 			elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]:
 				self.eps = 0.01
-
+			elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
+				self.eps = 0.1
 
 class model(Structure):
 	_names = ["param", "nr_class", "nr_feature", "w", "label", "bias"]
@@ -212,7 +222,7 @@ class model(Structure):
 
 	def get_labels(self):
 		nr_class = self.get_nr_class()
-		labels = (c_int * nr_class)()
+		labels = (c_int * nr_class)()  # class labels are stored as C ints in the model, not doubles
 		liblinear.get_labels(self, labels)
 		return labels[:nr_class]
 
@@ -232,11 +242,11 @@ def toPyModel(model_ptr):
 	return m
 
 fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
-fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_int)])
+fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
 
-fillprototype(liblinear.predict_values, c_int, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
-fillprototype(liblinear.predict, c_int, [POINTER(model), POINTER(feature_node)])
-fillprototype(liblinear.predict_probability, c_int, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
+fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
+fillprototype(liblinear.predict, c_double, [POINTER(model), POINTER(feature_node)])
+fillprototype(liblinear.predict_probability, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
 
 fillprototype(liblinear.save_model, c_int, [c_char_p, POINTER(model)])
 fillprototype(liblinear.load_model, POINTER(model), [c_char_p])
diff --git a/python/liblinearutil.py b/python/liblinearutil.py
index 42f7352..445dfae 100644
--- a/python/liblinearutil.py
+++ b/python/liblinearutil.py
@@ -30,7 +30,7 @@ def load_model(model_file_name):
 	
 	Load a LIBLINEAR model from model_file_name and return.
 	"""
-	model = liblinear.load_model(model_file_name.encode())
+	model = liblinear.load_model(model_file_name.encode())  # c_char_p argument needs bytes on Python 3
 	if not model: 
 		print("can't open model file %s" % model_file_name)
 		return None
@@ -47,19 +47,32 @@ def save_model(model_file_name, model):
 
 def evaluations(ty, pv):
 	"""
-	evaluations(ty, pv) -> ACC
+	evaluations(ty, pv) -> (ACC, MSE, SCC)
 
-	Calculate accuracy using the true values (ty) and predicted values (pv).
+	Calculate accuracy, mean squared error and squared correlation coefficient
+	using the true values (ty) and predicted values (pv).
 	"""
 	if len(ty) != len(pv):
 		raise ValueError("len(ty) must equal to len(pv)")
 	total_correct = total_error = 0
+	sumv = sumy = sumvv = sumyy = sumvy = 0.0  # float sums keep SCC in true division
 	for v, y in zip(pv, ty):
 		if y == v: 
 			total_correct += 1
+		total_error += (v-y)*(v-y)
+		sumv += v
+		sumy += y
+		sumvv += v*v
+		sumyy += y*y
+		sumvy += v*y
 	l = len(ty)
 	ACC = 100.0*total_correct/l
-	return ACC
+	MSE = 1.0*total_error/l  # 1.0* avoids integer division for int inputs on Python 2
+	try:
+		SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
+	except ZeroDivisionError:  # zero variance in ty or pv: correlation is undefined
+		SCC = float('nan')
+	return (ACC, MSE, SCC)
 
 def train(arg1, arg2=None, arg3=None):
 	"""
@@ -70,28 +83,37 @@ def train(arg1, arg2=None, arg3=None):
 	Train a model from data (y, x) or a problem prob using
 	'options' or a parameter param. 
 	If '-v' is specified in 'options' (i.e., cross validation)
-	accuracy (ACC) is returned.
+	either accuracy (ACC) or mean-squared error (MSE) is returned.
 
 	'options':
 		-s type : set type of solver (default 1)
-			0 -- L2-regularized logistic regression (primal)
-			1 -- L2-regularized L2-loss support vector classification (dual)	
-			2 -- L2-regularized L2-loss support vector classification (primal)
-			3 -- L2-regularized L1-loss support vector classification (dual)
-			4 -- multi-class support vector classification by Crammer and Singer
-			5 -- L1-regularized L2-loss support vector classification
-			6 -- L1-regularized logistic regression
-			7 -- L2-regularized logistic regression (dual)
+			 0 -- L2-regularized logistic regression (primal)
+			 1 -- L2-regularized L2-loss support vector classification (dual)	
+			 2 -- L2-regularized L2-loss support vector classification (primal)
+			 3 -- L2-regularized L1-loss support vector classification (dual)
+			 4 -- multi-class support vector classification by Crammer and Singer
+			 5 -- L1-regularized L2-loss support vector classification
+			 6 -- L1-regularized logistic regression
+			 7 -- L2-regularized logistic regression (dual)
+			11 -- L2-regularized L2-loss epsilon support vector regression (primal)
+			12 -- L2-regularized L2-loss epsilon support vector regression (dual)
+			13 -- L2-regularized L1-loss epsilon support vector regression (dual)
 		-c cost : set the parameter C (default 1)
+		-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
 		-e epsilon : set tolerance of termination criterion
 			-s 0 and 2 
 				|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 
 				where f is the primal function, (default 0.01)
+			-s 11
+				|f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 
 			-s 1, 3, 4, and 7
 				Dual maximal violation <= eps; similar to liblinear (default 0.1)
 			-s 5 and 6
 				|f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
 				where f is the primal function (default 0.01)
+			-s 12 and 13
+				|f'(alpha)|_1 <= eps |f'(alpha0)|,
+				where f is the dual function (default 0.1)
 		-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
 		-wi weight: weights adjust the parameter C of different classes (see README for details)
 		-v n: n-fold cross validation mode
@@ -120,11 +142,16 @@ def train(arg1, arg2=None, arg3=None):
 
 	if param.cross_validation:
 		l, nr_fold = prob.l, param.nr_fold
-		target = (c_int * l)()
+		target = (c_double * l)()
 		liblinear.cross_validation(prob, param, nr_fold, target)
-		ACC = evaluations(prob.y[:l], target[:l])
-		print("Cross Validation Accuracy = %g%%" % ACC)
-		return ACC
+		ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
+		if param.solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
+			print("Cross Validation Mean squared error = %g" % MSE)
+			print("Cross Validation Squared correlation coefficient = %g" % SCC)
+			return MSE
+		else:
+			print("Cross Validation Accuracy = %g%%" % ACC)
+			return ACC
 	else :
 		m = liblinear.train(prob, param)
 		m = toPyModel(m)
@@ -143,7 +170,8 @@ def predict(y, x, m, options=""):
 
 	The return tuple contains
 	p_labels: a list of predicted labels
-	p_acc: testing accuracy. 
+	p_acc: a tuple including accuracy (for classification), mean-squared
+	       error, and squared correlation coefficient (for regression).
 	p_vals: a list of decision values or probability estimates (if '-b 1' 
 	        is specified). If k is the number of classes, for decision values,
 	        each element includes results of predicting k binary-class
@@ -164,6 +192,7 @@ def predict(y, x, m, options=""):
 			raise ValueError("Wrong options")
 		i+=1
 
+	solver_type = m.param.solver_type
 	nr_class = m.get_nr_class()
 	nr_feature = m.get_nr_feature()
 	is_prob_model = m.is_probability_model()
@@ -201,9 +230,12 @@ def predict(y, x, m, options=""):
 			pred_values += [values]
 	if len(y) == 0:
 		y = [0] * len(x)
-	ACC = evaluations(y, pred_labels)
+	ACC, MSE, SCC = evaluations(y, pred_labels)
 	l = len(y)
-	print("Accuracy = %g%% (%d/%d)" % (ACC, int(l*ACC//100), l))
+	if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
+		print("Mean squared error = %g (regression)" % MSE)
+		print("Squared correlation coefficient = %g (regression)" % SCC)
+	else:  # round, not truncate: l*ACC/100 is a float that can land just below the true count
+		print("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(round(l*ACC/100)), l))
 
-	return pred_labels, ACC, pred_values
-	
+	return pred_labels, (ACC, MSE, SCC), pred_values
-- 
2.50.1