CC ?= gcc
CFLAGS = -Wall -Wconversion -O3 -fPIC
LIBS = blas/blas.a
-SHVER = 2
+SHVER = 3
OS = $(shell uname)
#LIBS = -lblas
-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
-wi weight: weights adjust the parameter C of different classes (see README for details)
-v n: n-fold cross validation mode
+-C : find parameter C (only for -s 0 and 2)
-q : quiet mode (no outputs)
Option -v randomly splits the data into n parts and calculates cross
validation accuracy on them.
+Option -C conducts cross validation under different C values and finds
+the best one. This option is supported only by -s 0 and -s 2. If
+the solver is not specified, -s 2 is used.
+
Formulations:
For L2-regularized logistic regression (-s 0), we solve
Use a smaller stopping tolerance, e.g. 0.001 instead of the default
0.1, if you want more accurate solutions.
+> train -C -s 0 data_file
+
+Conduct cross validation many times using logistic regression
+and find the parameter C that achieves the best cross-validation
+accuracy.
+
> train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file
Train four classifiers:
The format of prob is the same as that for train().
+- Function: void find_parameter_C(const struct problem *prob,
+ const struct parameter *param, int nr_fold, double start_C,
+ double max_C, double *best_C, double *best_rate);
+
+ This function is similar to cross_validation. However, instead of
+ conducting cross validation under a specified parameter C, it
+ conducts cross validation many times under parameters C = start_C,
+ 2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with
+ the highest cross validation accuracy.
+
+ If start_C <= 0, then this procedure calculates a small enough C
+ for prob as the start_C. The procedure stops when the models of
+ all folds become stable or C reaches max_C. The best C and the
+ corresponding accuracy are assigned to *best_C and *best_rate,
+ respectively.
+
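+    A minimal calling sketch (assuming prob and param are already set up
+    as for train(); passing start_C <= 0, e.g. -1, lets the procedure pick
+    a small enough starting value, and max_C = 1024 caps the search):
+
+        double best_C, best_rate;
+        find_parameter_C(&prob, &param, 5, -1.0, 1024, &best_C, &best_rate);
+        printf("Best C = %g  CV accuracy = %g%%\n", best_C, 100.0*best_rate);
+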
- Function: double predict(const model *model_, const feature_node *x);
For a classification model, the predicted class for x is returned.
fputs(s,stdout);
fflush(stdout);
}
+static void print_null(const char *s) {}
static void (*liblinear_print_string) (const char *) = &print_string_stdout;
static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
- double eps=param->eps;
+ //inner and outer tolerances for TRON
+ double eps = param->eps;
+ double eps_cg = 0.1;
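+	// a looser inner (CG) tolerance is used when a warm-start solution is supplied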
+ if(param->init_sol != NULL)
+ eps_cg = 0.5;
+
int pos = 0;
int neg = 0;
for(int i=0;i<prob->l;i++)
if(prob->y[i] > 0)
pos++;
neg = prob->l - pos;
-
double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
function *fun_obj=NULL;
C[i] = Cn;
}
fun_obj=new l2r_lr_fun(prob, C);
- TRON tron_obj(fun_obj, primal_solver_tol);
+ TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
tron_obj.set_print_string(liblinear_print_string);
tron_obj.tron(w);
delete fun_obj;
C[i] = Cn;
}
fun_obj=new l2r_l2_svc_fun(prob, C);
- TRON tron_obj(fun_obj, primal_solver_tol);
+ TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
tron_obj.set_print_string(liblinear_print_string);
tron_obj.tron(w);
delete fun_obj;
}
}
+// Calculate the initial C for parameter selection
+static double calc_start_C(const problem *prob, const parameter *param)
+{
+ int i;
+ double xTx,max_xTx;
+ max_xTx = 0;
+ for(i=0; i<prob->l; i++)
+ {
+ xTx = 0;
+ feature_node *xi=prob->x[i];
+ while(xi->index != -1)
+ {
+ double val = xi->value;
+ xTx += val*val;
+ xi++;
+ }
+ if(xTx > max_xTx)
+ max_xTx = xTx;
+ }
+
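+	// a small C to start the search from; for -s 0 and -s 2 it is inversely proportional to l * max_i ||x_i||^2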
+ double min_C = 1.0;
+ if(param->solver_type == L2R_LR)
+ min_C = 1.0 / (prob->l * max_xTx);
+ else if(param->solver_type == L2R_L2LOSS_SVC)
+ min_C = 1.0 / (2 * prob->l * max_xTx);
+
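+	// return the largest power of two not exceeding min_C, so the C values tried stay on a power-of-two grid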
+ return pow( 2, floor(log(min_C) / log(2.0)) );
+}
+
+
//
// Interface functions
//
model_->w = Malloc(double, w_size);
model_->nr_class = 2;
model_->label = NULL;
- train_one(prob, param, &model_->w[0], 0, 0);
+ train_one(prob, param, model_->w, 0, 0);
}
else
{
sub_prob.y[k] = +1;
for(; k<sub_prob.l; k++)
sub_prob.y[k] = -1;
+
+ if(param->init_sol != NULL)
+ for(i=0;i<w_size;i++)
+ model_->w[i] = param->init_sol[i];
+ else
+ for(i=0;i<w_size;i++)
+ model_->w[i] = 0;
- train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+ train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
}
else
{
for(; k<sub_prob.l; k++)
sub_prob.y[k] = -1;
+ if(param->init_sol != NULL)
+ for(j=0;j<w_size;j++)
+ w[j] = param->init_sol[j*nr_class+i];
+ else
+ for(j=0;j<w_size;j++)
+ w[j] = 0;
+
train_one(&sub_prob, param, w, weighted_C[i], param->C);
for(int j=0;j<w_size;j++)
free(perm);
}
+void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+{
+ // variables for CV
+ int i;
+ int *fold_start;
+ int l = prob->l;
+ int *perm = Malloc(int, l);
+ double *target = Malloc(double, prob->l);
+ struct problem *subprob = Malloc(problem,nr_fold);
+
+ // variables for warm start
+ double ratio = 2;
+ double **prev_w = Malloc(double*, nr_fold);
+ for(i = 0; i < nr_fold; i++)
+ prev_w[i] = NULL;
+ int num_unchanged_w = 0;
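+	// num_unchanged_w counts consecutive C values for which every fold's w stays (numerically) unchanged; the search stops once it reaches 3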
+ struct parameter param1 = *param;
+ void (*default_print_string) (const char *) = liblinear_print_string;
+
+ if (nr_fold > l)
+ {
+ nr_fold = l;
+ fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
+ }
+ fold_start = Malloc(int,nr_fold+1);
+ for(i=0;i<l;i++) perm[i]=i;
+ for(i=0;i<l;i++)
+ {
+ int j = i+rand()%(l-i);
+ swap(perm[i],perm[j]);
+ }
+ for(i=0;i<=nr_fold;i++)
+ fold_start[i]=i*l/nr_fold;
+
+ for(i=0;i<nr_fold;i++)
+ {
+ int begin = fold_start[i];
+ int end = fold_start[i+1];
+ int j,k;
+
+ subprob[i].bias = prob->bias;
+ subprob[i].n = prob->n;
+ subprob[i].l = l-(end-begin);
+ subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
+ subprob[i].y = Malloc(double,subprob[i].l);
+
+ k=0;
+ for(j=0;j<begin;j++)
+ {
+ subprob[i].x[k] = prob->x[perm[j]];
+ subprob[i].y[k] = prob->y[perm[j]];
+ ++k;
+ }
+ for(j=end;j<l;j++)
+ {
+ subprob[i].x[k] = prob->x[perm[j]];
+ subprob[i].y[k] = prob->y[perm[j]];
+ ++k;
+ }
+
+ }
+
+ *best_rate = 0;
+ if(start_C <= 0)
+ start_C = calc_start_C(prob,param);
+ param1.C = start_C;
+
+ while(param1.C <= max_C)
+ {
+		// Output disabled while running CV at a particular C
+ set_print_string_function(&print_null);
+
+ for(i=0; i<nr_fold; i++)
+ {
+ int j;
+ int begin = fold_start[i];
+ int end = fold_start[i+1];
+
+ param1.init_sol = prev_w[i];
+			struct model *submodel = train(&subprob[i],&param1);
+
+ int total_w_size;
+ if(submodel->nr_class == 2)
+ total_w_size = subprob[i].n;
+ else
+ total_w_size = subprob[i].n * submodel->nr_class;
+
+ if(prev_w[i] != NULL && num_unchanged_w >= 0)
+ {
+ double norm_w_diff = 0;
+ for(j=0; j<total_w_size; j++)
+ {
+ norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+ prev_w[i][j] = submodel->w[j];
+ }
+ norm_w_diff = sqrt(norm_w_diff);
+
+ if(norm_w_diff > 1e-15)
+ num_unchanged_w = -1;
+ }
+			else
+			{
+				if(prev_w[i] == NULL)
+					prev_w[i] = Malloc(double, total_w_size);
+ for(j=0; j<total_w_size; j++)
+ prev_w[i][j] = submodel->w[j];
+ }
+
+ for(j=begin; j<end; j++)
+ target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+
+ free_and_destroy_model(&submodel);
+ }
+ set_print_string_function(default_print_string);
+
+ int total_correct = 0;
+ for(i=0; i<prob->l; i++)
+ if(target[i] == prob->y[i])
+ ++total_correct;
+ double current_rate = (double)total_correct/prob->l;
+ if(current_rate > *best_rate)
+ {
+ *best_C = param1.C;
+ *best_rate = current_rate;
+ }
+
+ info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
+ num_unchanged_w++;
+ if(num_unchanged_w == 3)
+ break;
+ param1.C = param1.C*ratio;
+ }
+
+ if(param1.C > max_C && max_C > start_C)
+ info("warning: maximum C reached.\n");
+ free(fold_start);
+ free(perm);
+ free(target);
+ for(i=0; i<nr_fold; i++)
+ free(prev_w[i]);
+ free(prev_w);
+}
+
double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
int idx;
free(param->weight_label);
if(param->weight != NULL)
free(param->weight);
+ if(param->init_sol != NULL)
+ free(param->init_sol);
}
const char *check_parameter(const problem *prob, const parameter *param)
&& param->solver_type != L2R_L1LOSS_SVR_DUAL)
return "unknown solver type";
+ if(param->init_sol != NULL
+ && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
+		return "Initial-solution specification supported only for solvers L2R_LR and L2R_L2LOSS_SVC";
+
return NULL;
}
int *weight_label;
double* weight;
double p;
+ double *init_sol;
};
struct model
struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
If the '-v' option is specified, cross validation is conducted and the
returned model is just a scalar: cross-validation accuracy for
-classification and mean-squared error for regression.
+classification and mean-squared error for regression. If the '-C' option
+is specified, the best parameter C is found by cross validation. The
+returned model is a two-element vector, where the first value is
+the best C and the second value is the corresponding cross-validation
+accuracy. The parameter selection utility is supported only by -s 0
+and -s 2.
Result of Prediction
====================
matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
+Use the best parameter to train (only supported by -s 0 and -s 2):
+
+matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
+matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
+
Additional Information
======================
-#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
"-v n: n-fold cross validation mode\n"
+ "-C : find parameter C (only for -s 0 and 2)\n"
"-q : quiet mode (no outputs)\n"
"col:\n"
" if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
struct problem prob; // set by read_problem
struct model *model_;
struct feature_node *x_space;
-int cross_validation_flag;
+int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
int col_format_flag;
int nr_fold;
double bias;
+
+void do_find_parameter_C(double *best_C, double *best_rate)
+{
+ double start_C;
+ double max_C = 1024;
+ if (flag_C_specified)
+ start_C = param.C;
+ else
+ start_C = -1.0;
+	find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
+ mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0**best_rate);
+}
+
+
double do_cross_validation()
{
int i;
sumyy += y*y;
sumvy += v*y;
}
- printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
- printf("Cross Validation Squared correlation coefficient = %g\n",
+ mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+ mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
);
for(i=0;i<prob.l;i++)
if(target[i] == prob.y[i])
++total_correct;
- printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+ mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
retval = 100.0*total_correct/prob.l;
}
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
- cross_validation_flag = 0;
+ param.init_sol = NULL;
+ flag_cross_validation = 0;
col_format_flag = 0;
+ flag_C_specified = 0;
+ flag_solver_specified = 0;
+ flag_find_C = 0;
bias = -1;
{
if(argv[i][0] != '-') break;
++i;
- if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
+ if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C') // since options -q and -C have no parameter
return 1;
switch(argv[i-1][1])
{
case 's':
param.solver_type = atoi(argv[i]);
+ flag_solver_specified = 1;
break;
case 'c':
param.C = atof(argv[i]);
+ flag_C_specified = 1;
break;
case 'p':
param.p = atof(argv[i]);
bias = atof(argv[i]);
break;
case 'v':
- cross_validation_flag = 1;
+ flag_cross_validation = 1;
nr_fold = atoi(argv[i]);
if(nr_fold < 2)
{
print_func = &print_null;
i--;
break;
+ case 'C':
+ flag_find_C = 1;
+ i--;
+ break;
default:
mexPrintf("unknown option\n");
return 1;
set_print_string_function(print_func);
+ // default solver for parameter selection is L2R_L2LOSS_SVC
+ if(flag_find_C)
+ {
+ if(!flag_cross_validation)
+ nr_fold = 5;
+ if(!flag_solver_specified)
+ {
+ mexPrintf("Solver not specified. Using -s 2\n");
+ param.solver_type = L2R_L2LOSS_SVC;
+ }
+ else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+ {
+ mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
+ return 1;
+ }
+ }
+
if(param.eps == INF)
{
switch(param.solver_type)
return;
}
- if(cross_validation_flag)
+ if (flag_find_C)
+ {
+ double best_C, best_rate, *ptr;
+
+ do_find_parameter_C(&best_C, &best_rate);
+
+ plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
+ ptr = mxGetPr(plhs[0]);
+ ptr[0] = best_C;
+ ptr[1] = best_rate;
+ }
+ else if(flag_cross_validation)
{
double *ptr;
plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
structure. If '-v' is specified, cross validation is
conducted and the returned model is just a scalar: cross-validation
accuracy for classification and mean-squared error for regression.
+ If the '-C' option is specified, the best parameter C is found
+ by cross validation. The returned model is a tuple of the best C
+ and the corresponding cross-validation accuracy. The parameter
+ selection utility is supported only by -s 0 and -s 2.
+
To train the same data many times with different
parameters, the second and the third ways should be faster.
>>> m = train(prob, '-w1 5 -c 5')
>>> m = train(prob, param)
>>> CV_ACC = train(y, x, '-v 3')
+ >>> best_C, best_rate = train(y, x, '-C -s 0')
+ >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
- Function: predict
if sys.platform == 'win32':
liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
else:
- liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
+ liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
except:
# For unix the prefix 'lib' is not considered.
if find_library('linear'):
class parameter(Structure):
- _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
- _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
+ _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
+ _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
_fields_ = genFields(_names, _types)
def __init__(self, options = None):
self.C = 1
self.p = 0.1
self.nr_weight = 0
- self.weight_label = (c_int * 0)()
- self.weight = (c_double * 0)()
+ self.weight_label = None
+ self.weight = None
+ self.init_sol = None
self.bias = -1
- self.cross_validation = False
+ self.flag_cross_validation = False
+ self.flag_C_specified = False
+ self.flag_solver_specified = False
+ self.flag_find_C = False
self.nr_fold = 0
self.print_func = cast(None, PRINT_STRING_FUN)
if argv[i] == "-s":
i = i + 1
self.solver_type = int(argv[i])
+ self.flag_solver_specified = True
elif argv[i] == "-c":
i = i + 1
self.C = float(argv[i])
+ self.flag_C_specified = True
elif argv[i] == "-p":
i = i + 1
self.p = float(argv[i])
self.bias = float(argv[i])
elif argv[i] == "-v":
i = i + 1
- self.cross_validation = 1
+ self.flag_cross_validation = 1
self.nr_fold = int(argv[i])
if self.nr_fold < 2 :
raise ValueError("n-fold cross validation: n must >= 2")
elif argv[i].startswith("-w"):
i = i + 1
self.nr_weight += 1
- nr_weight = self.nr_weight
weight_label += [int(argv[i-1][2:])]
weight += [float(argv[i])]
elif argv[i] == "-q":
self.print_func = PRINT_STRING_FUN(print_null)
+ elif argv[i] == "-C":
+ self.flag_find_C = True
+
else :
raise ValueError("Wrong options")
i += 1
self.weight[i] = weight[i]
self.weight_label[i] = weight_label[i]
+ # default solver for parameter selection is L2R_L2LOSS_SVC
+ if self.flag_find_C:
+ if not self.flag_cross_validation:
+ self.nr_fold = 5
+ if not self.flag_solver_specified:
+ self.solver_type = L2R_L2LOSS_SVC
+ self.flag_solver_specified = True
+ elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
+ raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
+
if self.eps == float('inf'):
if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
self.eps = 0.01
return m
fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
+fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
if err_msg :
raise ValueError('Error: %s' % err_msg)
- if param.cross_validation:
+ if param.flag_find_C:
+ nr_fold = param.nr_fold
+ best_C = c_double()
+ best_rate = c_double()
+ max_C = 1024
+ if param.flag_C_specified:
+ start_C = param.C
+ else:
+ start_C = -1.0
+ liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
+ print("Best C = %lf CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
+ return best_C.value,best_rate.value
+
+
+ elif param.flag_cross_validation:
l, nr_fold = prob.l, param.nr_fold
target = (c_double * l)()
liblinear.cross_validation(prob, param, nr_fold, target)
"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
"-v n: n-fold cross validation mode\n"
+ "-C : find parameter C (only for -s 0 and 2)\n"
"-q : quiet mode (no outputs)\n"
);
exit(1);
void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
void read_problem(const char *filename);
void do_cross_validation();
+void do_find_parameter_C();
struct feature_node *x_space;
struct parameter param;
struct problem prob;
struct model* model_;
int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
int nr_fold;
double bias;
exit(1);
}
- if(flag_cross_validation)
+ if (flag_find_C)
+ {
+ do_find_parameter_C();
+ }
+ else if(flag_cross_validation)
{
do_cross_validation();
}
return 0;
}
+void do_find_parameter_C()
+{
+ double start_C, best_C, best_rate;
+ double max_C = 1024;
+ if (flag_C_specified)
+ start_C = param.C;
+ else
+ start_C = -1.0;
+	find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
+ printf("Best C = %lf CV accuracy = %g%%\n", best_C, 100.0*best_rate);
+}
+
void do_cross_validation()
{
int i;
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
+ param.init_sol = NULL;
flag_cross_validation = 0;
+ flag_C_specified = 0;
+ flag_solver_specified = 0;
+ flag_find_C = 0;
bias = -1;
// parse options
{
case 's':
param.solver_type = atoi(argv[i]);
+ flag_solver_specified = 1;
break;
case 'c':
param.C = atof(argv[i]);
+ flag_C_specified = 1;
break;
case 'p':
i--;
break;
+ case 'C':
+ flag_find_C = 1;
+ i--;
+ break;
+
default:
fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
exit_with_help();
sprintf(model_file_name,"%s.model",p);
}
+ // default solver for parameter selection is L2R_L2LOSS_SVC
+ if(flag_find_C)
+ {
+ if(!flag_cross_validation)
+ nr_fold = 5;
+ if(!flag_solver_specified)
+ {
+ fprintf(stderr, "Solver not specified. Using -s 2\n");
+ param.solver_type = L2R_L2LOSS_SVC;
+ }
+ else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+ {
+ fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
+ exit_with_help();
+ }
+ }
+
if(param.eps == INF)
{
switch(param.solver_type)
(*tron_print_string)(buf);
}
-TRON::TRON(const function *fun_obj, double eps, int max_iter)
+TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
{
this->fun_obj=const_cast<function *>(fun_obj);
this->eps=eps;
+ this->eps_cg=eps_cg;
this->max_iter=max_iter;
tron_print_string = default_print;
}
double *w_new = new double[n];
double *g = new double[n];
+ // calculate gradient norm at w=0 for stopping condition.
+ double *w0 = new double[n];
for (i=0; i<n; i++)
- w[i] = 0;
+ w0[i] = 0;
+ fun_obj->fun(w0);
+ fun_obj->grad(w0, g);
+ double gnorm0 = dnrm2_(&n, g, &inc);
+ delete [] w0;
f = fun_obj->fun(w);
fun_obj->grad(w, g);
delta = dnrm2_(&n, g, &inc);
- double gnorm1 = delta;
- double gnorm = gnorm1;
+ double gnorm = delta;
- if (gnorm <= eps*gnorm1)
+ if (gnorm <= eps*gnorm0)
search = 0;
iter = 1;
fun_obj->grad(w, g);
gnorm = dnrm2_(&n, g, &inc);
- if (gnorm <= eps*gnorm1)
+ if (gnorm <= eps*gnorm0)
break;
}
if (f < -1.0e+32)
r[i] = -g[i];
d[i] = r[i];
}
- cgtol = 0.1*dnrm2_(&n, g, &inc);
+ cgtol = eps_cg*dnrm2_(&n, g, &inc);
int cg_iter = 0;
rTr = ddot_(&n, r, &inc, r, &inc);
class TRON
{
public:
- TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000);
+ TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
~TRON();
void tron(double *w);
double norm_inf(int n, double *x);
double eps;
+ double eps_cg;
int max_iter;
function *fun_obj;
void info(const char *fmt,...);