Usage: train [options] training_set_file [model_file]
options:
-s type : set type of solver (default 1)
- 0 -- L2-regularized logistic regression
- 1 -- L2-regularized L2-loss support vector classification (dual)
- 2 -- L2-regularized L2-loss support vector classification (primal)
- 3 -- L2-regularized L1-loss support vector classification (dual)
- 4 -- multi-class support vector classification by Crammer and Singer
- 5 -- L1-regularized L2-loss support vector classification
- 6 -- L1-regularized logistic regression
+ 0 -- L2-regularized logistic regression (primal)
+ 1 -- L2-regularized L2-loss support vector classification (dual)
+ 2 -- L2-regularized L2-loss support vector classification (primal)
+ 3 -- L2-regularized L1-loss support vector classification (dual)
+ 4 -- multi-class support vector classification by Crammer and Singer
+ 5 -- L1-regularized L2-loss support vector classification
+ 6 -- L1-regularized logistic regression
+ 7 -- L2-regularized logistic regression (dual)
-c cost : set the parameter C (default 1)
-e epsilon : set tolerance of termination criterion
- -s 0 and 2
- |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
- where f is the primal function and pos/neg are # of
- positive/negative data (default 0.01)
- -s 1, 3, and 4
- Dual maximal violation <= eps; similar to libsvm (default 0.1)
- -s 5 and 6
- |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
- where f is the primal function (default 0.01)
+ -s 0 and 2
+ |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
+ where f is the primal function and pos/neg are # of
+ positive/negative data (default 0.01)
+ -s 1, 3, 4 and 7
+ Dual maximal violation <= eps; similar to libsvm (default 0.1)
+ -s 5 and 6
+ |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
+ where f is the primal function (default 0.01)
-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
-wi weight: weights adjust the parameter C of different classes (see README for details)
-v n: n-fold cross validation mode
Q is a matrix with Q_ij = y_i y_j x_i^T x_j.
+For L2-regularized logistic regression (-s 7), we solve
+
+min_alpha  0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
+    s.t.   0 <= alpha_i <= C.
+
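+The dropped constant is \sum_i C*log(C); the objective value printed
+after training includes it.
+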
If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias].
-The primal-dual relationship implies that -s 1 and -s 2 gives the same
-model.
+The primal-dual relationship implies that -s 1 and -s 2 give the same
+model; likewise -s 0 and -s 7 give the same model.
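+If -s 7 reaches its maximal number of iterations, switching to the
+primal solver -s 0 may be faster (the solver prints a warning
+suggesting this; see also the FAQ).
+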
We implement the 1-vs-the-rest multi-class strategy. In training i
vs. non_i, their C parameters are (weight from -wi)*C and C,
double* weight;
};
- solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR.
+ solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL.
- L2R_LR L2-regularized logistic regression
+ L2R_LR L2-regularized logistic regression (primal)
L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual)
L2R_L2LOSS_SVC L2-regularized L2-loss support vector classification (primal)
L2R_L1LOSS_SVC_DUAL L2-regularized L1-loss support vector classification (dual)
MCSVM_CS multi-class support vector classification by Crammer and Singer
L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification
L1R_LR L1-regularized logistic regression
+ L2R_LR_DUAL L2-regularized logistic regression (dual)
C is the cost of constraints violation.
eps is the stopping criterion.
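
For reference, a minimal sketch of selecting the new solver through the
library interface (assuming a struct problem prob has already been set
up as usual; model cleanup is omitted):

    struct parameter param;
    param.solver_type = L2R_LR_DUAL;
    param.eps = 0.1;      /* default tolerance for the dual solvers */
    param.C = 1;
    param.nr_weight = 0;  /* no class-specific adjustment of C */
    param.weight_label = NULL;
    param.weight = NULL;

    const char *error_msg = check_parameter(&prob, &param);
    if(error_msg == NULL)
    {
        struct model *model_ = train(&prob, &param);
        /* ... use model_ ... */
    }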
info("\noptimization finished, #iter = %d\n",iter);
if (iter >= max_iter)
- info("Warning: reaching max number of iterations\n");
+ info("\nWARNING: reaching max number of iterations\n");
// calculate objective value
double v = 0;
delete [] index;
}
+// A coordinate descent algorithm for
+// the dual of L2-regularized logistic regression problems
+//
+// min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log(\alpha_i) + \sum (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i),
+// s.t. 0 <= alpha_i <= upper_bound_i,
+//
+// where Qij = yi yj xi^T xj and
+// upper_bound_i = Cp if y_i = 1
+// upper_bound_i = Cn if y_i = -1
+//
+// Given:
+// x, y, Cp, Cn
+// eps is the stopping tolerance
+//
+// solution will be put in w
+//
+// See Algorithm 5 of Yu et al., MLJ 2010
+
+#undef GETI
+#define GETI(i) (y[i]+1)
+// To support weights for instances, use GETI(i) (i)
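+// By default GETI(i) = y[i]+1 maps y = -1 and y = +1 to slots 0 and 2
+// of upper_bound[] below, i.e. to Cn and Cp respectively.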
+
+void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
+{
+ int l = prob->l;
+ int w_size = prob->n;
+ int i, s, iter = 0;
+ double *xTx = new double[l];
+ int max_iter = 1000;
+ int *index = new int[l];
+ double *alpha = new double[2*l]; // store alpha and C - alpha
+ schar *y = new schar[l];
+ int max_inner_iter = 100; // for inner Newton
+ double innereps = 1e-2;
+ double innereps_min = min(1e-8, eps);
+ double upper_bound[3] = {Cn, 0, Cp};
+
+ for(i=0; i<w_size; i++)
+ w[i] = 0;
+ for(i=0; i<l; i++)
+ {
+ if(prob->y[i] > 0)
+ {
+ y[i] = +1;
+ }
+ else
+ {
+ y[i] = -1;
+ }
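+ // start from a strictly feasible interior point: the log terms in the
+ // dual objective require 0 < alpha_i < upper_bound_i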
+ alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
+ alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
+
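+ // cache x_i^T x_i and accumulate the initial w = \sum_j y_j alpha_j x_j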
+ xTx[i] = 0;
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ xTx[i] += (xi->value)*(xi->value);
+ w[xi->index-1] += y[i]*alpha[2*i]*xi->value;
+ xi++;
+ }
+ index[i] = i;
+ }
+
+ while (iter < max_iter)
+ {
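+ // visit the instances in a random order in each outer iteration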
+ for (i=0; i<l; i++)
+ {
+ int j = i+rand()%(l-i);
+ swap(index[i], index[j]);
+ }
+ int newton_iter = 0;
+ double Gmax = 0;
+ for (s=0; s<l; s++)
+ {
+ i = index[s];
+ schar yi = y[i];
+ double C = upper_bound[GETI(i)];
+ double ywTx = 0, xisq = xTx[i];
+ feature_node *xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ ywTx += w[xi->index-1]*xi->value;
+ xi++;
+ }
+ ywTx *= y[i];
+ double a = xisq, b = ywTx;
+
+ // Decide to minimize g_1(z) or g_2(z)
+ int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
+ if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
+ {
+ ind1 = 2*i+1;
+ ind2 = 2*i;
+ sign = -1;
+ }
+
+ // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
+ double alpha_old = alpha[ind1];
+ double z = alpha_old;
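+ // heuristic starting point for Newton: shrink z if it lies in the
+ // upper half of (0, C)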
+ if(C - z < 0.5 * C)
+ z = 0.1*z;
+ double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ Gmax = max(Gmax, fabs(gp));
+
+ // Newton method on the sub-problem
+ const double eta = 0.1; // xi in the paper
+ int inner_iter = 0;
+ while (inner_iter <= max_inner_iter)
+ {
+ if(fabs(gp) < innereps)
+ break;
+ double gpp = a + C/(C-z)/z;
+ double tmpz = z - gp/gpp;
+ if(tmpz <= 0)
+ z *= eta;
+ else // tmpz in (0, C)
+ z = tmpz;
+ gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+ newton_iter++;
+ inner_iter++;
+ }
+
+ if(inner_iter > 0) // update w
+ {
+ alpha[ind1] = z;
+ alpha[ind2] = C-z;
+ xi = prob->x[i];
+ while (xi->index != -1)
+ {
+ w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
+ xi++;
+ }
+ }
+ }
+
+ iter++;
+ if(iter % 10 == 0)
+ info(".");
+
+ if(Gmax < eps)
+ break;
+
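+ // tighten the tolerance of the inner Newton solves once they become easy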
+ if(newton_iter < l/10)
+ innereps = max(innereps_min, 0.1*innereps);
+
+ }
+
+ info("\noptimization finished, #iter = %d\n",iter);
+ if (iter >= max_iter)
+ info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
+
+ // calculate objective value
+
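+ // 0.5*w^T*w equals 0.5*alpha^T Q alpha because w = \sum_i y_i alpha_i x_i;
+ // the upper_bound*log(upper_bound) terms are the constant in the dual objective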
+ double v = 0;
+ for(i=0; i<w_size; i++)
+ v += w[i] * w[i];
+ v *= 0.5;
+ for(i=0; i<l; i++)
+ v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
+ - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
+ info("Objective value = %lf\n", v);
+
+ delete [] xTx;
+ delete [] alpha;
+ delete [] y;
+ delete [] index;
+}
+
// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
delete [] x_space;
break;
}
+ case L2R_LR_DUAL:
+ solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
+ break;
default:
fprintf(stderr, "Error: unknown solver_type\n");
break;
if(param->weight_label[i] == label[j])
break;
if(j == nr_class)
- fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
+ fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
else
weighted_C[j] *= param->weight[i];
}
static const char *solver_type_table[]=
{
- "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC","L2R_L1LOSS_SVC_DUAL","MCSVM_CS", "L1R_L2LOSS_SVC","L1R_LR", NULL
+ "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
+ "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL
};
int save_model(const char *model_file_name, const struct model *model_)
&& param->solver_type != L2R_L1LOSS_SVC_DUAL
&& param->solver_type != MCSVM_CS
&& param->solver_type != L1R_L2LOSS_SVC
- && param->solver_type != L1R_LR)
+ && param->solver_type != L1R_LR
+ && param->solver_type != L2R_LR_DUAL)
return "unknown solver type";
return NULL;
int check_probability_model(const struct model *model_)
{
- return (model_->param.solver_type==L2R_LR || model_->param.solver_type==L1R_LR);
+ return (model_->param.solver_type==L2R_LR ||
+ model_->param.solver_type==L2R_LR_DUAL ||
+ model_->param.solver_type==L1R_LR);
}
void set_print_string_function(void (*print_func)(const char*))
"Usage: train [options] training_set_file [model_file]\n"
"options:\n"
"-s type : set type of solver (default 1)\n"
- " 0 -- L2-regularized logistic regression\n"
+ " 0 -- L2-regularized logistic regression (primal)\n"
" 1 -- L2-regularized L2-loss support vector classification (dual)\n"
" 2 -- L2-regularized L2-loss support vector classification (primal)\n"
" 3 -- L2-regularized L1-loss support vector classification (dual)\n"
" 4 -- multi-class support vector classification by Crammer and Singer\n"
" 5 -- L1-regularized L2-loss support vector classification\n"
" 6 -- L1-regularized logistic regression\n"
+ " 7 -- L2-regularized logistic regression (dual)\n"
"-c cost : set the parameter C (default 1)\n"
"-e epsilon : set tolerance of termination criterion\n"
" -s 0 and 2\n"
" |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
" where f is the primal function and pos/neg are # of\n"
" positive/negative data (default 0.01)\n"
- " -s 1, 3, and 4\n"
+ " -s 1, 3, 4 and 7\n"
" Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
" -s 5 and 6\n"
" |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,\n"
{
if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC)
param.eps = 0.01;
- else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS)
+ else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS || param.solver_type == L2R_LR_DUAL)
param.eps = 0.1;
else if(param.solver_type == L1R_L2LOSS_SVC || param.solver_type == L1R_LR)
param.eps = 0.01;