From f68d25cc425a057cd8cdcce1554bce0172a245e8 Mon Sep 17 00:00:00 2001 From: Wei-Lin Chiang Date: Thu, 16 Jul 2020 22:25:44 +0800 Subject: [PATCH] Add a new option -R for not regularizing the bias --- README | 2 + linear.cpp | 250 +++++++++++++++++++++++++++++++++++------------------ linear.h | 1 + train.c | 8 ++ 4 files changed, 178 insertions(+), 83 deletions(-) diff --git a/README b/README index 5143680..b81ad8c 100644 --- a/README +++ b/README @@ -136,6 +136,8 @@ options: |f'(alpha)|_1 <= eps |f'(alpha0)|, where f is the dual function (default 0.1) -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1) +-R : not regularize the bias; must with -B 1 to have the bias; DON'T use this unless you know what it is + (for -s 0, 2, 5, 6, 11) -wi weight: weights adjust the parameter C of different classes (see README for details) -v n: n-fold cross validation mode -C : find parameters (C for -s 0, 2 and C, p for -s 11) diff --git a/linear.cpp b/linear.cpp index e2965eb..2335406 100644 --- a/linear.cpp +++ b/linear.cpp @@ -105,7 +105,7 @@ public: class l2r_lr_fun: public function { public: - l2r_lr_fun(const problem *prob, double *C); + l2r_lr_fun(const problem *prob, const parameter *param, double *C); ~l2r_lr_fun(); double fun(double *w); @@ -123,9 +123,10 @@ private: double *z; double *D; const problem *prob; + int regularize_bias; }; -l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C) +l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C) { int l=prob->l; @@ -134,6 +135,7 @@ l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C) z = new double[l]; D = new double[l]; this->C = C; + this->regularize_bias = param->regularize_bias; } l2r_lr_fun::~l2r_lr_fun() @@ -155,6 +157,8 @@ double l2r_lr_fun::fun(double *w) for(i=0;il; @@ -291,6 +302,7 @@ l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C) z = new double[l]; I = new int[l]; this->C = C; + this->regularize_bias = param->regularize_bias; } l2r_l2_svc_fun::~l2r_l2_svc_fun() @@ -311,6 +323,8 @@ double l2r_l2_svc_fun::fun(double *w) for(i=0;ip = p; + this->p = param->p; + this->regularize_bias = param->regularize_bias; } double l2r_l2_svr_fun::fun(double *w) @@ -444,6 +466,8 @@ double l2r_l2_svr_fun::fun(double *w) for(i=0;il; int w_size = prob_col->n; @@ -1497,49 +1526,66 @@ static void solve_l1r_l2_svc( H *= 2; H = max(H, 1e-12); - double Gp = G+1; - double Gn = G-1; double violation = 0; - if(w[j] == 0) + double Gp = 0, Gn = 0; + if(j == w_size-1 && regularize_bias == 0) + violation = fabs(G); + else { - if(Gp < 0) - violation = -Gp; - else if(Gn > 0) - violation = Gn; - else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + Gp = G+1; + Gn = G-1; + if(w[j] == 0) { - active_size--; - swap(index[s], index[active_size]); - s--; - continue; + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); } - else if(w[j] > 0) - violation = fabs(Gp); - else - violation = fabs(Gn); - Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d - if(Gp < H*w[j]) - d = -Gp/H; - else if(Gn > H*w[j]) - d = -Gn/H; + if(j == w_size-1 && regularize_bias == 0) + d = -G/H; else - d = -w[j]; + { + if(Gp < H*w[j]) + d = -Gp/H; + else if(Gn > H*w[j]) + d = -Gn/H; + else + d = -w[j]; + } if(fabs(d) < 1.0e-12) continue; - double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; + double delta; + if(j == w_size-1 && regularize_bias == 0) + delta = G*d; + else + delta = fabs(w[j]+d)-fabs(w[j]) + G*d; d_old = 0; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { d_diff = d_old - d; - cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; + if(j == w_size-1 && regularize_bias == 0) + cond = -sigma*delta; + else + cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; appxcond = xj_sq[j]*d*d + G_loss*d + cond; if(appxcond <= 0) @@ -1654,6 +1700,8 @@ static void solve_l1r_l2_svc( nnz++; } } + if (regularize_bias == 0) + v -= fabs(w[w_size-1]); for(j=0; j 0) v += C[GETI(j)]*b[j]*b[j]; @@ -1679,6 +1727,9 @@ static void solve_l1r_l2_svc( // solution will be put in w // // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008) +// +// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1 +// must have been added to the original data. (see -B and -R option) #undef GETI #define GETI(i) (y[i]+1) @@ -1686,7 +1737,7 @@ static void solve_l1r_l2_svc( static void solve_l1r_lr( const problem *prob_col, double *w, double eps, - double Cp, double Cn) + double Cp, double Cn, int regularize_bias) { int l = prob_col->l; int w_size = prob_col->n; @@ -1756,6 +1807,9 @@ static void solve_l1r_lr( x++; } } + if (regularize_bias == 0) + w_norm -= fabs(w[w_size-1]); + for(j=0; j 0) - violation = Gn; - //outer-level shrinking - else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + double Gp = Grad[j]+1; + double Gn = Grad[j]-1; + if(w[j] == 0) { - active_size--; - swap(index[s], index[active_size]); - s--; - continue; + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //outer-level shrinking + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); } - else if(w[j] > 0) - violation = fabs(Gp); - else - violation = fabs(Gn); - Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; } @@ -1853,40 +1911,48 @@ static void solve_l1r_lr( x++; } - double Gp = G+1; - double Gn = G-1; double violation = 0; - if(wpd[j] == 0) + if (j == w_size-1 && regularize_bias == 0) { - if(Gp < 0) - violation = -Gp; - else if(Gn > 0) - violation = Gn; - //inner-level shrinking - else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) - { - QP_active_size--; - swap(index[s], index[QP_active_size]); - s--; - continue; - } + // bias term not shrunken + violation = fabs(G); + z = -G/H; } - else if(wpd[j] > 0) - violation = fabs(Gp); else - violation = fabs(Gn); + { + double Gp = G+1; + double Gn = G-1; + if(wpd[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //inner-level shrinking + else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) + { + QP_active_size--; + swap(index[s], index[QP_active_size]); + s--; + continue; + } + } + else if(wpd[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + // obtain solution of one-variable problem + if(Gp < H*wpd[j]) + z = -Gp/H; + else if(Gn > H*wpd[j]) + z = -Gn/H; + else + z = -wpd[j]; + } QP_Gmax_new = max(QP_Gmax_new, violation); QP_Gnorm1_new += violation; - // obtain solution of one-variable problem - if(Gp < H*wpd[j]) - z = -Gp/H; - else if(Gn > H*wpd[j]) - z = -Gn/H; - else - z = -wpd[j]; - if(fabs(z) < 1.0e-12) continue; z = min(max(z,-10.0),10.0); @@ -1927,6 +1993,8 @@ static void solve_l1r_lr( if(wpd[j] != 0) w_norm_new += fabs(wpd[j]); } + if (regularize_bias == 0) + w_norm_new -= fabs(wpd[w_size-1]); delta += (w_norm_new-w_norm); negsum_xTd = 0; @@ -1969,6 +2037,8 @@ static void solve_l1r_lr( if(wpd[j] != 0) w_norm_new += fabs(wpd[j]); } + if (regularize_bias == 0) + w_norm_new -= fabs(wpd[w_size-1]); delta *= 0.5; negsum_xTd *= 0.5; for(int i=0; iregularize_bias); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; @@ -2585,7 +2657,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); - solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn); + solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; @@ -2600,7 +2672,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do for(int i = 0; i < prob->l; i++) C[i] = param->C; - fun_obj=new l2r_l2_svr_fun(prob, C, param->p); + fun_obj=new l2r_l2_svr_fun(prob, param, C); TRON tron_obj(fun_obj, param->eps); tron_obj.set_print_string(liblinear_print_string); tron_obj.tron(w); @@ -3559,6 +3631,18 @@ const char *check_parameter(const problem *prob, const parameter *param) if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM) return "prob->bias >=0, but this is ignored in ONECLASS_SVM"; + if(param->regularize_bias == 0) + { + if(prob->bias != 1.0) + return "To not regularize bias, must specify -B 1 along with -R"; + if(param->solver_type != L2R_LR + && param->solver_type != L2R_L2LOSS_SVC + && param->solver_type != L1R_L2LOSS_SVC + && param->solver_type != L1R_LR + && param->solver_type != L2R_L2LOSS_SVR) + return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR"; + } + if(param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC_DUAL && param->solver_type != L2R_L2LOSS_SVC diff --git a/linear.h b/linear.h index 368ba2f..0180f04 100644 --- a/linear.h +++ b/linear.h @@ -38,6 +38,7 @@ struct parameter double p; double nu; double *init_sol; + int regularize_bias; }; struct model diff --git a/train.c b/train.c index bd0af94..ef8fe70 100644 --- a/train.c +++ b/train.c @@ -50,6 +50,8 @@ void exit_with_help() " |f'(alpha)|_1 <= eps |f'(alpha0)|,\n" " where f is the dual function (default 0.1)\n" "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" + "-R : not regularize the bias; must with -B 1 to have the bias; DON'T use this unless you know what it is\n" + " (for -s 0, 2, 5, 6, 11)\n" "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" "-v n: n-fold cross validation mode\n" "-C : find parameters (C for -s 0, 2 and C, p for -s 11)\n" @@ -218,6 +220,7 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode param.nu = 0.5; param.eps = INF; // see setting below param.nr_weight = 0; + param.regularize_bias = 1; param.weight_label = NULL; param.weight = NULL; param.init_sol = NULL; @@ -291,6 +294,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode i--; break; + case 'R': + param.regularize_bias = 0; + i--; + break; + default: fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); exit_with_help(); -- 2.40.0