From ab84f1062fb46cb7c3759a5266d66f1a47a77a20 Mon Sep 17 00:00:00 2001 From: eaudex Date: Mon, 7 Mar 2011 08:21:31 +0000 Subject: [PATCH] In solve_l1r_lr(.), replace CDN with newGLMNET. In solve_l1r_l2_svc(.), change the stopping condition from inf-norm to 1-norm. --- linear.cpp | 355 +++++++++++++++++++++++++++++++++-------------------- train.c | 2 +- 2 files changed, 222 insertions(+), 135 deletions(-) diff --git a/linear.cpp b/linear.cpp index a97b6bc..ded6345 100644 --- a/linear.cpp +++ b/linear.cpp @@ -1086,8 +1086,8 @@ static void solve_l1r_l2_svc( double sigma = 0.01; double d, G_loss, G, H; double Gmax_old = INF; - double Gmax_new; - double Gmax_init; + double Gmax_new, Gnorm1_new; + double Gnorm1_init; double d_old, d_diff; double loss_old, loss_new; double appxcond, cond; @@ -1126,7 +1126,8 @@ static void solve_l1r_l2_svc( while(iter < max_iter) { - Gmax_new = 0; + Gmax_new = 0; + Gnorm1_new = 0; for(j=0; jl; int w_size = prob_col->n; - int j, s, iter = 0; + int j, s, newton_iter=0, iter=0; + int max_newton_iter = 100; int max_iter = 1000; - int active_size = w_size; int max_num_linesearch = 20; + int active_size; + int QP_active_size; - double x_min = 0; + double nu = 1e-12; + double inner_eps = 1; double sigma = 0.01; - double d, G, H; + double w_norm=0, w_norm_new; + double z, G, H; + double Gnorm1_init; double Gmax_old = INF; - double Gmax_new; - double Gmax_init; - double sum1, appxcond1; - double sum2, appxcond2; - double cond; + double Gmax_new, Gnorm1_new; + double QP_Gmax_old = INF; + double QP_Gmax_new, QP_Gnorm1_new; + double delta, negsum_xTd, cond; int *index = new int[w_size]; schar *y = new schar[l]; + double *Hdiag = new double[w_size]; + double *Grad = new double[w_size]; + double *wpd = new double[w_size]; + double *xjneg_sum = new double[w_size]; + double *xTd = new double[l]; double *exp_wTx = new double[l]; double *exp_wTx_new = new double[l]; - double *xj_max = new double[w_size]; - double *C_sum = new double[w_size]; - double *xjneg_sum = new double[w_size]; - double *xjpos_sum = new double[w_size]; + double *tau = new double[l]; + double *D = new double[l]; feature_node *x; double C[3] = {Cn,0,Cp}; for(j=0; jy[j] > 0) y[j] = 1; else y[j] = -1; + + // assume initial w is 0 + exp_wTx[j] = 1; + tau[j] = C[GETI(j)]*0.5; + D[j] = C[GETI(j)]*0.25; } for(j=0; jx[j]; while(x->index != -1) { int ind = x->index-1; - double val = x->value; - x_min = min(x_min, val); - xj_max[j] = max(xj_max[j], val); - C_sum[j] += C[GETI(ind)]; if(y[ind] == -1) - xjneg_sum[j] += C[GETI(ind)]*val; - else - xjpos_sum[j] += C[GETI(ind)]*val; + xjneg_sum[j] += C[GETI(ind)]*x->value; x++; } } - while(iter < max_iter) + while(newton_iter < max_newton_iter) { Gmax_new = 0; - - for(j=0; jx[j]; while(x->index != -1) { int ind = x->index-1; - double exp_wTxind = exp_wTx[ind]; - double tmp1 = x->value/(1+exp_wTxind); - double tmp2 = C[GETI(ind)]*tmp1; - double tmp3 = tmp2*exp_wTxind; - sum2 += tmp2; - sum1 += tmp3; - H += tmp1*tmp3; + Hdiag[j] += x->value*x->value*D[ind]; + tmp += x->value*tau[ind]; x++; } + Grad[j] = -tmp + xjneg_sum[j]; - G = -sum2 + xjneg_sum[j]; - - double Gp = G+1; - double Gn = G-1; + double Gp = Grad[j]+1; + double Gn = Grad[j]-1; double violation = 0; if(w[j] == 0) { @@ -1460,6 +1455,7 @@ static void solve_l1r_lr( violation = -Gp; else if(Gn > 0) violation = Gn; + //outer-level shrinking else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; @@ -1474,126 +1470,214 @@ static void solve_l1r_lr( violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + } - // obtain Newton direction d - if(Gp <= H*w[j]) - d = -Gp/H; - else if(Gn >= H*w[j]) - d = -Gn/H; - else - d = -w[j]; + if(newton_iter == 0) + Gnorm1_init = Gnorm1_new; - if(fabs(d) < 1.0e-12) - continue; + if(Gnorm1_new <= eps*Gnorm1_init) + break; - d = min(max(d,-10.0),10.0); + iter = 0; + QP_Gmax_old = INF; + QP_active_size = active_size; - double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; - int num_linesearch; - for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) + for(int i=0; i=0 - if(x_min >= 0) + x = prob_col->x[j]; + G = Grad[j] + (wpd[j]-w[j])*nu; + while(x->index != -1) { - double tmp = exp(d*xj_max[j]); - appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j]; - appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j]; - if(min(appxcond1,appxcond2) <= 0) + int ind = x->index-1; + G += x->value*D[ind]*xTd[ind]; + x++; + } + + double Gp = G+1; + double Gn = G-1; + double violation = 0; + if(wpd[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //inner-level shrinking + else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) { - x = prob_col->x[j]; - while(x->index != -1) - { - exp_wTx[x->index-1] *= exp(d*x->value); - x++; - } - break; + QP_active_size--; + swap(index[s], index[QP_active_size]); + s--; + continue; } } + else if(wpd[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + QP_Gmax_new = max(QP_Gmax_new, violation); + QP_Gnorm1_new += violation; - cond += d*xjneg_sum[j]; + // obtain solution of one-variable problem + if(Gp <= H*wpd[j]) + z = -Gp/H; + else if(Gn >= H*wpd[j]) + z = -Gn/H; + else + z = -wpd[j]; + + if(fabs(z) < 1.0e-12) + continue; + z = min(max(z,-10.0),10.0); + + wpd[j] += z; - int i = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; - double exp_dx = exp(d*x->value); - exp_wTx_new[i] = exp_wTx[ind]*exp_dx; - cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); - x++; i++; + xTd[ind] += x->value*z; + x++; } + } - if(cond <= 0) - { - int i = 0; - x = prob_col->x[j]; - while(x->index != -1) - { - int ind = x->index-1; - exp_wTx[ind] = exp_wTx_new[i]; - x++; i++; - } + iter++; + //if(iter % 10 == 0) + // info("."); + + if(QP_Gnorm1_new <= inner_eps*Gnorm1_init) + { + //inner stopping + if(QP_active_size == active_size) break; - } + //active set reactivation else { - d *= 0.5; - delta *= 0.5; + info("*"); + QP_active_size = active_size; + QP_Gmax_old = INF; + continue; } } - w[j] += d; + QP_Gmax_old = QP_Gmax_new; + } - // recompute exp_wTx[] if line search takes too many steps - if(num_linesearch >= max_num_linesearch) + if(iter >= max_iter) + info("WARNING: reaching max number of inner iterations\n"); + + delta = 0; + w_norm_new = 0; + for(j=0; jx[i]; - while(x->index != -1) - { - exp_wTx[x->index-1] += w[i]*x->value; - x++; - } + exp_wTx[i] = exp_wTx_new[i]; + double tau_tmp = 1/(1+exp_wTx[i]); + tau[i] = C[GETI(i)]*tau_tmp; + D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp; } - + break; + } + else + { + w_norm_new = 0; + for(j=0; j= max_num_linesearch) { - if(active_size == w_size) - break; - else + for(int i=0; ix[i]; + while(x->index != -1) + { + exp_wTx[x->index-1] += w[i]*x->value; + x++; + } } + + for(int i=0; i= max_iter) - info("\nWARNING: reaching max number of iterations\n"); + info("=========================\n"); + info("optimization finished, #iter = %d\n", newton_iter); + if(newton_iter >= max_newton_iter) + info("WARNING: reaching max number of iterations\n"); // calculate objective value @@ -1616,12 +1700,15 @@ static void solve_l1r_lr( delete [] index; delete [] y; + delete [] Hdiag; + delete [] Grad; + delete [] wpd; + delete [] xjneg_sum; + delete [] xTd; delete [] exp_wTx; delete [] exp_wTx_new; - delete [] xj_max; - delete [] C_sum; - delete [] xjneg_sum; - delete [] xjpos_sum; + delete [] tau; + delete [] D; } // transpose matrix X from row format to column format diff --git a/train.c b/train.c index 60c8dae..4920313 100644 --- a/train.c +++ b/train.c @@ -33,7 +33,7 @@ void exit_with_help() " -s 1, 3, 4 and 7\n" " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" " -s 5 and 6\n" - " |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,\n" + " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" " where f is the primal function (default 0.01)\n" "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" -- 2.40.0