From: eaudex Date: Fri, 2 Oct 2009 05:04:56 +0000 (+0000) Subject: L1-regularized L2-loss SVM and Logistic Regression X-Git-Tag: v150~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0538f85d6f8bc48ef71fb1f28f584b1ad204176d;p=liblinear L1-regularized L2-loss SVM and Logistic Regression - sufficient decrease condition - shriking violation - H <- max(H, 1e-12) for L2-loss SVM - swap the indices i and j i for instances and j for features --- diff --git a/linear.cpp b/linear.cpp index 3fdadf4..290a394 100644 --- a/linear.cpp +++ b/linear.cpp @@ -888,7 +888,7 @@ static void solve_l2r_l1l2_svc( // A coordinate descent algorithm for // L1-regularized L2-loss support vector classification // -// min_w \sum |wj| - C \sum max(0, 1-yi w^T xi)^2, +// min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2, // // Given: // x, y, Cp, Cn @@ -902,7 +902,7 @@ static void solve_l1r_l2_svc( { int l = prob_col->l; int w_size = prob_col->n; - int i, s, iter = 0; + int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; int max_num_linesearch = 20; @@ -919,34 +919,33 @@ static void solve_l1r_l2_svc( int *index = new int[w_size]; schar *y = new schar[l]; double *b = new double[l]; // b = 1-ywTx - double *xi_sq = new double[w_size]; + double *xj_sq = new double[w_size]; feature_node *x; // To support weights for instances, // replace C[y[i]] with C[i]. double C[2] = {Cn,Cp}; - for(i=0; iy[i] > 0) - y[i] = 1; + b[j] = 1; + if(prob_col->y[j] > 0) + y[j] = 1; else - y[i] = 0; + y[j] = 0; } - - for(i=0; ix[i]; + w[j] = 0; + index[j] = j; + xj_sq[j] = 0; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; double val = x->value; - x->value *= prob_col->y[ind]; // x->value stores yj*xji - xi_sq[i] += C[y[ind]]*val*val; + x->value *= prob_col->y[ind]; // x->value stores yi*xij + xj_sq[j] += C[y[ind]]*val*val; x++; } } @@ -955,19 +954,19 @@ static void solve_l1r_l2_svc( { Gmax_new = 0; - for(i=0; ix[i]; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; @@ -984,13 +983,18 @@ static void solve_l1r_l2_svc( G = G_loss; H *= 2; + H = max(H, 1e-12); double Gp = G+1; double Gn = G-1; - // shrinking - if(w[i] == 0) + double violation = 0; + if(w[j] == 0) { - if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; swap(index[s], index[active_size]); @@ -998,40 +1002,36 @@ static void solve_l1r_l2_svc( continue; } } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); // obtain Newton direction d - if(Gp <= H*w[i]) - { - G = Gp; + if(Gp <= H*w[j]) d = -Gp/H; - } - else if(Gn >= H*w[i]) - { - G = Gn; + else if(Gn >= H*w[j]) d = -Gn/H; - } else - { - G = 0; - d = -w[i]; - } - - Gmax_new = max(Gmax_new, fabs(G)); + d = -w[j]; if(fabs(d) < 1.0e-12) continue; + double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; d_old = 0; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { d_diff = d_old - d; - cond = fabs(w[i]+d)-fabs(w[i]) + sigma*d*d; + cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; - appxcond = xi_sq[i]*d*d + G_loss*d + cond; + appxcond = xj_sq[j]*d*d + G_loss*d + cond; if(appxcond <= 0) { - x = prob_col->x[i]; + x = prob_col->x[j]; while(x->index != -1) { b[x->index] += d_diff*x->value; @@ -1044,7 +1044,7 @@ static void solve_l1r_l2_svc( { loss_old = 0; loss_new = 0; - x = prob_col->x[i]; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; @@ -1057,10 +1057,10 @@ static void solve_l1r_l2_svc( x++; } } - else + else { loss_new = 0; - x = prob_col->x[i]; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; @@ -1079,25 +1079,26 @@ static void solve_l1r_l2_svc( { d_old = d; d *= 0.5; + delta *= 0.5; } } - w[i] += d; + w[j] += d; // recompute b[] if line search takes too many steps if(num_linesearch >= max_num_linesearch) { info("#"); - for(int j=0; jx[j]; + if(w[i]==0) continue; + x = prob_col->x[i]; while(x->index != -1) { - b[x->index] -= w[j]*x->value; + b[x->index] -= w[i]*x->value; x++; } } @@ -1134,23 +1135,23 @@ static void solve_l1r_l2_svc( double v = 0; int nnz = 0; - for(i=0; ix[i]; + x = prob_col->x[j]; while(x->index != -1) { x->value *= prob_col->y[x->index]; // restore x->value x++; } - if(w[i] != 0) + if(w[j] != 0) { - v += fabs(w[i]); + v += fabs(w[j]); nnz++; } } - for(i=0; i 0) - v += C[y[i]]*b[i]*b[i]; + for(j=0; j 0) + v += C[y[j]]*b[j]*b[j]; info("Objective value = %lf\n", v); info("#nonzeros/#features = %d/%d\n", nnz, w_size); @@ -1158,13 +1159,13 @@ static void solve_l1r_l2_svc( delete [] index; delete [] y; delete [] b; - delete [] xi_sq; + delete [] xj_sq; } // A coordinate descent algorithm for // L1-regularized logistic regression problems // -// min_w \sum |wj| - C \sum log(1+exp(-yi w^T xi)), +// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)), // // Given: // x, y, Cp, Cn @@ -1178,7 +1179,7 @@ static void solve_l1r_lr( { int l = prob_col->l; int w_size = prob_col->n; - int i, s, iter = 0; + int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; int max_num_linesearch = 20; @@ -1197,44 +1198,44 @@ static void solve_l1r_lr( schar *y = new schar[l]; double *exp_wTx = new double[l]; double *exp_wTx_new = new double[l]; - double *xi_max = new double[w_size]; + double *xj_max = new double[w_size]; double *C_sum = new double[w_size]; - double *xineg_sum = new double[w_size]; - double *xipos_sum = new double[w_size]; + double *xjneg_sum = new double[w_size]; + double *xjpos_sum = new double[w_size]; feature_node *x; // To support weights for instances, // replace C[y[i]] with C[i]. double C[2] = {Cn,Cp}; - for(i=0; iy[i] > 0) - y[i] = 1; + exp_wTx[j] = 1; + if(prob_col->y[j] > 0) + y[j] = 1; else - y[i] = 0; + y[j] = 0; } - for(i=0; ix[i]; + w[j] = 0; + index[j] = j; + xj_max[j] = 0; + C_sum[j] = 0; + xjneg_sum[j] = 0; + xjpos_sum[j] = 0; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; double val = x->value; x_min = min(x_min, val); - xi_max[i] = max(xi_max[i], val); - C_sum[i] += C[y[ind]]; + xj_max[j] = max(xj_max[j], val); + C_sum[j] += C[y[ind]]; if(y[ind] == 0) - xineg_sum[i] += C[y[ind]]*val; + xjneg_sum[j] += C[y[ind]]*val; else - xipos_sum[i] += C[y[ind]]*val; + xjpos_sum[j] += C[y[ind]]*val; x++; } } @@ -1243,20 +1244,20 @@ static void solve_l1r_lr( { Gmax_new = 0; - for(i=0; ix[i]; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; @@ -1270,14 +1271,18 @@ static void solve_l1r_lr( x++; } - G = -sum2 + xineg_sum[i]; + G = -sum2 + xjneg_sum[j]; double Gp = G+1; double Gn = G-1; - // shrinking - if(w[i] == 0) + double violation = 0; + if(w[j] == 0) { - if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; swap(index[s], index[active_size]); @@ -1285,44 +1290,40 @@ static void solve_l1r_lr( continue; } } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); // obtain Newton direction d - if(Gp <= H*w[i]) - { - G = Gp; + if(Gp <= H*w[j]) d = -Gp/H; - } - else if(Gn >= H*w[i]) - { - G = Gn; + else if(Gn >= H*w[j]) d = -Gn/H; - } else - { - G = 0; - d = -w[i]; - } - - Gmax_new = max(Gmax_new, fabs(G)); + d = -w[j]; if(fabs(d) < 1.0e-12) continue; d = min(max(d,-10.0),10.0); + double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { - cond = fabs(w[i]+d)-fabs(w[i]) + sigma*d*d; + cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; if(x_min >= 0) { - double tmp = exp(d*xi_max[i]); - appxcond1 = log(1+sum1*(tmp-1)/xi_max[i]/C_sum[i])*C_sum[i] + cond - d*xipos_sum[i]; - appxcond2 = log(1+sum2*(1/tmp-1)/xi_max[i]/C_sum[i])*C_sum[i] + cond + d*xineg_sum[i]; + double tmp = exp(d*xj_max[j]); + appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j]; + appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j]; if(min(appxcond1,appxcond2) <= 0) { - x = prob_col->x[i]; + x = prob_col->x[j]; while(x->index != -1) { exp_wTx[x->index] *= exp(d*x->value); @@ -1332,57 +1333,60 @@ static void solve_l1r_lr( } } - cond += d*xineg_sum[i]; + cond += d*xjneg_sum[j]; - int j = 0; - x = prob_col->x[i]; + int i = 0; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; double exp_dx = exp(d*x->value); - exp_wTx_new[j] = exp_wTx[ind]*exp_dx; - cond += C[y[ind]]*log((1+exp_wTx_new[j])/(exp_dx+exp_wTx_new[j])); - x++; j++; + exp_wTx_new[i] = exp_wTx[ind]*exp_dx; + cond += C[y[ind]]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); + x++; i++; } if(cond <= 0) { - int j = 0; - x = prob_col->x[i]; + int i = 0; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index; - exp_wTx[ind] = exp_wTx_new[j]; - x++; j++; + exp_wTx[ind] = exp_wTx_new[i]; + x++; i++; } break; } else + { d *= 0.5; + delta *= 0.5; + } } - w[i] += d; + w[j] += d; // recompute exp_wTx[] if line search takes too many steps if(num_linesearch >= max_num_linesearch) { info("#"); - for(int j=0; jx[j]; + if(w[i]==0) continue; + x = prob_col->x[i]; while(x->index != -1) { - exp_wTx[x->index] += w[j]*x->value; + exp_wTx[x->index] += w[i]*x->value; x++; } } - for(int j=0; j