From: Chih-Jen Lin Date: Fri, 16 Oct 2020 12:40:20 +0000 (+0800) Subject: Two changes in newton.cpp X-Git-Tag: v242~6 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=07208b8beb4bc6545fcca2182734e0963cdcedaa;p=liblinear Two changes in newton.cpp - in the previous version, the output is like init f 1.403e+07 iter 1 f 5.390e+06 |g| 6.197e+05 CG 2 step_size 1.00e+00 we indeed print f(w_k) |g(w_{k-1})| So |g| 6.197e+05 is the grad at f(0) = 1.403e+07. The output is changed to init f 1.403e+07 |g| 6.197e+05 iter 1 f 5.390e+06 |g| 2.118e+05 CG 2 step_size 1.00e+00 iter 2 f 1.925e+06 |g| 7.244e+04 CG 4 step_size 1.00e+00 Each line shows the new f and |g| after an iteration with CG steps and line search step_size - in pcg(), we calculate alpha = zTr/ddot_(&n, d, &inc, Hd, &inc); before the stopping condition based on quadratic approximations. But after the previous CG step, the new zTr and dHd may both be zero (or very close to zero). Then 0/0 may occur. In such a situation CG should stop. Now a safeguard check is added: + dHd = ddot_(&n, d, &inc, Hd, &inc); + // avoid 0/0 in getting alpha + if (dHd <= 1.0e-16) + break; --- diff --git a/newton.cpp b/newton.cpp index bd09fbb..17ba7de 100644 --- a/newton.cpp +++ b/newton.cpp @@ -117,9 +117,9 @@ void NEWTON::newton(double *w) delete [] w0; f = fun_obj->fun(w); - info("init f %5.3e\n", f); fun_obj->grad(w, g); double gnorm = dnrm2_(&n, g, &inc); + info("init f %5.3e |g| %5.3e\n", f, gnorm); if (gnorm <= eps*gnorm0) search = 0; @@ -132,7 +132,7 @@ void NEWTON::newton(double *w) cg_iter = pcg(g, M, s, r); fold = f; - step_size = fun_obj->linesearch_and_update(w, s, & f, g, init_step_size); + step_size = fun_obj->linesearch_and_update(w, s, &f, g, init_step_size); if (step_size == 0) { @@ -140,14 +140,11 @@ void NEWTON::newton(double *w) break; } - info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size); - - actred = fold - f; - iter++; - fun_obj->grad(w, g); - gnorm = dnrm2_(&n, g, &inc); + + info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size); + if (gnorm <= eps*gnorm0) break; if (f < -1.0e+32) @@ -155,11 +152,14 @@ void NEWTON::newton(double *w) info("WARNING: f < -1.0e+32\n"); break; } + actred = fold - f; if (fabs(actred) <= 1.0e-12*fabs(f)) { info("WARNING: actred too small\n"); break; } + + iter++; } delete[] g; @@ -175,7 +175,7 @@ int NEWTON::pcg(double *g, double *M, double *s, double *r) double one = 1; double *d = new double[n]; double *Hd = new double[n]; - double zTr, znewTrnew, alpha, beta, cgtol; + double zTr, znewTrnew, alpha, beta, cgtol, dHd; double *z = new double[n]; double Q = 0, newQ, Qdiff; @@ -196,9 +196,14 @@ int NEWTON::pcg(double *g, double *M, double *s, double *r) while (cg_iter < max_cg_iter) { cg_iter++; - fun_obj->Hv(d, Hd); - alpha = zTr/ddot_(&n, d, &inc, Hd, &inc); + fun_obj->Hv(d, Hd); + dHd = ddot_(&n, d, &inc, Hd, &inc); + // avoid 0/0 in getting alpha + if (dHd <= 1.0e-16) + break; + + alpha = zTr/dHd; daxpy_(&n, &alpha, d, &inc, s, &inc); alpha = -alpha; daxpy_(&n, &alpha, Hd, &inc, r, &inc);