From: Chih-Jen Lin <cjlin@csie.ntu.edu.tw>
Date: Fri, 16 Oct 2020 12:40:20 +0000 (+0800)
Subject: Two changes in newton.cpp
X-Git-Tag: v242~6
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=07208b8beb4bc6545fcca2182734e0963cdcedaa;p=liblinear

Two changes in newton.cpp

- in the previous version, the output is like

init f 1.403e+07
iter  1 f 5.390e+06 |g| 6.197e+05 CG   2 step_size 1.00e+00

we indeed print

f(w_k) |g(w_{k-1})|

So |g| 6.197e+05 is the grad at f(0) = 1.403e+07.

The output is changed to

init f 1.403e+07 |g| 6.197e+05
iter  1 f 5.390e+06 |g| 2.118e+05 CG   2 step_size 1.00e+00
iter  2 f 1.925e+06 |g| 7.244e+04 CG   4 step_size 1.00e+00

Each line shows the new f and |g| after an iteration with
CG steps and line search step_size

- in pcg(), we calculate

        alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);

before the stopping condition based on quadratic approximations.
But after the previous CG step, the new zTr and dHd may both be zero
(or very close to zero). Then 0/0 may occur. In such a situation CG
should stop. Now a safeguard check is added:

+		dHd = ddot_(&n, d, &inc, Hd, &inc);
+		// avoid 0/0 in getting alpha
+		if (dHd <= 1.0e-16)
+			break;
---

diff --git a/newton.cpp b/newton.cpp
index bd09fbb..17ba7de 100644
--- a/newton.cpp
+++ b/newton.cpp
@@ -117,9 +117,9 @@ void NEWTON::newton(double *w)
 	delete [] w0;
 
 	f = fun_obj->fun(w);
-	info("init f %5.3e\n", f);
 	fun_obj->grad(w, g);
 	double gnorm = dnrm2_(&n, g, &inc);
+	info("init f %5.3e |g| %5.3e\n", f, gnorm);
 
 	if (gnorm <= eps*gnorm0)
 		search = 0;
@@ -132,7 +132,7 @@ void NEWTON::newton(double *w)
 		cg_iter = pcg(g, M, s, r);
 
 		fold = f;
-		step_size = fun_obj->linesearch_and_update(w, s, & f, g, init_step_size);
+		step_size = fun_obj->linesearch_and_update(w, s, &f, g, init_step_size);
 
 		if (step_size == 0)
 		{
@@ -140,14 +140,11 @@ void NEWTON::newton(double *w)
 			break;
 		}
 
-		info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size);
-
-		actred = fold - f;
-		iter++;
-
 		fun_obj->grad(w, g);
-
 		gnorm = dnrm2_(&n, g, &inc);
+
+		info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size);
+		
 		if (gnorm <= eps*gnorm0)
 			break;
 		if (f < -1.0e+32)
@@ -155,11 +152,14 @@ void NEWTON::newton(double *w)
 			info("WARNING: f < -1.0e+32\n");
 			break;
 		}
+		actred = fold - f;
 		if (fabs(actred) <= 1.0e-12*fabs(f))
 		{
 			info("WARNING: actred too small\n");
 			break;
 		}
+
+		iter++;
 	}
 
 	delete[] g;
@@ -175,7 +175,7 @@ int NEWTON::pcg(double *g, double *M, double *s, double *r)
 	double one = 1;
 	double *d = new double[n];
 	double *Hd = new double[n];
-	double zTr, znewTrnew, alpha, beta, cgtol;
+	double zTr, znewTrnew, alpha, beta, cgtol, dHd;
 	double *z = new double[n];
 	double Q = 0, newQ, Qdiff;
 
@@ -196,9 +196,14 @@ int NEWTON::pcg(double *g, double *M, double *s, double *r)
 	while (cg_iter < max_cg_iter)
 	{
 		cg_iter++;
-		fun_obj->Hv(d, Hd);
 
-		alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);
+		fun_obj->Hv(d, Hd);
+		dHd = ddot_(&n, d, &inc, Hd, &inc);
+		// avoid 0/0 in getting alpha
+		if (dHd <= 1.0e-16)
+			break;
+		
+		alpha = zTr/dHd;
 		daxpy_(&n, &alpha, d, &inc, s, &inc);
 		alpha = -alpha;
 		daxpy_(&n, &alpha, Hd, &inc, r, &inc);