From c923f559c944e2e16031f14f99c07653b8e5fd04 Mon Sep 17 00:00:00 2001 From: b92paul Date: Tue, 17 Oct 2017 18:51:40 +0800 Subject: [PATCH] Add mixed diagonal preconditioner to TRON method We use (1-alpha)*I + alpha*diag(H_k) with alpha=0.01 as a preconditioner to run PCG in TRON method. Following lines are the changes: - Change TRON::trcg to TRON::trpcg with additional argument M for PCG iterations - Add l2r_l2_svc_fun::get_diagH and l2r_l2_svc_fun::get_diagH to get diagonal component of Hessian - Add static function uTMv for weighted dot for vector u and v with weight M For detailed analysis, please check http://www.csie.ntu.edu.tw/~cjlin/papers/tron_pcg/precondition.pdf --- linear.cpp | 44 ++++++++++++++++++++++++++ tron.cpp | 90 ++++++++++++++++++++++++++++++++++++------------------ tron.h | 3 +- 3 files changed, 106 insertions(+), 31 deletions(-) diff --git a/linear.cpp b/linear.cpp index 335941a..9b49892 100644 --- a/linear.cpp +++ b/linear.cpp @@ -91,6 +91,7 @@ public: void Hv(double *s, double *Hs); int get_nr_variable(void); + void get_diagH(double *M); private: void Xv(double *v, double *Xv); @@ -191,6 +192,27 @@ void l2r_lr_fun::Hv(double *s, double *Hs) Hs[i] = s[i] + Hs[i]; } +void l2r_lr_fun::get_diagH(double *M) +{ + int i; + int l = prob->l; + int w_size=get_nr_variable(); + feature_node **x = prob->x; + + for (i=0; iindex!=-1) + { + M[s->index-1] += s->value*s->value*C[i]*D[i]; + s++; + } + } +} + void l2r_lr_fun::Xv(double *v, double *Xv) { int i; @@ -225,6 +247,7 @@ public: void Hv(double *s, double *Hs); int get_nr_variable(void); + void get_diagH(double *M); protected: void Xv(double *v, double *Xv); @@ -304,6 +327,27 @@ int l2r_l2_svc_fun::get_nr_variable(void) return prob->n; } +void l2r_l2_svc_fun::get_diagH(double *M) +{ + int i; + int w_size=get_nr_variable(); + feature_node **x = prob->x; + + for (i=0; iindex!=-1) + { + M[s->index-1] += s->value*s->value*C[idx]*2; + s++; + } + } +} + void l2r_l2_svc_fun::Hv(double *s, double *Hs) { int i; diff --git a/tron.cpp b/tron.cpp index 67175b3..d662d99 100644 --- a/tron.cpp +++ b/tron.cpp @@ -31,6 +31,19 @@ static void default_print(const char *buf) fflush(stdout); } +static double uTMv(int n, double *u, double *M, double *v) +{ + const int m = n-4; + double res = 0; + int i; + for (i=0; iget_nr_variable(); int i, cg_iter; - double delta, snorm, one=1.0; + double delta=0, sMnorm, one=1.0; double alpha, f, fnew, prered, actred, gs; int search = 1, iter = 1, inc = 1; double *s = new double[n]; double *r = new double[n]; double *g = new double[n]; + const double alpha_pcg = 0.01; + double *M = new double[n]; + // calculate gradient norm at w=0 for stopping condition. double *w0 = new double[n]; for (i=0; ifun(w); fun_obj->grad(w, g); - delta = dnrm2_(&n, g, &inc); - double gnorm = delta; + double gnorm = dnrm2_(&n, g, &inc); if (gnorm <= eps*gnorm0) search = 0; @@ -94,7 +109,12 @@ void TRON::tron(double *w) bool reach_boundary; while (iter <= max_iter && search) { - cg_iter = trcg(delta, g, s, r, &reach_boundary); + fun_obj->get_diagH(M); + for(i=0; iget_nr_variable(); double one = 1; double *d = new double[n]; double *Hd = new double[n]; - double rTr, rnewTrnew, alpha, beta, cgtol; + double zTr, znewTrnew, alpha, beta, cgtol; + double *z = new double[n]; *reach_boundary = false; for (i=0; iHv(d, Hd); - alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); + alpha = zTr/ddot_(&n, d, &inc, Hd, &inc); daxpy_(&n, &alpha, d, &inc, s, &inc); - if (dnrm2_(&n, s, &inc) > delta) + + double sMnorm = sqrt(uTMv(n, s, M, s)); + if (sMnorm > delta) { info("cg reaches trust region boundary\n"); *reach_boundary = true; alpha = -alpha; daxpy_(&n, &alpha, d, &inc, s, &inc); - double std = ddot_(&n, s, &inc, d, &inc); - double sts = ddot_(&n, s, &inc, s, &inc); - double dtd = ddot_(&n, d, &inc, d, &inc); + double sTMd = uTMv(n, s, M, d); + double sTMs = uTMv(n, s, M, s); + double dTMd = uTMv(n, d, M, d); double dsq = delta*delta; - double rad = sqrt(std*std + dtd*(dsq-sts)); - if (std >= 0) - alpha = (dsq - sts)/(std + rad); + double rad = sqrt(sTMd*sTMd + dTMd*(dsq-sTMs)); + if (sTMd >= 0) + alpha = (dsq - sTMs)/(sTMd + rad); else - alpha = (rad - std)/dtd; + alpha = (rad - sTMd)/dTMd; daxpy_(&n, &alpha, d, &inc, s, &inc); alpha = -alpha; daxpy_(&n, &alpha, Hd, &inc, r, &inc); @@ -221,15 +247,19 @@ int TRON::trcg(double delta, double *g, double *s, double *r, bool *reach_bounda } alpha = -alpha; daxpy_(&n, &alpha, Hd, &inc, r, &inc); - rnewTrnew = ddot_(&n, r, &inc, r, &inc); - beta = rnewTrnew/rTr; + + for (i=0; i