Add mixed diagonal preconditioner to TRON method

author b92paul <b92paul@gmail.com>

Tue, 17 Oct 2017 10:51:40 +0000 (18:51 +0800)

committer b92paul <b92paul@gmail.com>

Tue, 17 Oct 2017 10:51:40 +0000 (18:51 +0800)
author b92paul <b92paul@gmail.com>
Tue, 17 Oct 2017 10:51:40 +0000 (18:51 +0800)
committer b92paul <b92paul@gmail.com>
Tue, 17 Oct 2017 10:51:40 +0000 (18:51 +0800)
diff --git a/linear.cpp b/linear.cpp

index 335941a43bf7a2f8a239d8333d7324bb46e313c6..9b49892a14dca61773982bd061d14cccc41aad14 100644 (file)
--- a/linear.cpp
+++ b/linear.cpp
@@ -91,6 +91,7 @@ public:
         void Hv(double *s, double *Hs);
  
         int get_nr_variable(void);
+       void get_diagH(double *M);
  
  private:
         void Xv(double *v, double *Xv);
@@ -191,6 +192,27 @@ void l2r_lr_fun::Hv(double *s, double *Hs)
                 Hs[i] = s[i] + Hs[i];
  }
  
+void l2r_lr_fun::get_diagH(double *M)
+{
+       int i;
+       int l = prob->l;
+       int w_size=get_nr_variable();
+       feature_node **x = prob->x;
+
+       for (i=0; i<w_size; i++)
+               M[i] = 1;
+
+       for (i=0; i<l; i++)
+       {
+               feature_node *s = x[i];
+               while (s->index!=-1)
+               {
+                       M[s->index-1] += s->value*s->value*C[i]*D[i];
+                       s++;
+               }
+       }
+}
+
  void l2r_lr_fun::Xv(double *v, double *Xv)
  {
         int i;
@@ -225,6 +247,7 @@ public:
         void Hv(double *s, double *Hs);
  
         int get_nr_variable(void);
+       void get_diagH(double *M);
  
  protected:
         void Xv(double *v, double *Xv);
@@ -304,6 +327,27 @@ int l2r_l2_svc_fun::get_nr_variable(void)
         return prob->n;
  }
  
+void l2r_l2_svc_fun::get_diagH(double *M)
+{
+       int i;
+       int w_size=get_nr_variable();
+       feature_node **x = prob->x;
+
+       for (i=0; i<w_size; i++)
+               M[i] = 1;
+
+       for (i=0; i<sizeI; i++)
+       {
+               int idx = I[i];
+               feature_node *s = x[idx];
+               while (s->index!=-1)
+               {
+                       M[s->index-1] += s->value*s->value*C[idx]*2;
+                       s++;
+               }
+       }
+}
+
  void l2r_l2_svc_fun::Hv(double *s, double *Hs)
  {
         int i;
diff --git a/tron.cpp b/tron.cpp

index 67175b32880663adc1d087e12197a2ba96d63709..d662d99191246f8eb5b5a342c9f1c18b0a194b8f 100644 (file)
--- a/tron.cpp
+++ b/tron.cpp
@@ -31,6 +31,19 @@ static void default_print(const char *buf)
         fflush(stdout);
  }
  
+static double uTMv(int n, double *u, double *M, double *v)
+{
+       const int m = n-4;
+       double res = 0;
+       int i;
+       for (i=0; i<m; i+=5)
+               res += u[i]*M[i]*v[i]+u[i+1]*M[i+1]*v[i+1]+u[i+2]*M[i+2]*v[i+2]+
+                       u[i+3]*M[i+3]*v[i+3]+u[i+4]*M[i+4]*v[i+4];
+       for (; i<n; i++)
+               res += u[i]*M[i]*v[i];
+       return res;
+}
+
  void TRON::info(const char *fmt,...)
  {
         char buf[BUFSIZ];
@@ -64,13 +77,16 @@ void TRON::tron(double *w)
  
         int n = fun_obj->get_nr_variable();
         int i, cg_iter;
-       double delta, snorm, one=1.0;
+       double delta=0, sMnorm, one=1.0;
         double alpha, f, fnew, prered, actred, gs;
         int search = 1, iter = 1, inc = 1;
         double *s = new double[n];
         double *r = new double[n];
         double *g = new double[n];
  
+       const double alpha_pcg = 0.01;
+       double *M = new double[n];
+
         // calculate gradient norm at w=0 for stopping condition.
         double *w0 = new double[n];
         for (i=0; i<n; i++)
@@ -82,8 +98,7 @@ void TRON::tron(double *w)
  
         f = fun_obj->fun(w);
         fun_obj->grad(w, g);
-       delta = dnrm2_(&n, g, &inc);
-       double gnorm = delta;
+       double gnorm = dnrm2_(&n, g, &inc);
  
         if (gnorm <= eps*gnorm0)
                 search = 0;
@@ -94,7 +109,12 @@ void TRON::tron(double *w)
         bool reach_boundary;
         while (iter <= max_iter && search)
         {
-               cg_iter = trcg(delta, g, s, r, &reach_boundary);
+               fun_obj->get_diagH(M);
+               for(i=0; i<n; i++)
+                       M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
+               if (iter == 1)
+                       delta = sqrt(uTMv(n, g, M, g));
+               cg_iter = trpcg(delta, g, M, s, r, &reach_boundary);
  
                 memcpy(w_new, w, sizeof(double)*n);
                 daxpy_(&n, &one, s, &inc, w_new, &inc);
@@ -107,11 +127,11 @@ void TRON::tron(double *w)
                 actred = f - fnew;
  
                 // On the first iteration, adjust the initial step bound.
-               snorm = dnrm2_(&n, s, &inc);
+               sMnorm = sqrt(uTMv(n, s, M, s));
                 if (iter == 1)
-                       delta = min(delta, snorm);
+                       delta = min(delta, sMnorm);
  
-               // Compute prediction alpha*snorm of the step.
+               // Compute prediction alpha*sMnorm of the step.
                 if (fnew - f - gs <= 0)
                         alpha = sigma3;
                 else
@@ -119,17 +139,17 @@ void TRON::tron(double *w)
  
                 // Update the trust region bound according to the ratio of actual to predicted reduction.
                 if (actred < eta0*prered)
-                       delta = min(max(alpha, sigma1)*snorm, sigma2*delta);
+                       delta = min(alpha*sMnorm, sigma2*delta);
                 else if (actred < eta1*prered)
-                       delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta));
+                       delta = max(sigma1*delta, min(alpha*sMnorm, sigma2*delta));
                 else if (actred < eta2*prered)
-                       delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta));
+                       delta = max(sigma1*delta, min(alpha*sMnorm, sigma3*delta));
                 else
                 {
                         if (reach_boundary)
                                 delta = sigma3*delta;
                         else
-                               delta = max(delta, min(alpha*snorm, sigma3*delta));
+                               delta = max(delta, min(alpha*sMnorm, sigma3*delta));
                 }
  
                 info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
@@ -167,53 +187,59 @@ void TRON::tron(double *w)
         delete[] r;
         delete[] w_new;
         delete[] s;
+       delete[] M;
  }
  
-int TRON::trcg(double delta, double *g, double *s, double *r, bool *reach_boundary)
+int TRON::trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary)
  {
         int i, inc = 1;
         int n = fun_obj->get_nr_variable();
         double one = 1;
         double *d = new double[n];
         double *Hd = new double[n];
-       double rTr, rnewTrnew, alpha, beta, cgtol;
+       double zTr, znewTrnew, alpha, beta, cgtol;
+       double *z = new double[n];
  
         *reach_boundary = false;
         for (i=0; i<n; i++)
         {
                 s[i] = 0;
                 r[i] = -g[i];
-               d[i] = r[i];
+               z[i] = r[i] / M[i];
+               d[i] = z[i];
         }
-       cgtol = eps_cg*dnrm2_(&n, g, &inc);
  
+       zTr = ddot_(&n, z, &inc, r, &inc);
+       cgtol = eps_cg*sqrt(zTr);
         int cg_iter = 0;
-       rTr = ddot_(&n, r, &inc, r, &inc);
+
         while (1)
         {
-               if (dnrm2_(&n, r, &inc) <= cgtol)
+               if (sqrt(zTr) <= cgtol)
                         break;
                 cg_iter++;
                 fun_obj->Hv(d, Hd);
  
-               alpha = rTr/ddot_(&n, d, &inc, Hd, &inc);
+               alpha = zTr/ddot_(&n, d, &inc, Hd, &inc);
                 daxpy_(&n, &alpha, d, &inc, s, &inc);
-               if (dnrm2_(&n, s, &inc) > delta)
+
+               double sMnorm = sqrt(uTMv(n, s, M, s));
+               if (sMnorm > delta)
                 {
                         info("cg reaches trust region boundary\n");
                         *reach_boundary = true;
                         alpha = -alpha;
                         daxpy_(&n, &alpha, d, &inc, s, &inc);
  
-                       double std = ddot_(&n, s, &inc, d, &inc);
-                       double sts = ddot_(&n, s, &inc, s, &inc);
-                       double dtd = ddot_(&n, d, &inc, d, &inc);
+                       double sTMd = uTMv(n, s, M, d);
+                       double sTMs = uTMv(n, s, M, s);
+                       double dTMd = uTMv(n, d, M, d);
                         double dsq = delta*delta;
-                       double rad = sqrt(std*std + dtd*(dsq-sts));
-                       if (std >= 0)
-                               alpha = (dsq - sts)/(std + rad);
+                       double rad = sqrt(sTMd*sTMd + dTMd*(dsq-sTMs));
+                       if (sTMd >= 0)
+                               alpha = (dsq - sTMs)/(sTMd + rad);
                         else
-                               alpha = (rad - std)/dtd;
+                               alpha = (rad - sTMd)/dTMd;
                         daxpy_(&n, &alpha, d, &inc, s, &inc);
                         alpha = -alpha;
                         daxpy_(&n, &alpha, Hd, &inc, r, &inc);
@@ -221,15 +247,19 @@ int TRON::trcg(double delta, double *g, double *s, double *r, bool *reach_bounda
                 }
                 alpha = -alpha;
                 daxpy_(&n, &alpha, Hd, &inc, r, &inc);
-               rnewTrnew = ddot_(&n, r, &inc, r, &inc);
-               beta = rnewTrnew/rTr;
+
+               for (i=0; i<n; i++)
+                       z[i] = r[i] / M[i];
+               znewTrnew = ddot_(&n, z, &inc, r, &inc);
+               beta = znewTrnew/zTr;
                 dscal_(&n, &beta, d, &inc);
-               daxpy_(&n, &one, r, &inc, d, &inc);
-               rTr = rnewTrnew;
+               daxpy_(&n, &one, z, &inc, d, &inc);
+               zTr = znewTrnew;
         }
  
         delete[] d;
         delete[] Hd;
+       delete[] z;
  
         return(cg_iter);
  }
diff --git a/tron.h b/tron.h

index 061623e0f841533f4f13b7b9fded019a09bddbe0..6c39e185d6dade23b05ca48605ffcc3217113b5e 100644 (file)
--- a/tron.h
+++ b/tron.h
@@ -9,6 +9,7 @@ public:
         virtual void Hv(double *s, double *Hs) = 0 ;
  
         virtual int get_nr_variable(void) = 0 ;
+       virtual void get_diagH(double *M) = 0 ;
         virtual ~function(void){}
  };
  
@@ -22,7 +23,7 @@ public:
         void set_print_string(void (*i_print) (const char *buf));
  
  private:
-       int trcg(double delta, double *g, double *s, double *r, bool *reach_boundary);
+       int trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary);
         double norm_inf(int n, double *x);
  
         double eps;
author	b92paul <b92paul@gmail.com>
	Tue, 17 Oct 2017 10:51:40 +0000 (18:51 +0800)
committer	b92paul <b92paul@gmail.com>
	Tue, 17 Oct 2017 10:51:40 +0000 (18:51 +0800)
linear.cpp		patch \| blob \| history
tron.cpp		patch \| blob \| history
tron.h		patch \| blob \| history