From: leepei
Date: Thu, 31 Dec 2015 12:39:06 +0000 (+0800)
Subject: Revert "initial commit of line search branch"
X-Git-Tag: v211~12^2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=557a9ceedaceb4c636c7089725ea17b2c702a573;p=liblinear

Revert "initial commit of line search branch"

This reverts commit 2f55d046ff6aa48171ceee85572d4783cedb2f38.
---

diff --git a/linear.cpp b/linear.cpp
index 620781d..b7bfeca 100644
--- a/linear.cpp
+++ b/linear.cpp
@@ -79,47 +79,47 @@ public:
 	}
 };
 
-class l2r_erm_fun: public function
+class l2r_lr_fun: public function
 {
 public:
-	l2r_erm_fun(const problem *prob, double *C);
-	~l2r_erm_fun();
+	l2r_lr_fun(const problem *prob, double *C);
+	~l2r_lr_fun();
 
 	double fun(double *w);
-	double line_search(double *d, double *w, double *g, double alpha, double *f);
+	void grad(double *w, double *g);
+	void Hv(double *s, double *Hs);
+
 	int get_nr_variable(void);
 
-protected:
-	virtual double C_times_loss(int i, double wx_i) = 0;
+private:
 	void Xv(double *v, double *Xv);
 	void XTv(double *v, double *XTv);
 
 	double *C;
 	double *z;
+	double *D;
 	const problem *prob;
-	double *wx;
-	double wTw;
-	double current_f;
 };
 
-l2r_erm_fun::l2r_erm_fun(const problem *prob, double *C)
+l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
 {
 	int l=prob->l;
 
 	this->prob = prob;
 
 	z = new double[l];
-	wx = new double[l];
+	D = new double[l];
 	this->C = C;
 }
 
-l2r_erm_fun::~l2r_erm_fun()
+l2r_lr_fun::~l2r_lr_fun()
 {
 	delete[] z;
-	delete[] wx;
+	delete[] D;
 }
 
-double l2r_erm_fun::fun(double *w)
+
+double l2r_lr_fun::fun(double *w)
 {
 	int i;
 	double f=0;
@@ -127,135 +127,21 @@ double l2r_erm_fun::fun(double *w)
 	int l=prob->l;
 	int w_size=get_nr_variable();
 
-	wTw = 0;
 	Xv(w, z);
 
 	for(i=0;i<w_size;i++)
-		wTw += w[i] * w[i];
-	for(i=0;i<l;i++)
-		f += C_times_loss(i, z[i]);
-	f = f + 0.5 * wTw;
-
-	return(f);
-}
-
-int l2r_erm_fun::get_nr_variable(void)
-{
-	return prob->n;
-}
-
-double l2r_erm_fun::line_search(double *d, double *w, double* g, double alpha, double *f)
-{
-	int i;
-	int l = prob->l;
-	double dTd, wTd, gd;
-	double eta = 1e-4;
-	int w_size = get_nr_variable();
-	int max_line_search_iter = 1000;
-	double *y=prob->y;
-	Xv(d, z);
-
-	dTd = 0;
-	wTd = 0;
-	gd = 0;
-	for (i=0;i<w_size;i++)
-	{
-		dTd += d[i] * d[i];
-		wTd += w[i] * d[i];
-		gd += g[i] * d[i];
-	}
-
-	int num_line_search_iter = 0;
-	while (true)
-	{
-		if (++num_line_search_iter >= max_line_search_iter)
-		{
-			f[0] = current_f;
-			return 0;
-		}
-		double loss = 0;
-		for(i=0;i<l;i++)
-			loss += C_times_loss(i, wx[i] + alpha * z[i]);
-		f[0] = loss + 0.5 * (wTw + alpha * alpha * dTd) + alpha * wTd;
-		if (f[0] - current_f <= eta * alpha * gd)
-			break;
+		f += w[i]*w[i];
+	f /= 2.0;
+	for(i=0;i<l;i++)
+	{
+		double yz = y[i]*z[i];
+		if (yz >= 0)
+			f += C[i]*log(1 + exp(-yz));
 		else
-			alpha *= 0.5;
+			f += C[i]*(-yz+log(1 + exp(yz)));
 	}
-	wTw += alpha*alpha * dTd + 2* alpha * wTd;
-	current_f = f[0];
-	return alpha;
-}
-void l2r_erm_fun::Xv(double *v, double *Xv)
-{
-	int i;
-	int l=prob->l;
-	feature_node **x=prob->x;
-
-	for(i=0;i<l;i++)
-		Xv[i]=sparse_operator::dot(v, x[i]);
-}
-
-void l2r_erm_fun::XTv(double *v, double *XTv)
-{
-	int i;
-	int l=prob->l;
-	int w_size=get_nr_variable();
-	feature_node **x=prob->x;
-
-	for(i=0;i<w_size;i++)
-		XTv[i]=0;
-	for(i=0;i<l;i++)
-		sparse_operator::axpy(v[i], x[i], XTv);
-}
-
-class l2r_lr_fun: public l2r_erm_fun
-{
-public:
-	l2r_lr_fun(const problem *prob, double *C);
-	~l2r_lr_fun();
-
-	void grad(double *w, double *g);
-	void Hv(double *s, double *Hs);
-
-private:
-	double *D;
-	double C_times_loss(int i, double wx_i);
-};
-
-l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C):
-	l2r_erm_fun(prob, C)
-{
-	int l=prob->l;
-	D = new double[l];
-}
-
-l2r_lr_fun::~l2r_lr_fun()
-{
-	delete[] D;
-}
-
-double l2r_lr_fun::C_times_loss(int i, double wx_i)
-{
-	double ywx_i = wx_i * prob->y[i];
-	if (ywx_i >= 0)
-		return C[i]*log(1 + exp(-ywx_i));
-	else
-		return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
+
+	return(f);
 }
 
 void l2r_lr_fun::grad(double *w, double *g)
@@ -277,6 +163,11 @@ void l2r_lr_fun::grad(double *w, double *g)
 		g[i] = w[i] + g[i];
 }
 
+int l2r_lr_fun::get_nr_variable(void)
+{
+	return prob->n;
+}
+
 void l2r_lr_fun::Hv(double *s, double *Hs)
 {
 	int i;
@@ -301,43 +192,91 @@ void l2r_lr_fun::Hv(double *s, double *Hs)
 	delete[] wa;
 }
 
-class l2r_l2_svc_fun: public l2r_erm_fun
+void l2r_lr_fun::Xv(double *v, double *Xv)
+{
+	int i;
+	int l=prob->l;
+	feature_node **x=prob->x;
+
+	for(i=0;i<l;i++)
+		Xv[i]=sparse_operator::dot(v, x[i]);
+}
+
+void l2r_lr_fun::XTv(double *v, double *XTv)
+{
+	int i;
+	int l=prob->l;
+	int w_size=get_nr_variable();
+	feature_node **x=prob->x;
+
+	for(i=0;i<w_size;i++)
+		XTv[i]=0;
+	for(i=0;i<l;i++)
+		sparse_operator::axpy(v[i], x[i], XTv);
+}
+
+class l2r_l2_svc_fun: public function
 {
 public:
 	l2r_l2_svc_fun(const problem *prob, double *C);
 	~l2r_l2_svc_fun();
 
+	double fun(double *w);
 	void grad(double *w, double *g);
 	void Hv(double *s, double *Hs);
 
+	int get_nr_variable(void);
+
 protected:
+	void Xv(double *v, double *Xv);
 	void subXTv(double *v, double *XTv);
 
+	double *C;
+	double *z;
 	int *I;
 	int sizeI;
-
-private:
-	double C_times_loss(int i, double wx_i);
+	const problem *prob;
 };
 
-l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C):
-	l2r_erm_fun(prob, C)
+l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
 {
-	I = new int[prob->l];
+	int l=prob->l;
+
+	this->prob = prob;
+
+	z = new double[l];
+	I = new int[l];
+	this->C = C;
 }
 
 l2r_l2_svc_fun::~l2r_l2_svc_fun()
 {
+	delete[] z;
 	delete[] I;
 }
 
-double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
+double l2r_l2_svc_fun::fun(double *w)
 {
-	double d = 1 - prob->y[i] * wx_i;
+	int i;
+	double f=0;
+	double *y=prob->y;
+	int l=prob->l;
+	int w_size=get_nr_variable();
+
+	Xv(w, z);
+
+	for(i=0;i<w_size;i++)
+		f += w[i]*w[i];
+	f /= 2.0;
+	for(i=0;i<l;i++)
+	{
+		z[i] = y[i]*z[i];
+		double d = 1-z[i];
 		if (d > 0)
-		return C[i]*d*d;
-	else
-		return 0;
+			f += C[i]*d*d;
+	}
+
+	return(f);
 }
 
 void l2r_l2_svc_fun::grad(double *w, double *g)
@@ -349,21 +288,23 @@ void l2r_l2_svc_fun::grad(double *w, double *g)
 	sizeI = 0;
 	for (i=0;i<l;i++)
 		if (z[i] < 1)
 		{
 			z[sizeI] = C[i]*y[i]*(z[i]-1);
 			I[sizeI] = i;
 			sizeI++;
 		}
 	subXTv(z, g);
 
 	for(i=0;i<w_size;i++)
 		g[i] = w[i] + 2*g[i];
 }
 
+int l2r_l2_svc_fun::get_nr_variable(void)
+{
+	return prob->n;
+}
+
 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
 {
 	int i;
@@ -387,6 +328,16 @@ void l2r_l2_svc_fun::Hv(double *s, double *Hs)
 	delete[] wa;
 }
 
+void l2r_l2_svc_fun::Xv(double *v, double *Xv)
+{
+	int i;
+	int l=prob->l;
+	feature_node **x=prob->x;
+
+	for(i=0;i<l;i++)
+		Xv[i]=sparse_operator::dot(v, x[i]);
+}
+
 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
 {
 	int i;
@@ ... @@
+	double fun(double *w);
 	void grad(double *w, double *g);
 
 private:
-	double C_times_loss(int i, double wx_i);
 	double p;
 };
 
 l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
 	l2r_l2_svc_fun(prob, C)
 {
 	this->p = p;
 }
 
-double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
+double l2r_l2_svr_fun::fun(double *w)
 {
-	double d = wx_i - prob->y[i];
+	int i;
+	double f=0;
+	double *y=prob->y;
+	int l=prob->l;
+	int w_size=get_nr_variable();
+	double d;
+
+	Xv(w, z);
+
+	for(i=0;i<w_size;i++)
+		f += w[i]*w[i];
+	f /= 2.0;
+	for(i=0;i<l;i++)
+	{
+		d = z[i] - y[i];
 		if(d < -p)
-		return C[i]*(d+p)*(d+p);
+			f += C[i]*(d+p)*(d+p);
 		else if(d > p)
-		return C[i]*(d-p)*(d-p);
-	return 0;
+			f += C[i]*(d-p)*(d-p);
+	}
+
+	return(f);
 }
 
 void l2r_l2_svr_fun::grad(double *w, double *g)
@@ -2235,7 +2202,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 				C[i] = param->C;
 
 			fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
-			TRON tron_obj(fun_obj, param->eps, eps_cg);
+			TRON tron_obj(fun_obj, param->eps);
 			tron_obj.set_print_string(liblinear_print_string);
 			tron_obj.tron(w);
 			delete fun_obj;
diff --git a/tron.cpp b/tron.cpp
index 0ef2090..3ea60f6 100644
--- a/tron.cpp
+++ b/tron.cpp
@@ -56,11 +56,16 @@ TRON::~TRON()
 
 void TRON::tron(double *w)
 {
+	// Parameters for updating the iterates.
+	double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
+
+	// Parameters for updating the trust region size delta.
+	double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
+
 	int n = fun_obj->get_nr_variable();
 	int i, cg_iter;
-	double snorm, step_size, one=1.0;
-	double alpha, f, fnew;
-	double init_step_size = 1;
+	double delta, snorm, one=1.0;
+	double alpha, f, fnew, prered, actred, gs;
 	int search = 1, iter = 1, inc = 1;
 	double *s = new double[n];
 	double *r = new double[n];
@@ -77,7 +82,8 @@ void TRON::tron(double *w)
 
 	f = fun_obj->fun(w);
 	fun_obj->grad(w, g);
-	double gnorm = dnrm2_(&n, g, &inc);
+	delta = dnrm2_(&n, g, &inc);
+	double gnorm = delta;
 
 	if (gnorm <= eps*gnorm0)
 		search = 0;
@@ -87,34 +93,68 @@ void TRON::tron(double *w)
 	double *w_new = new double[n];
 	while (iter <= max_iter && search)
 	{
-		cg_iter = trcg(g, s, r);
+		cg_iter = trcg(delta, g, s, r);
 
 		memcpy(w_new, w, sizeof(double)*n);
 		daxpy_(&n, &one, s, &inc, w_new, &inc);
 
-		step_size = fun_obj->line_search(s, w, g, init_step_size, &fnew);
-		if (step_size == 0)
+		gs = ddot_(&n, g, &inc, s, &inc);
+		prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc));
+		fnew = fun_obj->fun(w_new);
+
+		// Compute the actual reduction.
+		actred = f - fnew;
+
+		// On the first iteration, adjust the initial step bound.
+		snorm = dnrm2_(&n, s, &inc);
+		if (iter == 1)
+			delta = min(delta, snorm);
+
+		// Compute prediction alpha*snorm of the step.
+		if (fnew - f - gs <= 0)
+			alpha = sigma3;
+		else
+			alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));
+
+		// Update the trust region bound according to the ratio of actual to predicted reduction.
+		if (actred < eta0*prered)
+			delta = min(max(alpha, sigma1)*snorm, sigma2*delta);
+		else if (actred < eta1*prered)
+			delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta));
+		else if (actred < eta2*prered)
+			delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta));
+		else
+			delta = max(delta, min(alpha*snorm, sigma3*delta));
+
+		info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
+
+		if (actred > eta0*prered)
 		{
-			info("WARNING: line search fails\n");
-			break;
+			iter++;
+			memcpy(w, w_new, sizeof(double)*n);
+			f = fnew;
+			fun_obj->grad(w, g);
+
+			gnorm = dnrm2_(&n, g, &inc);
+			if (gnorm <= eps*gnorm0)
+				break;
 		}
-		daxpy_(&n, &step_size, s, &inc, w, &inc);
-
-		info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %5.3e \n", iter, f, gnorm, cg_iter, step_size);
-
-		f = fnew;
-		iter++;
-
-		fun_obj->grad(w, g);
-
-		gnorm = dnrm2_(&n, g, &inc);
-		if (gnorm <= eps*gnorm0)
-			break;
 
 		if (f < -1.0e+32)
 		{
 			info("WARNING: f < -1.0e+32\n");
 			break;
 		}
+		if (fabs(actred) <= 0 && prered <= 0)
+		{
+			info("WARNING: actred and prered <= 0\n");
+			break;
+		}
+		if (fabs(actred) <= 1.0e-12*fabs(f) &&
+		    fabs(prered) <= 1.0e-12*fabs(f))
+		{
+			info("WARNING: actred and prered too small\n");
+			break;
+		}
 	}
 	delete[] g;
@@ -123,7 +163,7 @@
 	delete[] s;
 }
 
-int TRON::trcg(double *g, double *s, double *r)
+int TRON::trcg(double delta, double *g, double *s, double *r)
 {
 	int i, inc = 1;
 	int n = fun_obj->get_nr_variable();
@@ -151,6 +191,26 @@
 
 		alpha = rTr/ddot_(&n, d, &inc, Hd, &inc);
 		daxpy_(&n, &alpha, d, &inc, s, &inc);
+		if (dnrm2_(&n, s, &inc) > delta)
+		{
+			info("cg reaches trust region boundary\n");
+			alpha = -alpha;
+			daxpy_(&n, &alpha, d, &inc, s, &inc);
+
+			double std = ddot_(&n, s, &inc, d, &inc);
+			double sts = ddot_(&n, s, &inc, s, &inc);
+			double dtd = ddot_(&n, d, &inc, d, &inc);
+			double dsq = delta*delta;
+			double rad = sqrt(std*std + dtd*(dsq-sts));
+			if (std >= 0)
+				alpha = (dsq - sts)/(std + rad);
+			else
+				alpha = (rad - std)/dtd;
+			daxpy_(&n, &alpha, d, &inc, s, &inc);
+			alpha = -alpha;
+			daxpy_(&n, &alpha, Hd, &inc, r, &inc);
+			break;
+		}
 		alpha = -alpha;
 		daxpy_(&n, &alpha, Hd, &inc, r, &inc);
 		rnewTrnew = ddot_(&n, r, &inc, r, &inc);
diff --git a/tron.h b/tron.h
index a7da979..56002dc 100644
--- a/tron.h
+++ b/tron.h
@@ -7,7 +7,6 @@ public:
 	virtual double fun(double *w) = 0 ;
 	virtual void grad(double *w, double *g) = 0 ;
 	virtual void Hv(double *s, double *Hs) = 0 ;
-	virtual double line_search(double *s, double *w, double *g, double init_step_size, double *fnew) = 0 ;
 
 	virtual int get_nr_variable(void) = 0 ;
 	virtual ~function(void){}
@@ -23,7 +22,7 @@ public:
 	void set_print_string(void (*i_print) (const char *buf));
 
 private:
-	int trcg(double *g, double *s, double *r);
+	int trcg(double delta, double *g, double *s, double *r);
 	double norm_inf(int n, double *x);
 
 	double eps;