int liblinear_version = LIBLINEAR_VERSION;

typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
    dst = new T[n];
    memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))

static void print_string_stdout(const char *s)
{
    fputs(s,stdout);
    fflush(stdout);
}
static void print_null(const char *s) {}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
    char buf[BUFSIZ];
    va_list ap;
    va_start(ap,fmt);
    vsprintf(buf,fmt,ap);
    va_end(ap);
    (*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif

class sparse_operator
{
public:
    static double nrm2_sq(const feature_node *x)
    {
        double ret = 0;
        while(x->index != -1)
        {
            ret += x->value*x->value;
            x++;
        }
        return ret;
    }

    static double dot(const double *s, const feature_node *x)
    {
        double ret = 0;
        while(x->index != -1)
        {
            ret += s[x->index-1]*x->value;
            x++;
        }
        return ret;
    }

    static void axpy(const double a, const feature_node *x, double *y)
    {
        while(x->index != -1)
        {
            y[x->index-1] += a*x->value;
            x++;
        }
    }
};
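
// Example (a sketch, not part of the library): feature vectors are arrays of
// feature_node terminated by index == -1, with 1-based indices. Under that
// convention the helpers above behave as follows:
//
//   feature_node x[] = {{1,0.5},{3,2.0},{-1,0}};   // sparse x = (0.5, 0, 2.0)
//   double s[3] = {1,1,1}, y[3] = {0,0,0};
//   double sq = sparse_operator::nrm2_sq(x);       // 0.25 + 4.0 = 4.25
//   double d  = sparse_operator::dot(s, x);        // 0.5 + 2.0  = 2.5
//   sparse_operator::axpy(2.0, x, y);              // y becomes (1.0, 0, 4.0)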
class l2r_lr_fun: public function
    l2r_lr_fun(const problem *prob, double *C);

    double fun(double *w);
    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    int get_nr_variable(void);
    void get_diag_preconditioner(double *M);

    void Xv(double *v, double *Xv);
    void XTv(double *v, double *XTv);

l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)

l2r_lr_fun::~l2r_lr_fun()

double l2r_lr_fun::fun(double *w)
    int w_size=get_nr_variable();
    for(i=0;i<w_size;i++)
    double yz = y[i]*z[i];
    f += C[i]*log(1 + exp(-yz));
    f += C[i]*(-yz+log(1 + exp(yz)));

void l2r_lr_fun::grad(double *w, double *g)
    int w_size=get_nr_variable();
    z[i] = 1/(1 + exp(-y[i]*z[i]));
    D[i] = z[i]*(1-z[i]);
    z[i] = C[i]*(z[i]-1)*y[i];
    for(i=0;i<w_size;i++)

int l2r_lr_fun::get_nr_variable(void)

void l2r_lr_fun::get_diag_preconditioner(double *M)
    int w_size=get_nr_variable();
    feature_node **x = prob->x;
    for (i=0; i<w_size; i++)
    feature_node *s = x[i];
    M[s->index-1] += s->value*s->value*C[i]*D[i];

void l2r_lr_fun::Hv(double *s, double *Hs)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    feature_node * const xi=x[i];
    double xTs = sparse_operator::dot(s, xi);
    sparse_operator::axpy(xTs, xi, Hs);
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + Hs[i];

void l2r_lr_fun::Xv(double *v, double *Xv)
    feature_node **x=prob->x;
    Xv[i]=sparse_operator::dot(v, x[i]);

void l2r_lr_fun::XTv(double *v, double *XTv)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    sparse_operator::axpy(v[i], x[i], XTv);
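
// Summary of the pieces above in the TRON solver's notation (with
// sigma(t) = 1/(1+exp(-t)) and per-instance costs C_i):
//
//   f(w)  = 0.5 w^T w + sum_i C_i log(1 + exp(-y_i w^T x_i))
//   g(w)  = w + X^T z,           z_i  = C_i (sigma(y_i w^T x_i) - 1) y_i
//   Hv(s) = s + X^T (C.D.(X s)), D_ii = sigma_i (1 - sigma_i)
//
// which is why grad() caches D[] for the subsequent Hv() calls.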
class l2r_l2_svc_fun: public function
    l2r_l2_svc_fun(const problem *prob, double *C);

    double fun(double *w);
    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    int get_nr_variable(void);
    void get_diag_preconditioner(double *M);

    void Xv(double *v, double *Xv);
    void subXTv(double *v, double *XTv);

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)

l2r_l2_svc_fun::~l2r_l2_svc_fun()

double l2r_l2_svc_fun::fun(double *w)
    int w_size=get_nr_variable();
    for(i=0;i<w_size;i++)

void l2r_l2_svc_fun::grad(double *w, double *g)
    int w_size=get_nr_variable();
    z[sizeI] = C[i]*y[i]*(z[i]-1);
    for(i=0;i<w_size;i++)
        g[i] = w[i] + 2*g[i];

int l2r_l2_svc_fun::get_nr_variable(void)

void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
    int w_size=get_nr_variable();
    feature_node **x = prob->x;
    for (i=0; i<w_size; i++)
    for (i=0; i<sizeI; i++)
    feature_node *s = x[idx];
    M[s->index-1] += s->value*s->value*C[idx]*2;

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    feature_node * const xi=x[I[i]];
    double xTs = sparse_operator::dot(s, xi);
    sparse_operator::axpy(xTs, xi, Hs);
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + 2*Hs[i];

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
    feature_node **x=prob->x;
    Xv[i]=sparse_operator::dot(v, x[i]);

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    sparse_operator::axpy(v[i], x[I[i]], XTv);

class l2r_l2_svr_fun: public l2r_l2_svc_fun
    l2r_l2_svr_fun(const problem *prob, double *C, double p);

    double fun(double *w);
    void grad(double *w, double *g);

l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
    l2r_l2_svc_fun(prob, C)

double l2r_l2_svr_fun::fun(double *w)
    int w_size=get_nr_variable();
    for(i=0;i<w_size;i++)
    f += C[i]*(d+p)*(d+p);
    f += C[i]*(d-p)*(d-p);

void l2r_l2_svr_fun::grad(double *w, double *g)
    int w_size=get_nr_variable();
    // generate index set I
    z[sizeI] = C[i]*(d+p);
    z[sizeI] = C[i]*(d-p);
    for(i=0;i<w_size;i++)
        g[i] = w[i] + 2*g[i];
// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i \alpha^m_i
//    s.t.  \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//  where e^m_i = 0 if y_i  = m,
//        e^m_i = 1 if y_i != m,
//        C^m_i = C if m  = y_i,
//        C^m_i = 0 if m != y_i,
//  and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)

#define GETI(i) ((int) prob->y[i])
// To support weights for instances, use GETI(i) (i)
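
// A sketch of the instance-weight variant mentioned above: with
// #define GETI(i) (i), C[] is indexed per instance instead of per class,
// so the caller would pass an l-length array, e.g.
//
//   double *inst_C = new double[prob->l];
//   for(int i=0;i<prob->l;i++)
//       inst_C[i] = base_C * weight_i;   // hypothetical per-instance weights
//
// and every C[GETI(i)] below then picks up that instance's own bound.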
class Solver_MCSVM_CS
    Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
    void Solve(double *w);
    void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
    bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
    this->w_size = prob->n;
    this->nr_class = nr_class;
    this->max_iter = max_iter;
    this->B = new double[nr_class];
    this->G = new double[nr_class];
    this->C = weighted_C;

Solver_MCSVM_CS::~Solver_MCSVM_CS()

int compare_double(const void *a, const void *b)
    if(*(double *)a > *(double *)b)
    if(*(double *)a < *(double *)b)

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
    clone(D, B, active_i);
    qsort(D, active_i, sizeof(double), compare_double);
    double beta = D[0] - A_i*C_yi;
    for(r=1;r<active_i && beta<r*D[r];r++)
    for(r=0;r<active_i;r++)
        alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
        alpha_new[r] = min((double)0, (beta - B[r])/A_i);

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
    if(alpha_i == bound && G[m] < minG)

void Solver_MCSVM_CS::Solve(double *w)
    double *alpha = new double[l*nr_class];
    double *alpha_new = new double[nr_class];
    int *index = new int[l];
    double *QD = new double[l];
    int *d_ind = new int[nr_class];
    double *d_val = new double[nr_class];
    int *alpha_index = new int[nr_class*l];
    int *y_index = new int[l];
    int *active_size_i = new int[l];
    double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
    bool start_from_all = true;

    // Initial alpha can be set here. Note that
    // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
    // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
    // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
    // If initial alpha isn't zero, uncomment the for loop below to initialize w
    for(i=0;i<l*nr_class;i++)
    for(i=0;i<w_size*nr_class;i++)
    for(m=0;m<nr_class;m++)
        alpha_index[i*nr_class+m] = m;
    feature_node *xi = prob->x[i];
    while(xi->index != -1)
        double val = xi->value;
        // Uncomment the for loop if initial alpha isn't zero
        // for(m=0; m<nr_class; m++)
        //     w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
    active_size_i[i] = nr_class;
    y_index[i] = (int)prob->y[i];

    while(iter < max_iter)
        double stopping = -INF;
        for(i=0;i<active_size;i++)
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        for(s=0;s<active_size;s++)
            double *alpha_i = &alpha[i*nr_class];
            int *alpha_index_i = &alpha_index[i*nr_class];
            for(m=0;m<active_size_i[i];m++)
            if(y_index[i] < active_size_i[i])
            feature_node *xi = prob->x[i];
            while(xi->index!= -1)
                double *w_i = &w[(xi->index-1)*nr_class];
                for(m=0;m<active_size_i[i];m++)
                    G[m] += w_i[alpha_index_i[m]]*(xi->value);
            for(m=0;m<active_size_i[i];m++)
                if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
            if(y_index[i] < active_size_i[i])
                if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
                    minG = G[y_index[i]];
            for(m=0;m<active_size_i[i];m++)
                if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
                    while(active_size_i[i]>m)
                        if(!be_shrunk(i, active_size_i[i], y_index[i],
                                      alpha_i[alpha_index_i[active_size_i[i]]], minG))
                            swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
                            swap(G[m], G[active_size_i[i]]);
                            if(y_index[i] == active_size_i[i])
                            else if(y_index[i] == m)
                                y_index[i] = active_size_i[i];
            if(active_size_i[i] <= 1)
                swap(index[s], index[active_size]);
            if(maxG-minG <= 1e-12)
            stopping = max(maxG - minG, stopping);
            for(m=0;m<active_size_i[i];m++)
                B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];
            solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
            for(m=0;m<active_size_i[i];m++)
                double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
                alpha_i[alpha_index_i[m]] = alpha_new[m];
                d_ind[nz_d] = alpha_index_i[m];
            while(xi->index != -1)
                double *w_i = &w[(xi->index-1)*nr_class];
                w_i[d_ind[m]] += d_val[m]*xi->value;
        if(stopping < eps_shrink)
            if(stopping < eps && start_from_all == true)
            active_size_i[i] = nr_class;
            eps_shrink = max(eps_shrink/2, eps);
            start_from_all = true;
        start_from_all = false;

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\n");

    // calculate objective value
    for(i=0;i<w_size*nr_class;i++)
    for(i=0;i<l*nr_class;i++)
        if(fabs(alpha[i]) > 0)
    v -= alpha[i*nr_class+(int)prob->y[i]];
    info("Objective value = %lf\n",v);
    info("nSV = %d\n",nSV);

    delete [] alpha_index;
    delete [] active_size_i;
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
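
// One coordinate update of the solver below, written out (a summary of the
// code, not an addition to it): with Q_ii = x_i^T x_i + D_ii precomputed in QD,
//
//   G       = y_i w^T x_i - 1 + D_ii alpha_i      // projected gradient PG
//   alpha_i = min(max(alpha_i - G/Q_ii, 0), C)    // clipped Newton step
//   w      += (alpha_i - alpha_i_old) y_i x_i     // rank-one update via axpy
//
// so each step costs O(nnz(x_i)) and w always equals sum_i alpha_i y_i x_i.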
static void solve_l2r_l1l2_svc(
    const problem *prob, double *w, double eps,
    double Cp, double Cn, int solver_type)
    int w_size = prob->n;
    double *QD = new double[l];
    int *index = new int[l];
    double *alpha = new double[l];
    schar *y = new schar[l];

    // PG: projected gradient, for shrinking and stopping
    double PGmax_old = INF;
    double PGmin_old = -INF;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {INF, 0, INF};
    if(solver_type == L2R_L1LOSS_SVC_DUAL)
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;

    // Initial alpha can be set here. Note that
    // 0 <= alpha[i] <= upper_bound[GETI(i)]
    for(i=0; i<w_size; i++)
    QD[i] = diag[GETI(i)];
    feature_node * const xi = prob->x[i];
    QD[i] += sparse_operator::nrm2_sq(xi);
    sparse_operator::axpy(y[i]*alpha[i], xi, w);

    while (iter < max_iter)
        for (i=0; i<active_size; i++)
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        for (s=0; s<active_size; s++)
            const schar yi = y[i];
            feature_node * const xi = prob->x[i];
            G = yi*sparse_operator::dot(w, xi)-1;
            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];
            swap(index[s], index[active_size]);
            else if (alpha[i] == C)
                swap(index[s], index[active_size]);
            PGmax_new = max(PGmax_new, PG);
            PGmin_new = min(PGmin_new, PG);
            if(fabs(PG) > 1.0e-12)
                double alpha_old = alpha[i];
                alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;
                sparse_operator::axpy(d, xi, w);
        if(PGmax_new - PGmin_new <= eps)
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

    // calculate objective value
    for(i=0; i<w_size; i++)
    v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
    info("Objective value = %lf\n",v/2);
    info("nSV = %d\n",nSV);
// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
//  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l |\beta_i| + \sum_{i=1}^l yi\beta_i,
//    s.t.      -upper_bound_i <= \beta_i <= upper_bound_i,
//
//  where Qij = xi^T xj and
//  diag(lambda) is a diagonal matrix
//
// In L1-loss case:
//     upper_bound_i = C
//     lambda_i = 0
// In L2-loss case:
//     upper_bound_i = INF
//     lambda_i = 1/(2*C)
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

#undef GETI
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)
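
// The corresponding single-variable step for the SVR dual (a summary): with
// G = x_i^T w - y_i + lambda beta_i, H = x_i^T x_i + lambda, Gp = G+p, Gn = G-p,
//
//   d = -Gp/H     if Gp < H*beta_i    // move within the beta_i > 0 piece
//   d = -Gn/H     if Gn > H*beta_i    // move within the beta_i < 0 piece
//   d = -beta_i   otherwise           // jump to the kink at 0
//
// followed by clipping beta_i + d into [-upper_bound, upper_bound].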
static void solve_l2r_l1l2_svr(
    const problem *prob, double *w, const parameter *param,
    int solver_type)
    double C = param->C;
    double p = param->p;
    int w_size = prob->n;
    double eps = param->eps;
    int max_iter = 1000;
    int active_size = l;
    int *index = new int[l];

    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double *beta = new double[l];
    double *QD = new double[l];
    double *y = prob->y;

    // L2R_L2LOSS_SVR_DUAL
    double lambda[1], upper_bound[1];
    lambda[0] = 0.5/C;
    upper_bound[0] = INF;
    if(solver_type == L2R_L1LOSS_SVR_DUAL)
        lambda[0] = 0;
        upper_bound[0] = C;

    // Initial beta can be set here. Note that
    // -upper_bound <= beta[i] <= upper_bound
    for(i=0; i<w_size; i++)
    feature_node * const xi = prob->x[i];
    QD[i] = sparse_operator::nrm2_sq(xi);
    sparse_operator::axpy(beta[i], xi, w);

    while(iter < max_iter)
        for(i=0; i<active_size; i++)
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        for(s=0; s<active_size; s++)
            G = -y[i] + lambda[GETI(i)]*beta[i];
            H = QD[i] + lambda[GETI(i)];
            feature_node * const xi = prob->x[i];
            G += sparse_operator::dot(w, xi);

            double violation = 0;
            else if(Gp>Gmax_old && Gn<-Gmax_old)
                swap(index[s], index[active_size]);
            else if(beta[i] >= upper_bound[GETI(i)])
            else if(Gp < -Gmax_old)
                swap(index[s], index[active_size]);
            else if(beta[i] <= -upper_bound[GETI(i)])
            else if(Gn > Gmax_old)
                swap(index[s], index[active_size]);
            else if(beta[i] > 0)
                violation = fabs(Gp);
                violation = fabs(Gn);

            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            else if(Gn > H*beta[i])
            if(fabs(d) < 1.0e-12)

            double beta_old = beta[i];
            beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
            d = beta[i]-beta_old;
            sparse_operator::axpy(d, xi, w);
        Gnorm1_init = Gnorm1_new;
        if(Gnorm1_new <= eps*Gnorm1_init)
            if(active_size == l)
        Gmax_old = Gmax_new;

    info("\noptimization finished, #iter = %d\n", iter);
    if(iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");

    // calculate objective value
    for(i=0; i<w_size; i++)
    v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
    info("Objective value = %lf\n", v);
    info("nSV = %d\n",nSV);
// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log(\alpha_i) + (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
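
// The per-coordinate sub-problem solved below (a summary): after choosing
// between the two symmetric variables ind1/ind2, minimize over z in (0, C)
//
//   g(z) = z log(z) + (C-z) log(C-z) + 0.5 a (z-alpha_old)^2 + sign*b (z-alpha_old)
//
// by Newton's method, using
//
//   g'(z)  = a(z-alpha_old) + sign*b + log(z/(C-z))
//   g''(z) = a + C/((C-z)z)
//
// and stopping the inner loop once |g'(z)| < innereps.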
void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
    int w_size = prob->n;
    double *xTx = new double[l];
    int max_iter = 1000;
    int *index = new int[l];
    double *alpha = new double[2*l]; // store alpha and C - alpha
    schar *y = new schar[l];
    int max_inner_iter = 100; // for inner Newton
    double innereps = 1e-2;
    double innereps_min = min(1e-8, eps);
    double upper_bound[3] = {Cn, 0, Cp};

    // Initial alpha can be set here. Note that
    // 0 < alpha[i] < upper_bound[GETI(i)]
    // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
    alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
    alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];

    for(i=0; i<w_size; i++)
    feature_node * const xi = prob->x[i];
    xTx[i] = sparse_operator::nrm2_sq(xi);
    sparse_operator::axpy(y[i]*alpha[2*i], xi, w);

    while (iter < max_iter)
        int j = i+rand()%(l-i);
        swap(index[i], index[j]);
        int newton_iter = 0;
        const schar yi = y[i];
        double C = upper_bound[GETI(i)];
        double ywTx = 0, xisq = xTx[i];
        feature_node * const xi = prob->x[i];
        ywTx = yi*sparse_operator::dot(w, xi);
        double a = xisq, b = ywTx;

        // Decide to minimize g_1(z) or g_2(z)
        int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
        if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)

        // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
        double alpha_old = alpha[ind1];
        double z = alpha_old;
        double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
        Gmax = max(Gmax, fabs(gp));

        // Newton method on the sub-problem
        const double eta = 0.1; // xi in the paper
        while (inner_iter <= max_inner_iter)
            if(fabs(gp) < innereps)
            double gpp = a + C/(C-z)/z;
            double tmpz = z - gp/gpp;
            else // tmpz in (0, C)
            gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
        if(inner_iter > 0) // update w
            sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
        if(newton_iter <= l/10)
            innereps = max(innereps_min, 0.1*innereps);

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

    // calculate objective value
    for(i=0; i<w_size; i++)
    v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
        - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
    info("Objective value = %lf\n", v);
// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
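
// The one-variable step used below (a summary of the code): with G the loss
// gradient, H its curvature bound, and Gp = G+1, Gn = G-1 from the |w_j| term,
//
//   d = -Gp/H    if Gp < H*w_j     // stay on the w_j > 0 piece
//   d = -Gn/H    if Gn > H*w_j     // stay on the w_j < 0 piece
//   d = -w_j     otherwise         // the optimum sits at the kink w_j = 0
//
// followed by a backtracking line search that halves d until the sufficient
// decrease condition with sigma = 0.01 holds.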
static void solve_l1r_l2_svc(
    problem *prob_col, double *w, double eps,
    double Cp, double Cn)
    int l = prob_col->l;
    int w_size = prob_col->n;
    int max_iter = 1000;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double d_old, d_diff;
    double loss_old, loss_new;
    double appxcond, cond;

    int *index = new int[w_size];
    schar *y = new schar[l];
    double *b = new double[l]; // b = 1-ywTx
    double *xj_sq = new double[w_size];

    double C[3] = {Cn,0,Cp};

    // Initial w can be set here.
    for(j=0; j<w_size; j++)
    if(prob_col->y[j] > 0)
    for(j=0; j<w_size; j++)
        while(x->index != -1)
            int ind = x->index-1;
            x->value *= y[ind]; // x->value stores yi*xij
            double val = x->value;
            xj_sq[j] += C[GETI(ind)]*val*val;

    while(iter < max_iter)
        for(j=0; j<active_size; j++)
            int i = j+rand()%(active_size-j);
            swap(index[i], index[j]);
        for(s=0; s<active_size; s++)
            while(x->index != -1)
                int ind = x->index-1;
                double val = x->value;
                double tmp = C[GETI(ind)]*val;
                G_loss -= tmp*b[ind];

            double violation = 0;
            else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                swap(index[s], index[active_size]);
                violation = fabs(Gp);
                violation = fabs(Gn);

            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            else if(Gn > H*w[j])
            if(fabs(d) < 1.0e-12)

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                sparse_operator::axpy(d_diff, x, b);
                if(num_linesearch == 0)
                    while(x->index != -1)
                        int ind = x->index-1;
                        loss_old += C[GETI(ind)]*b[ind]*b[ind];
                        double b_new = b[ind] + d_diff*x->value;
                        loss_new += C[GETI(ind)]*b_new*b_new;
                    while(x->index != -1)
                        int ind = x->index-1;
                        double b_new = b[ind] + d_diff*x->value;
                        loss_new += C[GETI(ind)]*b_new*b_new;
                cond = cond + loss_new - loss_old;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
                for(int i=0; i<l; i++)
                for(int i=0; i<w_size; i++)
                    if(w[i]==0) continue;
                    sparse_operator::axpy(-w[i], x, b);

        Gnorm1_init = Gnorm1_new;
        if(Gnorm1_new <= eps*Gnorm1_init)
            if(active_size == w_size)
            active_size = w_size;
        Gmax_old = Gmax_new;

    info("\noptimization finished, #iter = %d\n", iter);
    if(iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\n");

    // calculate objective value
    for(j=0; j<w_size; j++)
        while(x->index != -1)
            x->value *= prob_col->y[x->index-1]; // restore x->value
    v += C[GETI(j)]*b[j]*b[j];
    info("Objective value = %lf\n", v);
    info("#nonzeros/#features = %d/%d\n", nnz, w_size);
// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
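
// Outline of the CDN method implemented below (a summary of the code): each
// outer Newton iteration minimizes the quadratic model of the regularized
// logistic loss at the current w,
//
//   q(d) = Grad^T d + 0.5 d^T (X^T D X + nu I) d + |w+d|_1 - |w|_1,
//
// where Grad[] and the curvatures D[] are maintained through the cached
// exp_wTx[] and tau[] arrays. The inner coordinate descent loop optimizes q
// over wpd, and a backtracking line search on the true objective accepts the
// step or halves it via wpd = (w+wpd)/2.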
static void solve_l1r_lr(
    const problem *prob_col, double *w, double eps,
    double Cp, double Cn)
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, newton_iter=0, iter=0;
    int max_newton_iter = 100;
    int max_iter = 1000;
    int max_num_linesearch = 20;

    double inner_eps = 1;
    double sigma = 0.01;
    double w_norm, w_norm_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double QP_Gmax_old = INF;
    double QP_Gmax_new, QP_Gnorm1_new;
    double delta, negsum_xTd, cond;

    int *index = new int[w_size];
    schar *y = new schar[l];
    double *Hdiag = new double[w_size];
    double *Grad = new double[w_size];
    double *wpd = new double[w_size];
    double *xjneg_sum = new double[w_size];
    double *xTd = new double[l];
    double *exp_wTx = new double[l];
    double *exp_wTx_new = new double[l];
    double *tau = new double[l];
    double *D = new double[l];

    double C[3] = {Cn,0,Cp};

    // Initial w can be set here.
    for(j=0; j<w_size; j++)
    if(prob_col->y[j] > 0)
    for(j=0; j<w_size; j++)
        w_norm += fabs(w[j]);
        while(x->index != -1)
            int ind = x->index-1;
            double val = x->value;
            exp_wTx[ind] += w[j]*val;
            xjneg_sum[j] += C[GETI(ind)]*val;
    exp_wTx[j] = exp(exp_wTx[j]);
    double tau_tmp = 1/(1+exp_wTx[j]);
    tau[j] = C[GETI(j)]*tau_tmp;
    D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;

    while(newton_iter < max_newton_iter)
        active_size = w_size;
        for(s=0; s<active_size; s++)
            while(x->index != -1)
                int ind = x->index-1;
                Hdiag[j] += x->value*x->value*D[ind];
                tmp += x->value*tau[ind];
            Grad[j] = -tmp + xjneg_sum[j];

            double Gp = Grad[j]+1;
            double Gn = Grad[j]-1;
            double violation = 0;
            // outer-level shrinking
            else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                swap(index[s], index[active_size]);
                violation = fabs(Gp);
                violation = fabs(Gn);

            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

        if(newton_iter == 0)
            Gnorm1_init = Gnorm1_new;
        if(Gnorm1_new <= eps*Gnorm1_init)

        QP_active_size = active_size;
        for(int i=0; i<l; i++)

        // optimize QP over wpd
        while(iter < max_iter)
            for(j=0; j<QP_active_size; j++)
                int i = j+rand()%(QP_active_size-j);
                swap(index[i], index[j]);
            for(s=0; s<QP_active_size; s++)
                G = Grad[j] + (wpd[j]-w[j])*nu;
                while(x->index != -1)
                    int ind = x->index-1;
                    G += x->value*D[ind]*xTd[ind];

                double violation = 0;
                // inner-level shrinking
                else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
                    swap(index[s], index[QP_active_size]);
                    violation = fabs(Gp);
                    violation = fabs(Gn);

                QP_Gmax_new = max(QP_Gmax_new, violation);
                QP_Gnorm1_new += violation;

                // obtain solution of one-variable problem
                else if(Gn > H*wpd[j])
                if(fabs(z) < 1.0e-12)
                z = min(max(z,-10.0),10.0);
                sparse_operator::axpy(z, x, xTd);

            if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
                if(QP_active_size == active_size)
                // active set reactivation
                QP_active_size = active_size;
            QP_Gmax_old = QP_Gmax_new;

        if(iter >= max_iter)
            info("WARNING: reaching max number of inner iterations\n");

        for(j=0; j<w_size; j++)
            delta += Grad[j]*(wpd[j]-w[j]);
            w_norm_new += fabs(wpd[j]);
        delta += (w_norm_new-w_norm);

        for(int i=0; i<l; i++)
            negsum_xTd += C[GETI(i)]*xTd[i];
        for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
            for(int i=0; i<l; i++)
                double exp_xTd = exp(xTd[i]);
                exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
                cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
            w_norm = w_norm_new;
            for(j=0; j<w_size; j++)
            for(int i=0; i<l; i++)
                exp_wTx[i] = exp_wTx_new[i];
                double tau_tmp = 1/(1+exp_wTx[i]);
                tau[i] = C[GETI(i)]*tau_tmp;
                D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
            for(j=0; j<w_size; j++)
                wpd[j] = (w[j]+wpd[j])*0.5;
                w_norm_new += fabs(wpd[j]);
            for(int i=0; i<l; i++)

        // Recompute some info due to too many line search steps
        if(num_linesearch >= max_num_linesearch)
            for(int i=0; i<l; i++)
            for(int i=0; i<w_size; i++)
                if(w[i]==0) continue;
                sparse_operator::axpy(w[i], x, exp_wTx);
            for(int i=0; i<l; i++)
                exp_wTx[i] = exp(exp_wTx[i]);

        Gmax_old = Gmax_new;
        info("iter %3d  #CD cycles %d\n", newton_iter, iter);

    info("=========================\n");
    info("optimization finished, #iter = %d\n", newton_iter);
    if(newton_iter >= max_newton_iter)
        info("WARNING: reaching max number of iterations\n");

    // calculate objective value
    for(j=0; j<w_size; j++)
        v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        v += C[GETI(j)]*log(1+exp_wTx[j]);
    info("Objective value = %lf\n", v);
    info("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] xjneg_sum;
    delete [] exp_wTx_new;
// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
    size_t *col_ptr = new size_t [n+1];
    feature_node *x_space;
    prob_col->y = new double[l];
    prob_col->x = new feature_node*[n];
    prob_col->y[i] = prob->y[i];
    for(i=0; i<n+1; i++)
    feature_node *x = prob->x[i];
    while(x->index != -1)
        col_ptr[x->index]++;
    for(i=1; i<n+1; i++)
        col_ptr[i] += col_ptr[i-1] + 1;
    x_space = new feature_node[nnz+n];
    prob_col->x[i] = &x_space[col_ptr[i]];
    feature_node *x = prob->x[i];
    while(x->index != -1)
        int ind = x->index-1;
        x_space[col_ptr[ind]].index = i+1; // starts from 1
        x_space[col_ptr[ind]].value = x->value;
    x_space[col_ptr[i]].index = -1;
    *x_space_ret = x_space;
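
// A small worked example of the layout change (illustration only): row format
//
//   x[0] = {(1,0.5),(3,1.0),(-1)}        // instance 1
//   x[1] = {(2,2.0),(3,-1.0),(-1)}       // instance 2
//
// becomes column format, where indices now name instances (starting from 1):
//
//   prob_col->x[0] = {(1,0.5),(-1)}              // feature 1
//   prob_col->x[1] = {(2,2.0),(-1)}              // feature 2
//   prob_col->x[2] = {(1,1.0),(2,-1.0),(-1)}     // feature 3
//
// hence the nnz+n allocation: one -1 terminator per feature column.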
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
    int max_nr_class = 16;
    int *label = Malloc(int,max_nr_class);
    int *count = Malloc(int,max_nr_class);
    int *data_label = Malloc(int,l);
    int this_label = (int)prob->y[i];
    for(j=0;j<nr_class;j++)
        if(this_label == label[j])
    if(nr_class == max_nr_class)
        label = (int *)realloc(label,max_nr_class*sizeof(int));
        count = (int *)realloc(count,max_nr_class*sizeof(int));
    label[nr_class] = this_label;
    count[nr_class] = 1;

    // Labels are ordered by their first occurrence in the training set.
    // However, for two-class sets with -1/+1 labels where -1 appears first,
    // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
    if (nr_class == 2 && label[0] == -1 && label[1] == 1)
        swap(label[0],label[1]);
        swap(count[0],count[1]);
        if(data_label[i] == 0)

    int *start = Malloc(int,nr_class);
    for(i=1;i<nr_class;i++)
        start[i] = start[i-1]+count[i-1];
    perm[start[data_label[i]]] = i;
    ++start[data_label[i]];
    for(i=1;i<nr_class;i++)
        start[i] = start[i-1]+count[i-1];

    *nr_class_ret = nr_class;
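
// Illustration: for y = {20, 10, 20, 30} the routine yields
//
//   nr_class = 3, label = {20, 10, 30}, count = {2, 1, 1},
//   start = {0, 2, 3}, perm = {0, 2, 1, 3}
//
// so perm[start[k]] .. perm[start[k]+count[k]-1] index exactly the instances
// of class label[k] (labels ordered by first occurrence).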
static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
    // inner and outer tolerances for TRON
    double eps = param->eps;
    double eps_cg = 0.1;
    if(param->init_sol != NULL)
        eps_cg = 0.5;

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob->l;i++)
        if(prob->y[i] > 0)
            pos++;
    neg = prob->l - pos;
    double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;

    function *fun_obj=NULL;
    switch(param->solver_type)
        case L2R_LR:
            double *C = new double[prob->l];
            for(int i = 0; i < prob->l; i++)
                if(prob->y[i] > 0)
                    C[i] = Cp;
                else
                    C[i] = Cn;
            fun_obj=new l2r_lr_fun(prob, C);
            TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
            tron_obj.set_print_string(liblinear_print_string);
            tron_obj.tron(w);
            break;
        case L2R_L2LOSS_SVC:
            double *C = new double[prob->l];
            for(int i = 0; i < prob->l; i++)
                if(prob->y[i] > 0)
                    C[i] = Cp;
                else
                    C[i] = Cn;
            fun_obj=new l2r_l2_svc_fun(prob, C);
            TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
            tron_obj.set_print_string(liblinear_print_string);
            tron_obj.tron(w);
            break;
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
            problem prob_col;
            feature_node *x_space = NULL;
            transpose(prob, &x_space, &prob_col);
            solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
            delete [] prob_col.y;
            delete [] prob_col.x;
            delete [] x_space;
            break;
        case L1R_LR:
            problem prob_col;
            feature_node *x_space = NULL;
            transpose(prob, &x_space, &prob_col);
            solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
            delete [] prob_col.y;
            delete [] prob_col.x;
            delete [] x_space;
            break;
        case L2R_LR_DUAL:
            solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
            break;
        case L2R_L2LOSS_SVR:
            double *C = new double[prob->l];
            for(int i = 0; i < prob->l; i++)
                C[i] = param->C;
            fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
            TRON tron_obj(fun_obj, param->eps);
            tron_obj.set_print_string(liblinear_print_string);
            tron_obj.tron(w);
            break;
        case L2R_L1LOSS_SVR_DUAL:
            solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
            break;
        case L2R_L2LOSS_SVR_DUAL:
            solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
            break;
        default:
            fprintf(stderr, "ERROR: unknown solver_type\n");

// Calculate the initial C for parameter selection
static double calc_start_C(const problem *prob, const parameter *param)
    double min_C = 1.0;
    double max_xTx = 0;
    for(i=0; i<prob->l; i++)
        xTx = 0;
        feature_node *xi=prob->x[i];
        while(xi->index != -1)
            double val = xi->value;
            xTx += val*val;
        if(xTx > max_xTx)
            max_xTx = xTx;

    if(param->solver_type == L2R_LR)
        min_C = 1.0 / (prob->l * max_xTx);
    else if(param->solver_type == L2R_L2LOSS_SVC)
        min_C = 1.0 / (2 * prob->l * max_xTx);

    return pow(2, floor(log(min_C) / log(2.0)));
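
// Example of the final rounding (illustration): if min_C works out to 0.3,
// the function returns pow(2, floor(log2(0.3))) = pow(2, -2) = 0.25, i.e. the
// largest power of two not exceeding min_C, so find_parameter_C() can search
// on a clean power-of-two grid.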
//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
    int w_size = prob->n;
    model *model_ = Malloc(model,1);

    if(prob->bias>=0)
        model_->nr_feature=n-1;
    else
        model_->nr_feature=n;
    model_->param = *param;
    model_->bias = prob->bias;

    if(check_regression_model(model_))
        model_->w = Malloc(double, w_size);
        for(i=0; i<w_size; i++)
            model_->w[i] = 0;
        model_->nr_class = 2;
        model_->label = NULL;
        train_one(prob, param, model_->w, 0, 0);
    else
        int *perm = Malloc(int,l);

        // group training data of the same class
        group_classes(prob,&nr_class,&label,&start,&count,perm);

        model_->nr_class=nr_class;
        model_->label = Malloc(int,nr_class);
        for(i=0;i<nr_class;i++)
            model_->label[i] = label[i];

        // calculate weighted C
        double *weighted_C = Malloc(double, nr_class);
        for(i=0;i<nr_class;i++)
            weighted_C[i] = param->C;
        for(i=0;i<param->nr_weight;i++)
            for(j=0;j<nr_class;j++)
                if(param->weight_label[i] == label[j])
                    break;
            if(j == nr_class)
                fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
            else
                weighted_C[j] *= param->weight[i];

        // constructing the subproblem
        feature_node **x = Malloc(feature_node *,l);
        for(i=0;i<l;i++)
            x[i] = prob->x[perm[i]];

        problem sub_prob;
        sub_prob.l = l;
        sub_prob.n = n;
        sub_prob.x = Malloc(feature_node *,sub_prob.l);
        sub_prob.y = Malloc(double,sub_prob.l);

        for(k=0; k<sub_prob.l; k++)
            sub_prob.x[k] = x[k];

        // multi-class svm by Crammer and Singer
        if(param->solver_type == MCSVM_CS)
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
        else
            if(nr_class == 2)
                model_->w=Malloc(double, w_size);

                int e0 = start[0]+count[0];
                k=0;
                for(; k<e0; k++)
                    sub_prob.y[k] = +1;
                for(; k<sub_prob.l; k++)
                    sub_prob.y[k] = -1;

                if(param->init_sol != NULL)
                    for(i=0;i<w_size;i++)
                        model_->w[i] = param->init_sol[i];
                else
                    for(i=0;i<w_size;i++)
                        model_->w[i] = 0;

                train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
            else
                model_->w=Malloc(double, w_size*nr_class);
                double *w=Malloc(double, w_size);
                for(i=0;i<nr_class;i++)
                    int si = start[i];
                    int ei = si+count[i];

                    k=0;
                    for(; k<si; k++)
                        sub_prob.y[k] = -1;
                    for(; k<ei; k++)
                        sub_prob.y[k] = +1;
                    for(; k<sub_prob.l; k++)
                        sub_prob.y[k] = -1;

                    if(param->init_sol != NULL)
                        for(j=0;j<w_size;j++)
                            w[j] = param->init_sol[j*nr_class+i];
                    else
                        for(j=0;j<w_size;j++)
                            w[j] = 0;

                    train_one(&sub_prob, param, w, weighted_C[i], param->C);

                    for(j=0;j<w_size;j++)
                        model_->w[j*nr_class+i] = w[j];
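
// Typical use of the training interface (a sketch; error handling and the
// bias term omitted, using the feature_node convention shown earlier):
//
//   struct feature_node x1[] = {{1,1.0},{2,0.5},{-1,0}};
//   struct feature_node x2[] = {{1,-1.0},{2,2.0},{-1,0}};
//   struct feature_node *rows[] = {x1, x2};
//   double labels[] = {+1, -1};
//   struct problem prob = {2, 2, labels, rows, -1};   // l, n, y, x, bias
//   struct parameter param;
//   param.solver_type = L2R_L2LOSS_SVC_DUAL;
//   param.C = 1; param.eps = 0.1; param.p = 0.1;
//   param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
//   param.init_sol = NULL;
//   struct model *m = train(&prob, &param);
//   double yhat = predict(m, x1);
//   free_and_destroy_model(&m);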
void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
    int *perm = Malloc(int,l);
    if (nr_fold > l)
        fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
    fold_start = Malloc(int,nr_fold+1);
    for(i=0;i<l;i++) perm[i]=i;
        int j = i+rand()%(l-i);
        swap(perm[i],perm[j]);
    for(i=0;i<=nr_fold;i++)
        fold_start[i]=i*l/nr_fold;
    for(i=0;i<nr_fold;i++)
        int begin = fold_start[i];
        int end = fold_start[i+1];
        struct problem subprob;

        subprob.bias = prob->bias;
        subprob.n = prob->n;
        subprob.l = l-(end-begin);
        subprob.x = Malloc(struct feature_node*,subprob.l);
        subprob.y = Malloc(double,subprob.l);

        for(j=0;j<begin;j++)
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
        for(j=end;j<l;j++)
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];

        struct model *submodel = train(&subprob,param);
        for(j=begin;j<end;j++)
            target[perm[j]] = predict(submodel,prob->x[perm[j]]);
        free_and_destroy_model(&submodel);
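
// Sketch of a cross-validation call (assumes prob/param set up as above):
//
//   double *target = Malloc(double, prob.l);
//   cross_validation(&prob, &param, 5, target);   // 5-fold CV
//   int correct = 0;
//   for(int i=0;i<prob.l;i++)
//       if(target[i] == prob.y[i]) ++correct;
//   info("CV accuracy = %g%%\n", 100.0*correct/prob.l);
//   free(target);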
void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
    int *perm = Malloc(int, l);
    double *target = Malloc(double, prob->l);
    struct problem *subprob = Malloc(problem,nr_fold);

    // variables for warm start
    double **prev_w = Malloc(double*, nr_fold);
    for(i = 0; i < nr_fold; i++)
        prev_w[i] = NULL;
    int num_unchanged_w = 0;
    struct parameter param1 = *param;
    void (*default_print_string) (const char *) = liblinear_print_string;

    if (nr_fold > l)
        fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
    fold_start = Malloc(int,nr_fold+1);
    for(i=0;i<l;i++) perm[i]=i;
        int j = i+rand()%(l-i);
        swap(perm[i],perm[j]);
    for(i=0;i<=nr_fold;i++)
        fold_start[i]=i*l/nr_fold;
    for(i=0;i<nr_fold;i++)
        int begin = fold_start[i];
        int end = fold_start[i+1];

        subprob[i].bias = prob->bias;
        subprob[i].n = prob->n;
        subprob[i].l = l-(end-begin);
        subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
        subprob[i].y = Malloc(double,subprob[i].l);

        for(j=0;j<begin;j++)
            subprob[i].x[k] = prob->x[perm[j]];
            subprob[i].y[k] = prob->y[perm[j]];
        for(j=end;j<l;j++)
            subprob[i].x[k] = prob->x[perm[j]];
            subprob[i].y[k] = prob->y[perm[j]];

    if(start_C <= 0)
        start_C = calc_start_C(prob,param);
    param1.C = start_C;

    while(param1.C <= max_C)
        // Output disabled for running CV at a particular C
        set_print_string_function(&print_null);
        for(i=0; i<nr_fold; i++)
            int begin = fold_start[i];
            int end = fold_start[i+1];

            param1.init_sol = prev_w[i];
            struct model *submodel = train(&subprob[i],&param1);

            int total_w_size;
            if(submodel->nr_class == 2)
                total_w_size = subprob[i].n;
            else
                total_w_size = subprob[i].n * submodel->nr_class;

            if(prev_w[i] == NULL)
                prev_w[i] = Malloc(double, total_w_size);
                for(j=0; j<total_w_size; j++)
                    prev_w[i][j] = submodel->w[j];
            else if(num_unchanged_w >= 0)
                double norm_w_diff = 0;
                for(j=0; j<total_w_size; j++)
                    norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
                    prev_w[i][j] = submodel->w[j];
                norm_w_diff = sqrt(norm_w_diff);
                if(norm_w_diff > 1e-15)
                    num_unchanged_w = -1;
            else
                for(j=0; j<total_w_size; j++)
                    prev_w[i][j] = submodel->w[j];

            for(j=begin; j<end; j++)
                target[perm[j]] = predict(submodel,prob->x[perm[j]]);
            free_and_destroy_model(&submodel);
        set_print_string_function(default_print_string);

        int total_correct = 0;
        for(i=0; i<prob->l; i++)
            if(target[i] == prob->y[i])
                ++total_correct;
        double current_rate = (double)total_correct/prob->l;
        if(current_rate > *best_rate)
            *best_C = param1.C;
            *best_rate = current_rate;
        info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
        if(num_unchanged_w == 3)
            break;
        param1.C = param1.C*ratio;

    if(param1.C > max_C && max_C > start_C)
        info("warning: maximum C reached.\n");
    for(i=0; i<nr_fold; i++)
double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
    int n;
    if(model_->bias>=0)
        n=model_->nr_feature+1;
    else
        n=model_->nr_feature;
    double *w=model_->w;
    int nr_class=model_->nr_class;
    int nr_w;
    if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
        nr_w = 1;
    else
        nr_w = nr_class;

    const feature_node *lx=x;
    for(; (idx=lx->index)!=-1; lx++)
        // the dimension of testing data may exceed that of training
        if(idx<=n)
            for(i=0;i<nr_w;i++)
                dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;

    if(nr_class==2)
        if(check_regression_model(model_))
            return dec_values[0];
        else
            return (dec_values[0]>0)?model_->label[0]:model_->label[1];
    else
        int dec_max_idx = 0;
        for(i=1;i<nr_class;i++)
            if(dec_values[i] > dec_values[dec_max_idx])
                dec_max_idx = i;
        return model_->label[dec_max_idx];

double predict(const model *model_, const feature_node *x)
    double *dec_values = Malloc(double, model_->nr_class);
    double label=predict_values(model_, x, dec_values);
    free(dec_values);
    return label;

double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
    if(check_probability_model(model_))
        int nr_class=model_->nr_class;
        double label=predict_values(model_, x, prob_estimates);
        for(i=0;i<nr_w;i++)
            prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

        if(nr_class==2) // for binary classification
            prob_estimates[1]=1.-prob_estimates[0];
        else
            double sum=0;
            for(i=0; i<nr_class; i++)
                sum+=prob_estimates[i];
            for(i=0; i<nr_class; i++)
                prob_estimates[i]=prob_estimates[i]/sum;
        return label;

static const char *solver_type_table[]=
{
    "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
    "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
    "", "", "",
    "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
    int nr_feature=model_->nr_feature;
    const parameter& param = model_->param;

    FILE *fp = fopen(model_file_name,"w");
    if(fp==NULL) return -1;

    char *old_locale = setlocale(LC_ALL, NULL);
    if (old_locale)
        old_locale = strdup(old_locale);
    setlocale(LC_ALL, "C");

    int nr_w;
    if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
        nr_w=1;
    else
        nr_w=model_->nr_class;

    fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
    fprintf(fp, "nr_class %d\n", model_->nr_class);

    if(model_->label)
        fprintf(fp, "label");
        for(i=0; i<model_->nr_class; i++)
            fprintf(fp, " %d", model_->label[i]);

    fprintf(fp, "nr_feature %d\n", nr_feature);
    fprintf(fp, "bias %.17g\n", model_->bias);

    fprintf(fp, "w\n");
    for(i=0; i<w_size; i++)
        for(j=0; j<nr_w; j++)
            fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
        fprintf(fp, "\n");

    setlocale(LC_ALL, old_locale);
    free(old_locale);

    if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
    else return 0;
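
// The resulting text file therefore looks like (illustrative values):
//
//   solver_type L2R_L2LOSS_SVC_DUAL
//   nr_class 2
//   label 1 -1
//   nr_feature 2
//   bias -1
//   w
//   0.43
//   -0.27
//
// one w line per feature (nr_w columns per line in the multi-class case),
// printed under the "C" locale so that load_model can parse it anywhere.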
//
// FSCANF helps to handle fscanf failures.
// Its do-while block avoids the ambiguity when
// it is used as a statement in an if-else block.
//
#define FSCANF(_stream, _format, _var)do\
{\
    if (fscanf(_stream, _format, _var) != 1)\
    {\
        fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
        EXIT_LOAD_MODEL()\
    }\
}while(0)
// EXIT_LOAD_MODEL should NOT end with a semicolon.
#define EXIT_LOAD_MODEL()\
{\
    setlocale(LC_ALL, old_locale);\
    free(model_->label);\
    free(model_);\
    free(old_locale);\
    return NULL;\
}

struct model *load_model(const char *model_file_name)
    FILE *fp = fopen(model_file_name,"r");
    if(fp==NULL) return NULL;

    int nr_feature;
    int nr_class;
    double bias;
    model *model_ = Malloc(model,1);
    parameter& param = model_->param;
    // parameters for training only won't be assigned, but arrays are assigned as NULL for safety
    param.nr_weight = 0;
    param.weight_label = NULL;
    param.weight = NULL;
    param.init_sol = NULL;

    model_->label = NULL;

    char *old_locale = setlocale(LC_ALL, NULL);
    if (old_locale)
        old_locale = strdup(old_locale);
    setlocale(LC_ALL, "C");

    char cmd[81];
    while(1)
        FSCANF(fp,"%80s",cmd);
        if(strcmp(cmd,"solver_type")==0)
            FSCANF(fp,"%80s",cmd);
            int i;
            for(i=0;solver_type_table[i];i++)
                if(strcmp(solver_type_table[i],cmd)==0)
                    param.solver_type=i;
                    break;
            if(solver_type_table[i] == NULL)
                fprintf(stderr,"unknown solver type.\n");
                EXIT_LOAD_MODEL()
        else if(strcmp(cmd,"nr_class")==0)
            FSCANF(fp,"%d",&nr_class);
            model_->nr_class=nr_class;
        else if(strcmp(cmd,"nr_feature")==0)
            FSCANF(fp,"%d",&nr_feature);
            model_->nr_feature=nr_feature;
        else if(strcmp(cmd,"bias")==0)
            FSCANF(fp,"%lf",&bias);
            model_->bias=bias;
        else if(strcmp(cmd,"w")==0)
            break;
        else if(strcmp(cmd,"label")==0)
            int nr_class = model_->nr_class;
            model_->label = Malloc(int,nr_class);
            for(int i=0;i<nr_class;i++)
                FSCANF(fp,"%d",&model_->label[i]);
        else
            fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
            EXIT_LOAD_MODEL()

    nr_feature=model_->nr_feature;
    int nr_w;
    if(nr_class==2 && param.solver_type != MCSVM_CS)
        nr_w = 1;
    else
        nr_w = nr_class;

    model_->w=Malloc(double, w_size*nr_w);
    for(i=0; i<w_size; i++)
        for(j=0; j<nr_w; j++)
            FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);

    setlocale(LC_ALL, old_locale);
    free(old_locale);

    if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
    return model_;

int get_nr_feature(const model *model_)
    return model_->nr_feature;

int get_nr_class(const model *model_)
    return model_->nr_class;

void get_labels(const model *model_, int* label)
    if (model_->label != NULL)
        for(int i=0;i<model_->nr_class;i++)
            label[i] = model_->label[i];

// use inline here for better performance (around 20% faster than the non-inline one)
static inline double get_w_value(const struct model *model_, int idx, int label_idx)
    int nr_class = model_->nr_class;
    int solver_type = model_->param.solver_type;
    const double *w = model_->w;

    if(idx < 0 || idx > model_->nr_feature)
        return 0;
    if(check_regression_model(model_))
        return w[idx];
    else
        if(label_idx < 0 || label_idx >= nr_class)
            return 0;
        if(nr_class == 2 && solver_type != MCSVM_CS)
            if(label_idx == 0)
                return w[idx];
            else
                return -w[idx];
        else
            return w[idx*nr_class+label_idx];

// feat_idx: starting from 1 to nr_feature
// label_idx: starting from 0 to nr_class-1 for classification models;
//            for regression models, label_idx is ignored.
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
    if(feat_idx > model_->nr_feature)
        return 0;
    return get_w_value(model_, feat_idx-1, label_idx);

double get_decfun_bias(const struct model *model_, int label_idx)
    int bias_idx = model_->nr_feature;
    double bias = model_->bias;
    if(bias <= 0)
        return 0;
    else
        return bias*get_w_value(model_, bias_idx, label_idx);
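
// Usage sketch: the decision value for class k of a trained model m can be
// reassembled from these accessors as
//
//   double dec = 0;
//   for(int j=1; j<=get_nr_feature(m); j++)
//       dec += get_decfun_coef(m, j, k) * x_dense[j-1];   // x_dense: hypothetical dense copy of x
//   dec += get_decfun_bias(m, k);
//
// matching what predict_values() computes from the sparse representation.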
void free_model_content(struct model *model_ptr)
    if(model_ptr->w != NULL)
        free(model_ptr->w);
    if(model_ptr->label != NULL)
        free(model_ptr->label);

void free_and_destroy_model(struct model **model_ptr_ptr)
    struct model *model_ptr = *model_ptr_ptr;
    if(model_ptr != NULL)
        free_model_content(model_ptr);
        free(model_ptr);

void destroy_param(parameter* param)
    if(param->weight_label != NULL)
        free(param->weight_label);
    if(param->weight != NULL)
        free(param->weight);
    if(param->init_sol != NULL)
        free(param->init_sol);

const char *check_parameter(const problem *prob, const parameter *param)
    if(param->solver_type != L2R_LR
    && param->solver_type != L2R_L2LOSS_SVC_DUAL
    && param->solver_type != L2R_L2LOSS_SVC
    && param->solver_type != L2R_L1LOSS_SVC_DUAL
    && param->solver_type != MCSVM_CS
    && param->solver_type != L1R_L2LOSS_SVC
    && param->solver_type != L1R_LR
    && param->solver_type != L2R_LR_DUAL
    && param->solver_type != L2R_L2LOSS_SVR
    && param->solver_type != L2R_L2LOSS_SVR_DUAL
    && param->solver_type != L2R_L1LOSS_SVR_DUAL)
        return "unknown solver type";

    if(param->init_sol != NULL
    && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
        return "Initial-solution specification supported only for solvers L2R_LR and L2R_L2LOSS_SVC";

    return NULL;

int check_probability_model(const struct model *model_)
    return (model_->param.solver_type==L2R_LR ||
            model_->param.solver_type==L2R_LR_DUAL ||
            model_->param.solver_type==L1R_LR);

int check_regression_model(const struct model *model_)
    return (model_->param.solver_type==L2R_L2LOSS_SVR ||
            model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
            model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);

void set_print_string_function(void (*print_func)(const char*))
    if (print_func == NULL)
        liblinear_print_string = &print_string_stdout;
    else
        liblinear_print_string = print_func;