int liblinear_version = LIBLINEAR_VERSION;
typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
memcpy((void *)dst,(void *)src,sizeof(T)*n);
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
static void print_string_stdout(const char *s)
static void print_null(const char *s) {}
static void (*liblinear_print_string) (const char *) = &print_string_stdout;
static void info(const char *fmt,...)
(*liblinear_print_string)(buf);
static void info(const char *fmt,...) {}
static double nrm2_sq(const feature_node *x)
ret += x->value*x->value;
static double dot(const double *s, const feature_node *x)
ret += s[x->index-1]*x->value;
static double sparse_dot(const feature_node *x1, const feature_node *x2)
while(x1->index != -1 && x2->index != -1)
if(x1->index == x2->index)
ret += x1->value * x2->value;
if(x1->index > x2->index)
static void axpy(const double a, const feature_node *x, double *y)
y[x->index-1] += a*x->value;
// L2-regularized empirical risk minimization
// min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss
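// Illustrative sketch (not part of LIBLINEAR): the objective above written out
// for logistic loss \xi(t) = log(1 + exp(-y t)) on one dense instance. The
// helper name and its arguments are assumptions of this sketch, not library API.
static double example_erm_objective(const double *w, const double *x,
                                    double y, double C, int n)
{
	double wTw = 0, wx = 0;
	for(int j = 0; j < n; j++)
	{
		wTw += w[j]*w[j];    // regularizer w^T w
		wx  += w[j]*x[j];    // decision value w^T x
	}
	return 0.5*wTw + C*log(1 + exp(-y*wx));
}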
class l2r_erm_fun: public function
l2r_erm_fun(const problem *prob, const parameter *param, double *C);
double fun(double *w);
double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha);
int get_nr_variable(void);
virtual double C_times_loss(int i, double wx_i) = 0;
void Xv(double *v, double *Xv);
void XTv(double *v, double *XTv);
double *tmp; // a working array
l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C)
this->regularize_bias = param->regularize_bias;
l2r_erm_fun::~l2r_erm_fun()
double l2r_erm_fun::fun(double *w)
int w_size=get_nr_variable();
for(i=0;i<w_size;i++)
if(regularize_bias == 0)
wTw -= w[w_size-1]*w[w_size-1];
f += C_times_loss(i, wx[i]);
int l2r_erm_fun::get_nr_variable(void)
// On entry *f must be the function value of w
// On exit w is updated and *f is the new function value
double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
int w_size = get_nr_variable();
int max_num_linesearch = 20;
for (i=0;i<w_size;i++)
if(regularize_bias == 0)
// bias not used in calculating (w + \alpha s)^T (w + \alpha s)
sTs -= s[w_size-1] * s[w_size-1];
wTs -= s[w_size-1] * w[w_size-1];
int num_linesearch = 0;
for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
double inner_product = tmp[i] * alpha + wx[i];
loss += C_times_loss(i, inner_product);
*f = loss + (alpha * alpha * sTs + wTw) / 2.0 + alpha * wTs;
if (*f - fold <= eta * alpha * gTs)
wx[i] += alpha * tmp[i];
if (num_linesearch >= max_num_linesearch)
for (i=0;i<w_size;i++)
w[i] += alpha * s[i];
wTw += alpha * alpha * sTs + 2 * alpha * wTs;
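// Illustrative sketch (not part of LIBLINEAR): the backtracking rule used
// above, isolated for a generic objective. alpha is halved until the
// sufficient-decrease condition f(w + alpha*s) - f(w) <= eta*alpha*g^T s
// holds; f_eval and all argument names are assumptions of this sketch.
static double example_backtracking(double (*f_eval)(const double *w, int n),
                                   double *w, const double *s, const double *g,
                                   int n, double alpha)
{
	const double eta = 0.01; // a typical sufficient-decrease constant
	double fold = f_eval(w, n);
	double gTs = 0;
	for(int j = 0; j < n; j++)
		gTs += g[j]*s[j];
	for(int t = 0; t < 20; t++)
	{
		for(int j = 0; j < n; j++)   // try w + alpha*s
			w[j] += alpha*s[j];
		double fnew = f_eval(w, n);
		if(fnew - fold <= eta*alpha*gTs)
			return fnew;             // accept: w is already updated
		for(int j = 0; j < n; j++)   // reject: restore w, halve the step
			w[j] -= alpha*s[j];
		alpha *= 0.5;
	}
	return fold; // line search failed; w is unchanged
}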
void l2r_erm_fun::Xv(double *v, double *Xv)
feature_node **x=prob->x;
Xv[i]=sparse_operator::dot(v, x[i]);
void l2r_erm_fun::XTv(double *v, double *XTv)
int w_size=get_nr_variable();
feature_node **x=prob->x;
for(i=0;i<w_size;i++)
sparse_operator::axpy(v[i], x[i], XTv);
class l2r_lr_fun: public l2r_erm_fun
l2r_lr_fun(const problem *prob, const parameter *param, double *C);
void grad(double *w, double *g);
void Hv(double *s, double *Hs);
void get_diag_preconditioner(double *M);
double C_times_loss(int i, double wx_i);
l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C):
l2r_erm_fun(prob, param, C)
l2r_lr_fun::~l2r_lr_fun()
double l2r_lr_fun::C_times_loss(int i, double wx_i)
double ywx_i = wx_i * prob->y[i];
return C[i]*log(1 + exp(-ywx_i));
return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
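// Illustrative note (not from LIBLINEAR): the two branches above compute the
// same value C*log(1 + exp(-ywx)) while never exponentiating a large positive
// number. The same idea as a standalone helper (an assumption of this sketch):
static double example_stable_log1p_exp(double t) // log(1 + exp(t)) without overflow
{
	if(t <= 0)
		return log(1 + exp(t));  // exp(t) <= 1 here
	return t + log(1 + exp(-t)); // exp(-t) <= 1 here
}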
void l2r_lr_fun::grad(double *w, double *g)
int w_size=get_nr_variable();
tmp[i] = 1/(1 + exp(-y[i]*wx[i]));
D[i] = tmp[i]*(1-tmp[i]);
tmp[i] = C[i]*(tmp[i]-1)*y[i];
for(i=0;i<w_size;i++)
if(regularize_bias == 0)
g[w_size-1] -= w[w_size-1];
void l2r_lr_fun::get_diag_preconditioner(double *M)
int w_size=get_nr_variable();
feature_node **x = prob->x;
for (i=0; i<w_size; i++)
if(regularize_bias == 0)
feature_node *xi = x[i];
while (xi->index!=-1)
M[xi->index-1] += xi->value*xi->value*C[i]*D[i];
void l2r_lr_fun::Hv(double *s, double *Hs)
int w_size=get_nr_variable();
feature_node **x=prob->x;
for(i=0;i<w_size;i++)
feature_node * const xi=x[i];
double xTs = sparse_operator::dot(s, xi);
sparse_operator::axpy(xTs, xi, Hs);
for(i=0;i<w_size;i++)
Hs[i] = s[i] + Hs[i];
if(regularize_bias == 0)
Hs[w_size-1] -= s[w_size-1];
class l2r_l2_svc_fun: public l2r_erm_fun
l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C);
void grad(double *w, double *g);
void Hv(double *s, double *Hs);
void get_diag_preconditioner(double *M);
void subXTv(double *v, double *XTv);
double C_times_loss(int i, double wx_i);
l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C):
l2r_erm_fun(prob, param, C)
I = new int[prob->l];
l2r_l2_svc_fun::~l2r_l2_svc_fun()
double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
double d = 1 - prob->y[i] * wx_i;
void l2r_l2_svc_fun::grad(double *w, double *g)
int w_size=get_nr_variable();
tmp[i] = wx[i] * y[i];
tmp[sizeI] = C[i]*y[i]*(tmp[i]-1);
for(i=0;i<w_size;i++)
g[i] = w[i] + 2*g[i];
if(regularize_bias == 0)
g[w_size-1] -= w[w_size-1];
void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
int w_size=get_nr_variable();
feature_node **x = prob->x;
for (i=0; i<w_size; i++)
if(regularize_bias == 0)
for (i=0; i<sizeI; i++)
feature_node *xi = x[idx];
while (xi->index!=-1)
M[xi->index-1] += xi->value*xi->value*C[idx]*2;
void l2r_l2_svc_fun::Hv(double *s, double *Hs)
int w_size=get_nr_variable();
feature_node **x=prob->x;
for(i=0;i<w_size;i++)
feature_node * const xi=x[I[i]];
double xTs = sparse_operator::dot(s, xi);
sparse_operator::axpy(xTs, xi, Hs);
for(i=0;i<w_size;i++)
Hs[i] = s[i] + 2*Hs[i];
if(regularize_bias == 0)
Hs[w_size-1] -= s[w_size-1];
void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
int w_size=get_nr_variable();
feature_node **x=prob->x;
for(i=0;i<w_size;i++)
sparse_operator::axpy(v[i], x[I[i]], XTv);
class l2r_l2_svr_fun: public l2r_l2_svc_fun
l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C);
void grad(double *w, double *g);
double C_times_loss(int i, double wx_i);
l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C):
l2r_l2_svc_fun(prob, param, C)
this->regularize_bias = param->regularize_bias;
double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
double d = wx_i - prob->y[i];
return C[i]*(d+p)*(d+p);
return C[i]*(d-p)*(d-p);
void l2r_l2_svr_fun::grad(double *w, double *g)
int w_size=get_nr_variable();
// generate index set I
tmp[sizeI] = C[i]*(d+p);
tmp[sizeI] = C[i]*(d-p);
for(i=0;i<w_size;i++)
g[i] = w[i] + 2*g[i];
if(regularize_bias == 0)
g[w_size-1] -= w[w_size-1];
// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
// min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i \alpha^m_i
// s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
// where e^m_i = 0 if y_i = m,
// e^m_i = 1 if y_i != m,
// C^m_i = C if m = y_i,
// C^m_i = 0 if m != y_i,
// and w_m(\alpha) = \sum_i \alpha^m_i x_i
// eps is the stopping tolerance
// solution will be put in w
// See Appendix of LIBLINEAR paper, Fan et al. (2008)
#define GETI(i) ((int) prob->y[i])
// To support weights for instances, use GETI(i) (i)
class Solver_MCSVM_CS
Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
void Solve(double *w);
void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
this->w_size = prob->n;
this->nr_class = nr_class;
this->max_iter = max_iter;
this->B = new double[nr_class];
this->G = new double[nr_class];
this->C = weighted_C;
Solver_MCSVM_CS::~Solver_MCSVM_CS()
int compare_double(const void *a, const void *b)
if(*(double *)a > *(double *)b)
if(*(double *)a < *(double *)b)
void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
clone(D, B, active_i);
qsort(D, active_i, sizeof(double), compare_double);
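// note: compare_double orders the array in descending order, so after qsort
// D[0] holds the largest entry, which seeds the search for beta below.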
double beta = D[0] - A_i*C_yi;
for(r=1;r<active_i && beta<r*D[r];r++)
for(r=0;r<active_i;r++)
alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
alpha_new[r] = min((double)0, (beta - B[r])/A_i);
bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
if(alpha_i == bound && G[m] < minG)
void Solver_MCSVM_CS::Solve(double *w)
double *alpha = new double[l*nr_class];
double *alpha_new = new double[nr_class];
int *index = new int[l];
double *QD = new double[l];
int *d_ind = new int[nr_class];
double *d_val = new double[nr_class];
int *alpha_index = new int[nr_class*l];
int *y_index = new int[l];
int *active_size_i = new int[l];
double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
bool start_from_all = true;
// Initial alpha can be set here. Note that
// sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
// alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
// alpha[i*nr_class+m] <= 0 if prob->y[i] != m
// If initial alpha isn't zero, uncomment the for loop below to initialize w
for(i=0;i<l*nr_class;i++)
for(i=0;i<w_size*nr_class;i++)
for(m=0;m<nr_class;m++)
alpha_index[i*nr_class+m] = m;
feature_node *xi = prob->x[i];
while(xi->index != -1)
double val = xi->value;
// Uncomment the for loop if initial alpha isn't zero
// for(m=0; m<nr_class; m++)
// w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
active_size_i[i] = nr_class;
y_index[i] = (int)prob->y[i];
while(iter < max_iter)
double stopping = -INF;
for(i=0;i<active_size;i++)
int j = i+rand()%(active_size-i);
swap(index[i], index[j]);
for(s=0;s<active_size;s++)
double *alpha_i = &alpha[i*nr_class];
int *alpha_index_i = &alpha_index[i*nr_class];
for(m=0;m<active_size_i[i];m++)
if(y_index[i] < active_size_i[i])
feature_node *xi = prob->x[i];
while(xi->index != -1)
double *w_i = &w[(xi->index-1)*nr_class];
for(m=0;m<active_size_i[i];m++)
G[m] += w_i[alpha_index_i[m]]*(xi->value);
for(m=0;m<active_size_i[i];m++)
if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
if(y_index[i] < active_size_i[i])
if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
minG = G[y_index[i]];
for(m=0;m<active_size_i[i];m++)
if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
while(active_size_i[i]>m)
if(!be_shrunk(i, active_size_i[i], y_index[i],
alpha_i[alpha_index_i[active_size_i[i]]], minG))
swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
swap(G[m], G[active_size_i[i]]);
if(y_index[i] == active_size_i[i])
else if(y_index[i] == m)
y_index[i] = active_size_i[i];
if(active_size_i[i] <= 1)
swap(index[s], index[active_size]);
if(maxG-minG <= 1e-12)
stopping = max(maxG - minG, stopping);
for(m=0;m<active_size_i[i];m++)
B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];
solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
for(m=0;m<active_size_i[i];m++)
double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
alpha_i[alpha_index_i[m]] = alpha_new[m];
d_ind[nz_d] = alpha_index_i[m];
while(xi->index != -1)
double *w_i = &w[(xi->index-1)*nr_class];
w_i[d_ind[m]] += d_val[m]*xi->value;
if(stopping < eps_shrink)
if(stopping < eps && start_from_all == true)
active_size_i[i] = nr_class;
eps_shrink = max(eps_shrink/2, eps);
start_from_all = true;
start_from_all = false;
info("\noptimization finished, #iter = %d\n",iter);
if (iter >= max_iter)
info("\nWARNING: reaching max number of iterations\n");
// calculate objective value
for(i=0;i<w_size*nr_class;i++)
for(i=0;i<l*nr_class;i++)
if(fabs(alpha[i]) > 0)
v -= alpha[i*nr_class+(int)prob->y[i]];
info("Objective value = %lf\n",v);
info("nSV = %d\n",nSV);
delete [] alpha_index;
delete [] active_size_i;
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
// s.t. 0 <= \alpha_i <= upper_bound_i,
// where Qij = yi yj xi^T xj and
// D is a diagonal matrix
// upper_bound_i = Cp if y_i = 1
// upper_bound_i = Cn if y_i = -1
// upper_bound_i = INF
// D_ii = 1/(2*Cp) if y_i = 1
// D_ii = 1/(2*Cn) if y_i = -1
// eps is the stopping tolerance
// solution will be put in w
// this function returns the number of iterations
// See Algorithm 3 of Hsieh et al., ICML 2008
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
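// Illustrative sketch (not from LIBLINEAR): the core update of this solver on
// one coordinate. Given the gradient G and the diagonal entry Qii of Q + D,
// the Newton step is projected back onto the box [0, C]. Names here are
// assumptions of this sketch.
static double example_projected_cd_update(double alpha_i, double G,
                                          double Qii, double C)
{
	double a = alpha_i - G/Qii; // unconstrained one-variable minimizer
	if(a < 0) return 0;         // clip to the lower bound
	if(a > C) return C;         // clip to the upper bound
	return a;
}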
static int solve_l2r_l1l2_svc(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=500)
int w_size = prob->n;
double eps = param->eps;
int solver_type = param->solver_type;
double *QD = new double[l];
int *index = new int[l];
double *alpha = new double[l];
schar *y = new schar[l];
// PG: projected gradient, for shrinking and stopping
double PGmax_old = INF;
double PGmin_old = -INF;
double PGmax_new, PGmin_new;
// default solver_type: L2R_L2LOSS_SVC_DUAL
double diag[3] = {0.5/Cn, 0, 0.5/Cp};
double upper_bound[3] = {INF, 0, INF};
if(solver_type == L2R_L1LOSS_SVC_DUAL)
// Initial alpha can be set here. Note that
// 0 <= alpha[i] <= upper_bound[GETI(i)]
for(i=0; i<w_size; i++)
QD[i] = diag[GETI(i)];
feature_node * const xi = prob->x[i];
QD[i] += sparse_operator::nrm2_sq(xi);
sparse_operator::axpy(y[i]*alpha[i], xi, w);
while (iter < max_iter)
for (i=0; i<active_size; i++)
int j = i+rand()%(active_size-i);
swap(index[i], index[j]);
for (s=0; s<active_size; s++)
const schar yi = y[i];
feature_node * const xi = prob->x[i];
G = yi*sparse_operator::dot(w, xi)-1;
C = upper_bound[GETI(i)];
G += alpha[i]*diag[GETI(i)];
swap(index[s], index[active_size]);
else if (alpha[i] == C)
swap(index[s], index[active_size]);
PGmax_new = max(PGmax_new, PG);
PGmin_new = min(PGmin_new, PG);
if(fabs(PG) > 1.0e-12)
double alpha_old = alpha[i];
alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
d = (alpha[i] - alpha_old)*yi;
sparse_operator::axpy(d, xi, w);
if(PGmax_new - PGmin_new <= eps)
if(active_size == l)
PGmax_old = PGmax_new;
PGmin_old = PGmin_new;
info("\noptimization finished, #iter = %d\n",iter);
// calculate objective value
for(i=0; i<w_size; i++)
v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
info("Objective value = %lf\n",v/2);
info("nSV = %d\n",nSV);
// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
// min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
// s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
// where Qij = xi^T xj and
// diag(lambda) is the diagonal matrix with the lambda_i below as its entries
// upper_bound_i = C
// upper_bound_i = INF
// lambda_i = 1/(2*C)
// eps is the stopping tolerance
// solution will be put in w
// this function returns the number of iterations
// See Algorithm 4 of Ho and Lin, 2012
// To support weights for instances, use GETI(i) (i)
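// Illustrative sketch (not from LIBLINEAR): the one-variable Newton direction
// used by this solver. It minimizes 0.5*H*d^2 + G*d + p*|b + d| where b is the
// current beta_i; Gp = G + p and Gn = G - p are the derivatives on the two
// sides of the kink at b + d = 0. Names are assumptions of this sketch.
static double example_svr_newton_direction(double b, double G, double H, double p)
{
	double Gp = G + p, Gn = G - p;
	if(Gp < H*b)
		return -Gp/H; // minimizer lies in the region b + d > 0
	if(Gn > H*b)
		return -Gn/H; // minimizer lies in the region b + d < 0
	return -b;        // minimizer sits exactly at the kink
}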
static int solve_l2r_l1l2_svr(const problem *prob, const parameter *param, double *w, int max_iter=500)
const int solver_type = param->solver_type;
double C = param->C;
double p = param->p;
int w_size = prob->n;
double eps = param->eps;
int active_size = l;
int *index = new int[l];
double Gmax_old = INF;
double Gmax_new, Gnorm1_new;
double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
double *beta = new double[l];
double *QD = new double[l];
double *y = prob->y;
// L2R_L2LOSS_SVR_DUAL
double lambda[1], upper_bound[1];
upper_bound[0] = INF;
if(solver_type == L2R_L1LOSS_SVR_DUAL)
// Initial beta can be set here. Note that
// -upper_bound <= beta[i] <= upper_bound
for(i=0; i<w_size; i++)
feature_node * const xi = prob->x[i];
QD[i] = sparse_operator::nrm2_sq(xi);
sparse_operator::axpy(beta[i], xi, w);
while(iter < max_iter)
for(i=0; i<active_size; i++)
int j = i+rand()%(active_size-i);
swap(index[i], index[j]);
for(s=0; s<active_size; s++)
G = -y[i] + lambda[GETI(i)]*beta[i];
H = QD[i] + lambda[GETI(i)];
feature_node * const xi = prob->x[i];
G += sparse_operator::dot(w, xi);
double violation = 0;
else if(Gp>Gmax_old && Gn<-Gmax_old)
swap(index[s], index[active_size]);
else if(beta[i] >= upper_bound[GETI(i)])
else if(Gp < -Gmax_old)
swap(index[s], index[active_size]);
else if(beta[i] <= -upper_bound[GETI(i)])
else if(Gn > Gmax_old)
swap(index[s], index[active_size]);
else if(beta[i] > 0)
violation = fabs(Gp);
violation = fabs(Gn);
Gmax_new = max(Gmax_new, violation);
Gnorm1_new += violation;
// obtain Newton direction d
else if(Gn > H*beta[i])
if(fabs(d) < 1.0e-12)
double beta_old = beta[i];
beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
d = beta[i]-beta_old;
sparse_operator::axpy(d, xi, w);
Gnorm1_init = Gnorm1_new;
if(Gnorm1_new <= eps*Gnorm1_init)
if(active_size == l)
Gmax_old = Gmax_new;
info("\noptimization finished, #iter = %d\n", iter);
// calculate objective value
for(i=0; i<w_size; i++)
v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
info("Objective value = %lf\n", v);
info("nSV = %d\n",nSV);
// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
// min_\alpha 0.5(\alpha^T Q \alpha) + \sum_i [\alpha_i log(\alpha_i) + (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i)],
// s.t. 0 <= \alpha_i <= upper_bound_i,
// where Qij = yi yj xi^T xj and
// upper_bound_i = Cp if y_i = 1
// upper_bound_i = Cn if y_i = -1
// eps is the stopping tolerance
// solution will be put in w
// this function returns the number of iterations
// See Algorithm 5 of Yu et al., MLJ 2010
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
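// Illustrative sketch (not from LIBLINEAR): one guarded Newton step for the
// one-variable sub-problem g(z) = z*log(z) + (C-z)*log(C-z)
// + 0.5*a*(z-z0)^2 + b*(z-z0) on the open interval (0, C), matching the
// derivatives used below: g'(z) = a*(z-z0) + b + log(z/(C-z)) and
// g''(z) = a + C/((C-z)*z). Names are assumptions of this sketch.
static double example_inner_newton_step(double z, double z0,
                                        double a, double b, double C)
{
	double gp   = a*(z - z0) + b + log(z/(C - z));
	double gpp  = a + C/((C - z)*z);
	double tmpz = z - gp/gpp;
	if(tmpz <= 0)
		return 0.1*z; // damp toward the interior instead of leaving (0, C)
	return tmpz;
}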
static int solve_l2r_lr_dual(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=500)
int w_size = prob->n;
double eps = param->eps;
double *xTx = new double[l];
int *index = new int[l];
double *alpha = new double[2*l]; // store alpha and C - alpha
schar *y = new schar[l];
int max_inner_iter = 100; // for inner Newton
double innereps = 1e-2;
double innereps_min = min(1e-8, eps);
double upper_bound[3] = {Cn, 0, Cp};
// Initial alpha can be set here. Note that
// 0 < alpha[i] < upper_bound[GETI(i)]
// alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
for(i=0; i<w_size; i++)
feature_node * const xi = prob->x[i];
xTx[i] = sparse_operator::nrm2_sq(xi);
sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
while (iter < max_iter)
int j = i+rand()%(l-i);
swap(index[i], index[j]);
int newton_iter = 0;
const schar yi = y[i];
double C = upper_bound[GETI(i)];
double ywTx = 0, xisq = xTx[i];
feature_node * const xi = prob->x[i];
ywTx = yi*sparse_operator::dot(w, xi);
double a = xisq, b = ywTx;
// Decide to minimize g_1(z) or g_2(z)
int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
// g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
double alpha_old = alpha[ind1];
double z = alpha_old;
double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
Gmax = max(Gmax, fabs(gp));
// Newton method on the sub-problem
const double eta = 0.1; // xi in the paper
while (inner_iter <= max_inner_iter)
if(fabs(gp) < innereps)
double gpp = a + C/(C-z)/z;
double tmpz = z - gp/gpp;
else // tmpz in (0, C)
gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
if(inner_iter > 0) // update w
sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
if(newton_iter <= l/10)
innereps = max(innereps_min, 0.1*innereps);
info("\noptimization finished, #iter = %d\n",iter);
// calculate objective value
for(i=0; i<w_size; i++)
v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
info("Objective value = %lf\n", v);
// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
// min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
// eps is the stopping tolerance
// solution will be put in w
// this function returns the number of iterations
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
// must have been added to the original data. (see -B and -R option)
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
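// Illustrative sketch (not from LIBLINEAR): the per-coordinate optimality
// violation for an objective L(w) + ||w||_1, given G = dL/dw_j. At w_j = 0
// the subdifferential is [G-1, G+1]; elsewhere the derivative is G +/- 1.
// Names are assumptions of this sketch.
static double example_l1_violation(double wj, double G)
{
	double Gp = G + 1, Gn = G - 1;
	if(wj == 0)
	{
		if(Gp < 0) return -Gp; // increasing w_j would decrease the objective
		if(Gn > 0) return Gn;  // decreasing w_j would decrease the objective
		return 0;              // this coordinate is already optimal
	}
	return (wj > 0) ? fabs(Gp) : fabs(Gn);
}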
static int solve_l1r_l2_svc(const problem *prob_col, const parameter* param, double *w, double Cp, double Cn, double eps)
int l = prob_col->l;
int w_size = prob_col->n;
int regularize_bias = param->regularize_bias;
int max_iter = 1000;
int active_size = w_size;
int max_num_linesearch = 20;
double sigma = 0.01;
double d, G_loss, G, H;
double Gmax_old = INF;
double Gmax_new, Gnorm1_new;
double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
double d_old, d_diff;
double loss_old = 0, loss_new;
double appxcond, cond;
int *index = new int[w_size];
schar *y = new schar[l];
double *b = new double[l]; // b = 1-ywTx
double *xj_sq = new double[w_size];
double C[3] = {Cn,0,Cp};
// Initial w can be set here.
for(j=0; j<w_size; j++)
if(prob_col->y[j] > 0)
for(j=0; j<w_size; j++)
while(x->index != -1)
int ind = x->index-1;
x->value *= y[ind]; // x->value stores yi*xij
double val = x->value;
xj_sq[j] += C[GETI(ind)]*val*val;
while(iter < max_iter)
for(j=0; j<active_size; j++)
int i = j+rand()%(active_size-j);
swap(index[i], index[j]);
for(s=0; s<active_size; s++)
while(x->index != -1)
int ind = x->index-1;
double val = x->value;
double tmp = C[GETI(ind)]*val;
G_loss -= tmp*b[ind];
double violation = 0;
double Gp = 0, Gn = 0;
if(j == w_size-1 && regularize_bias == 0)
violation = fabs(G);
else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
swap(index[s], index[active_size]);
violation = fabs(Gp);
violation = fabs(Gn);
Gmax_new = max(Gmax_new, violation);
Gnorm1_new += violation;
// obtain Newton direction d
if(j == w_size-1 && regularize_bias == 0)
else if(Gn > H*w[j])
if(fabs(d) < 1.0e-12)
if(j == w_size-1 && regularize_bias == 0)
delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
if(j == w_size-1 && regularize_bias == 0)
cond = -sigma*delta;
cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
appxcond = xj_sq[j]*d*d + G_loss*d + cond;
sparse_operator::axpy(d_diff, x, b);
if(num_linesearch == 0)
while(x->index != -1)
int ind = x->index-1;
loss_old += C[GETI(ind)]*b[ind]*b[ind];
double b_new = b[ind] + d_diff*x->value;
loss_new += C[GETI(ind)]*b_new*b_new;
while(x->index != -1)
int ind = x->index-1;
double b_new = b[ind] + d_diff*x->value;
loss_new += C[GETI(ind)]*b_new*b_new;
cond = cond + loss_new - loss_old;
// recompute b[] if line search takes too many steps
if(num_linesearch >= max_num_linesearch)
for(int i=0; i<l; i++)
for(int i=0; i<w_size; i++)
if(w[i]==0) continue;
sparse_operator::axpy(-w[i], x, b);
Gnorm1_init = Gnorm1_new;
if(Gnorm1_new <= eps*Gnorm1_init)
if(active_size == w_size)
active_size = w_size;
Gmax_old = Gmax_new;
info("\noptimization finished, #iter = %d\n", iter);
if(iter >= max_iter)
info("\nWARNING: reaching max number of iterations\n");
// calculate objective value
for(j=0; j<w_size; j++)
while(x->index != -1)
x->value *= prob_col->y[x->index-1]; // restore x->value
if (regularize_bias == 0)
v -= fabs(w[w_size-1]);
v += C[GETI(j)]*b[j]*b[j];
info("Objective value = %lf\n", v);
info("#nonzeros/#features = %d/%d\n", nnz, w_size);
// A coordinate descent algorithm for
// L1-regularized logistic regression problems
// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
// eps is the stopping tolerance
// solution will be put in w
// this function returns the number of iterations
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
// must have been added to the original data. (see -B and -R option)
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
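// Illustrative sketch (not from LIBLINEAR): the two per-instance weights a
// CDN-type logistic solver maintains. With e = exp(w^T x_i) (the label folded
// into x_i), the gradient weight is C/(1+e) and the Hessian weight is
// C*e/(1+e)^2. Names are assumptions of this sketch.
static void example_logistic_weights(double exp_wTx, double C,
                                     double *tau, double *D)
{
	double t = 1/(1 + exp_wTx);
	*tau = C*t;            // scales each instance's gradient contribution
	*D   = C*exp_wTx*t*t;  // scales each instance's Hessian contribution
}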
static int solve_l1r_lr(const problem *prob_col, const parameter *param, double *w, double Cp, double Cn, double eps)
int l = prob_col->l;
int w_size = prob_col->n;
int regularize_bias = param->regularize_bias;
int j, s, newton_iter=0, iter=0;
int max_newton_iter = 100;
int max_iter = 1000;
int max_num_linesearch = 20;
double inner_eps = 1;
double sigma = 0.01;
double w_norm, w_norm_new;
double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
double Gmax_old = INF;
double Gmax_new, Gnorm1_new;
double QP_Gmax_old = INF;
double QP_Gmax_new, QP_Gnorm1_new;
double delta, negsum_xTd, cond;
int *index = new int[w_size];
schar *y = new schar[l];
double *Hdiag = new double[w_size];
double *Grad = new double[w_size];
double *wpd = new double[w_size];
double *xjneg_sum = new double[w_size];
double *xTd = new double[l];
double *exp_wTx = new double[l];
double *exp_wTx_new = new double[l];
double *tau = new double[l];
double *D = new double[l];
double C[3] = {Cn,0,Cp};
// Initial w can be set here.
for(j=0; j<w_size; j++)
if(prob_col->y[j] > 0)
for(j=0; j<w_size; j++)
w_norm += fabs(w[j]);
while(x->index != -1)
int ind = x->index-1;
double val = x->value;
exp_wTx[ind] += w[j]*val;
xjneg_sum[j] += C[GETI(ind)]*val;
if (regularize_bias == 0)
w_norm -= fabs(w[w_size-1]);
exp_wTx[j] = exp(exp_wTx[j]);
double tau_tmp = 1/(1+exp_wTx[j]);
tau[j] = C[GETI(j)]*tau_tmp;
D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
while(newton_iter < max_newton_iter)
active_size = w_size;
for(s=0; s<active_size; s++)
while(x->index != -1)
int ind = x->index-1;
Hdiag[j] += x->value*x->value*D[ind];
tmp += x->value*tau[ind];
Grad[j] = -tmp + xjneg_sum[j];
double violation = 0;
if (j == w_size-1 && regularize_bias == 0)
violation = fabs(Grad[j]);
double Gp = Grad[j]+1;
double Gn = Grad[j]-1;
// outer-level shrinking
else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
swap(index[s], index[active_size]);
violation = fabs(Gp);
violation = fabs(Gn);
Gmax_new = max(Gmax_new, violation);
Gnorm1_new += violation;
if(newton_iter == 0)
Gnorm1_init = Gnorm1_new;
if(Gnorm1_new <= eps*Gnorm1_init)
QP_active_size = active_size;
for(int i=0; i<l; i++)
// optimize QP over wpd
while(iter < max_iter)
for(j=0; j<QP_active_size; j++)
int i = j+rand()%(QP_active_size-j);
swap(index[i], index[j]);
for(s=0; s<QP_active_size; s++)
G = Grad[j] + (wpd[j]-w[j])*nu;
while(x->index != -1)
int ind = x->index-1;
G += x->value*D[ind]*xTd[ind];
double violation = 0;
if (j == w_size-1 && regularize_bias == 0)
// bias term not shrunken
violation = fabs(G);
// inner-level shrinking
else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
swap(index[s], index[QP_active_size]);
violation = fabs(Gp);
violation = fabs(Gn);
// obtain solution of one-variable problem
else if(Gn > H*wpd[j])
QP_Gmax_new = max(QP_Gmax_new, violation);
QP_Gnorm1_new += violation;
if(fabs(z) < 1.0e-12)
z = min(max(z,-10.0),10.0);
sparse_operator::axpy(z, x, xTd);
if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
if(QP_active_size == active_size)
// active set reactivation
QP_active_size = active_size;
QP_Gmax_old = QP_Gmax_new;
if(iter >= max_iter)
info("WARNING: reaching max number of inner iterations\n");
for(j=0; j<w_size; j++)
delta += Grad[j]*(wpd[j]-w[j]);
w_norm_new += fabs(wpd[j]);
if (regularize_bias == 0)
w_norm_new -= fabs(wpd[w_size-1]);
delta += (w_norm_new-w_norm);
for(int i=0; i<l; i++)
negsum_xTd += C[GETI(i)]*xTd[i];
for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
for(int i=0; i<l; i++)
double exp_xTd = exp(xTd[i]);
exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
w_norm = w_norm_new;
for(j=0; j<w_size; j++)
for(int i=0; i<l; i++)
exp_wTx[i] = exp_wTx_new[i];
double tau_tmp = 1/(1+exp_wTx[i]);
tau[i] = C[GETI(i)]*tau_tmp;
D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
for(j=0; j<w_size; j++)
wpd[j] = (w[j]+wpd[j])*0.5;
w_norm_new += fabs(wpd[j]);
if (regularize_bias == 0)
w_norm_new -= fabs(wpd[w_size-1]);
for(int i=0; i<l; i++)
// Recompute some info due to too many line search steps
if(num_linesearch >= max_num_linesearch)
for(int i=0; i<l; i++)
for(int i=0; i<w_size; i++)
if(w[i]==0) continue;
sparse_operator::axpy(w[i], x, exp_wTx);
for(int i=0; i<l; i++)
exp_wTx[i] = exp(exp_wTx[i]);
Gmax_old = Gmax_new;
info("iter %3d #CD cycles %d\n", newton_iter, iter);
info("=========================\n");
info("optimization finished, #iter = %d\n", newton_iter);
if(newton_iter >= max_newton_iter)
info("WARNING: reaching max number of iterations\n");
// calculate objective value
for(j=0; j<w_size; j++)
if (regularize_bias == 0)
v -= fabs(w[w_size-1]);
v += C[GETI(j)]*log(1+1/exp_wTx[j]);
v += C[GETI(j)]*log(1+exp_wTx[j]);
info("Objective value = %lf\n", v);
info("#nonzeros/#features = %d/%d\n", nnz, w_size);
delete [] xjneg_sum;
delete [] exp_wTx_new;
enum HEAP_TYPE { MIN, MAX };
heap(int max_size, HEAP_TYPE type)
a = new feature_node[max_size];
bool cmp(const feature_node& left, const feature_node& right)
return left.value > right.value;
return left.value < right.value;
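// note: for a MIN heap cmp is "left > right", so sifting keeps the smallest
// value at the top; for a MAX heap the comparison is reversed.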
void push(feature_node node)
while(i*2+1 < _size)
if(r < _size && cmp(a[l], a[r]))
// A two-level coordinate descent algorithm for
// a scaled one-class SVM dual problem
// min_\alpha 0.5(\alpha^T Q \alpha),
// s.t. 0 <= \alpha_i <= 1 and
// e^T \alpha = \nu l
// where Qij = xi^T xj
// eps is the stopping tolerance
// solution will be put in w and rho
// this function returns the number of iterations
// See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.
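// Illustrative sketch (not from LIBLINEAR): the two-variable sub-problem this
// solver repeats. With alpha_i + alpha_j fixed, moving delta between the pair
// changes the objective by a one-variable quadratic whose minimizer is
// delta = (G_i - G_j)/(Q_ii + Q_jj - 2*Q_ij); both variables are then clipped
// back into [0, 1] with the sum preserved. Names are assumptions of this sketch.
static void example_two_variable_update(double *ai, double *aj,
                                        double Gi, double Gj,
                                        double Qii, double Qjj, double Qij)
{
	double quad = Qii + Qjj - 2*Qij;
	if(quad <= 0) quad = 1e-12;      // guard against loss of positive definiteness
	double sum = *ai + *aj;
	*ai -= (Gi - Gj)/quad;
	// feasible range for alpha_i given 0 <= alpha_j = sum - alpha_i <= 1
	double lo = (sum - 1 > 0) ? sum - 1 : 0;
	double hi = (sum < 1) ? sum : 1;
	if(*ai < lo) *ai = lo;
	else if(*ai > hi) *ai = hi;
	*aj = sum - *ai;
}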
static int solve_oneclass_svm(const problem *prob, const parameter *param, double *w, double *rho)
int w_size = prob->n;
double eps = param->eps;
double nu = param->nu;
int i, j, s, iter = 0;
double Qij, quad_coef, delta, sum;
double *QD = new double[l];
double *G = new double[l];
int *index = new int[l];
double *alpha = new double[l];
int max_iter = 1000;
int active_size = l;
double negGmax; // max { -grad(f)_i | alpha_i < 1 }
double negGmin; // min { -grad(f)_i | alpha_i > 0 }
int *most_violating_i = new int[l];
int *most_violating_j = new int[l];
int n = (int)(nu*l); // # of alpha's at upper bound
for(i=n+1; i<l; i++)
for(i=0; i<w_size; i++)
feature_node * const xi = prob->x[i];
QD[i] = sparse_operator::nrm2_sq(xi);
sparse_operator::axpy(alpha[i], xi, w);
while (iter < max_iter)
for (s=0; s<active_size; s++)
feature_node * const xi = prob->x[i];
G[i] = sparse_operator::dot(w, xi);
negGmax = max(negGmax, -G[i]);
negGmin = min(negGmin, -G[i]);
if (negGmax - negGmin < eps)
if (active_size == l)
for(s=0; s<active_size; s++)
if ((alpha[i] == 1 && -G[i] > negGmax) ||
(alpha[i] == 0 && -G[i] < negGmin))
swap(index[s], index[active_size]);
max_inner_iter = max(active_size/10, 1);
struct heap min_heap = heap(max_inner_iter, heap::MIN);
struct heap max_heap = heap(max_inner_iter, heap::MAX);
struct feature_node node;
for(s=0; s<active_size; s++)
if (min_heap.size() < max_inner_iter)
min_heap.push(node);
else if (min_heap.top().value < node.value)
min_heap.push(node);
if (max_heap.size() < max_inner_iter)
max_heap.push(node);
else if (max_heap.top().value > node.value)
max_heap.push(node);
max_inner_iter = min(min_heap.size(), max_heap.size());
while (max_heap.size() > max_inner_iter)
while (min_heap.size() > max_inner_iter)
for (s=max_inner_iter-1; s>=0; s--)
most_violating_i[s] = min_heap.top().index;
most_violating_j[s] = max_heap.top().index;
for (s=0; s<max_inner_iter; s++)
i = most_violating_i[s];
j = most_violating_j[s];
if ((alpha[i] == 0 && alpha[j] == 0) ||
(alpha[i] == 1 && alpha[j] == 1))
feature_node const * xi = prob->x[i];
feature_node const * xj = prob->x[j];
Gi = sparse_operator::dot(w, xi);
Gj = sparse_operator::dot(w, xj);
int violating_pair = 0;
if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
if (violating_pair == 0)
Qij = sparse_operator::sparse_dot(xi, xj);
quad_coef = QD[i] + QD[j] - 2*Qij;
delta = (Gi - Gj) / quad_coef;
old_alpha_i = alpha[i];
sum = alpha[i] + alpha[j];
alpha[i] = alpha[i] - delta;
alpha[j] = alpha[j] + delta;
delta = alpha[i] - old_alpha_i;
sparse_operator::axpy(delta, xi, w);
sparse_operator::axpy(-delta, xj, w);
info("\noptimization finished, #iter = %d\n",iter);
if (iter >= max_iter)
info("\nWARNING: reaching max number of iterations\n\n");
// calculate objective value
for(i=0; i<w_size; i++)
info("Objective value = %lf\n", v/2);
info("nSV = %d\n", nSV);
double ub = INF, lb = -INF, sum_free = 0;
double G = sparse_operator::dot(w, prob->x[i]);
else if (alpha[i] == 0)
*rho = sum_free/nr_free;
info("rho = %lf\n", *rho);
delete [] most_violating_i;
delete [] most_violating_j;
// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
size_t *col_ptr = new size_t [n+1];
feature_node *x_space;
prob_col->y = new double[l];
prob_col->x = new feature_node*[n];
prob_col->y[i] = prob->y[i];
for(i=0; i<n+1; i++)
feature_node *x = prob->x[i];
while(x->index != -1)
col_ptr[x->index]++;
for(i=1; i<n+1; i++)
col_ptr[i] += col_ptr[i-1] + 1;
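// note: the extra "+ 1" per column reserves one slot for the index = -1
// terminator appended to each column of the transposed matrix below.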
x_space = new feature_node[nnz+n];
prob_col->x[i] = &x_space[col_ptr[i]];
feature_node *x = prob->x[i];
while(x->index != -1)
int ind = x->index-1;
x_space[col_ptr[ind]].index = i+1; // starts from 1
x_space[col_ptr[ind]].value = x->value;
x_space[col_ptr[i]].index = -1;
*x_space_ret = x_space;
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
int max_nr_class = 16;
int *label = Malloc(int,max_nr_class);
int *count = Malloc(int,max_nr_class);
int *data_label = Malloc(int,l);
int this_label = (int)prob->y[i];
for(j=0;j<nr_class;j++)
if(this_label == label[j])
if(nr_class == max_nr_class)
label = (int *)realloc(label,max_nr_class*sizeof(int));
count = (int *)realloc(count,max_nr_class*sizeof(int));
label[nr_class] = this_label;
count[nr_class] = 1;
// Labels are ordered by their first occurrence in the training set.
// However, for two-class sets with -1/+1 labels where -1 appears first,
// we swap the labels so that internally the binary SVM treats the +1 instances as the positive class.
if (nr_class == 2 && label[0] == -1 && label[1] == 1)
swap(label[0],label[1]);
swap(count[0],count[1]);
if(data_label[i] == 0)
int *start = Malloc(int,nr_class);
for(i=1;i<nr_class;i++)
start[i] = start[i-1]+count[i-1];
perm[start[data_label[i]]] = i;
++start[data_label[i]];
for(i=1;i<nr_class;i++)
start[i] = start[i-1]+count[i-1];
*nr_class_ret = nr_class;
static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
int solver_type = param->solver_type;
int dual_solver_max_iter = 300;
bool is_regression = (solver_type==L2R_L2LOSS_SVR ||
solver_type==L2R_L1LOSS_SVR_DUAL ||
solver_type==L2R_L2LOSS_SVR_DUAL);
// Some solvers use Cp,Cn but not C array; extensions possible but no plan for now
double *C = new double[prob->l];
double primal_solver_tol = param->eps;
for(int i=0;i<prob->l;i++)
for(int i=0;i<prob->l;i++)
int neg = prob->l - pos;
primal_solver_tol = param->eps*max(min(pos,neg), 1)/prob->l;
l2r_lr_fun fun_obj(prob, param, C);
NEWTON newton_obj(&fun_obj, primal_solver_tol);
newton_obj.set_print_string(liblinear_print_string);
newton_obj.newton(w);
case L2R_L2LOSS_SVC:
l2r_l2_svc_fun fun_obj(prob, param, C);
NEWTON newton_obj(&fun_obj, primal_solver_tol);
newton_obj.set_print_string(liblinear_print_string);
newton_obj.newton(w);
case L2R_L2LOSS_SVC_DUAL:
iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
if(iter >= dual_solver_max_iter)
info("\nWARNING: reaching max number of iterations\nSwitching to use -s 2\n\n");
// primal_solver_tol obtained from eps for dual may be too loose
primal_solver_tol *= 0.1;
l2r_l2_svc_fun fun_obj(prob, param, C);
NEWTON newton_obj(&fun_obj, primal_solver_tol);
newton_obj.set_print_string(liblinear_print_string);
newton_obj.newton(w);
case L2R_L1LOSS_SVC_DUAL:
solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
case L1R_L2LOSS_SVC:
feature_node *x_space = NULL;
transpose(prob, &x_space, &prob_col);
solve_l1r_l2_svc(&prob_col, param, w, Cp, Cn, primal_solver_tol);
delete [] prob_col.y;
delete [] prob_col.x;
feature_node *x_space = NULL;
transpose(prob, &x_space, &prob_col);
solve_l1r_lr(&prob_col, param, w, Cp, Cn, primal_solver_tol);
delete [] prob_col.y;
delete [] prob_col.x;
iter = solve_l2r_lr_dual(prob, param, w, Cp, Cn, dual_solver_max_iter);
if(iter >= dual_solver_max_iter)
info("\nWARNING: reaching max number of iterations\nSwitching to use -s 0\n\n");
// primal_solver_tol obtained from eps for dual may be too loose
primal_solver_tol *= 0.1;
l2r_lr_fun fun_obj(prob, param, C);
NEWTON newton_obj(&fun_obj, primal_solver_tol);
newton_obj.set_print_string(liblinear_print_string);
newton_obj.newton(w);
case L2R_L2LOSS_SVR:
l2r_l2_svr_fun fun_obj(prob, param, C);
NEWTON newton_obj(&fun_obj, primal_solver_tol);
newton_obj.set_print_string(liblinear_print_string);
newton_obj.newton(w);
case L2R_L1LOSS_SVR_DUAL:
solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
case L2R_L2LOSS_SVR_DUAL:
iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
if(iter >= dual_solver_max_iter)
info("\nWARNING: reaching max number of iterations\nSwitching to use -s 11\n\n");
// primal_solver_tol obtained from eps for dual may be too loose
primal_solver_tol *= 0.001;
l2r_l2_svr_fun fun_obj(prob, param, C);
NEWTON newton_obj(&fun_obj, primal_solver_tol);
newton_obj.set_print_string(liblinear_print_string);
newton_obj.newton(w);
fprintf(stderr, "ERROR: unknown solver_type\n");
// Calculate the initial C for parameter selection
static double calc_start_C(const problem *prob, const parameter *param)
double xTx, max_xTx;
for(i=0; i<prob->l; i++)
feature_node *xi=prob->x[i];
while(xi->index != -1)
double val = xi->value;
if(param->solver_type == L2R_LR)
min_C = 1.0 / (prob->l * max_xTx);
else if(param->solver_type == L2R_L2LOSS_SVC)
min_C = 1.0 / (2 * prob->l * max_xTx);
else if(param->solver_type == L2R_L2LOSS_SVR)
double sum_y, loss, y_abs;
double delta2 = 0.1;
sum_y = 0, loss = 0;
for(i=0; i<prob->l; i++)
y_abs = fabs(prob->y[i]);
loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
return pow(2, floor(log(min_C) / log(2.0)));
static double calc_max_p(const problem *prob)
for(i = 0; i < prob->l; i++)
max_p = max(max_p, fabs(prob->y[i]));
static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
double *target = Malloc(double, prob->l);
// variables for warm start
double **prev_w = Malloc(double*, nr_fold);
for(i = 0; i < nr_fold; i++)
int num_unchanged_w = 0;
void (*default_print_string) (const char *) = liblinear_print_string;
if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
param_tmp->C = start_C;
while(param_tmp->C <= max_C)
// Output disabled for running CV at a particular C
set_print_string_function(&print_null);
for(i=0; i<nr_fold; i++)
int begin = fold_start[i];
int end = fold_start[i+1];
param_tmp->init_sol = prev_w[i];
struct model *submodel = train(&subprob[i],param_tmp);
if(submodel->nr_class == 2)
total_w_size = subprob[i].n;
total_w_size = subprob[i].n * submodel->nr_class;
if(prev_w[i] == NULL)
prev_w[i] = Malloc(double, total_w_size);
for(j=0; j<total_w_size; j++)
prev_w[i][j] = submodel->w[j];
else if(num_unchanged_w >= 0)
double norm_w_diff = 0;
for(j=0; j<total_w_size; j++)
norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
prev_w[i][j] = submodel->w[j];
norm_w_diff = sqrt(norm_w_diff);
if(norm_w_diff > 1e-15)
num_unchanged_w = -1;
for(j=0; j<total_w_size; j++)
prev_w[i][j] = submodel->w[j];
for(j=begin; j<end; j++)
target[perm[j]] = predict(submodel,prob->x[perm[j]]);
free_and_destroy_model(&submodel);
set_print_string_function(default_print_string);
if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
int total_correct = 0;
for(i=0; i<prob->l; i++)
if(target[i] == prob->y[i])
double current_rate = (double)total_correct/prob->l;
if(current_rate > *best_score)
*best_C = param_tmp->C;
*best_score = current_rate;
info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
double total_error = 0.0;
for(i=0; i<prob->l; i++)
double y = prob->y[i];
double v = target[i];
total_error += (v-y)*(v-y);
double current_error = total_error/prob->l;
if(current_error < *best_score)
*best_C = param_tmp->C;
*best_score = current_error;
info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
if(num_unchanged_w == 5)
param_tmp->C = param_tmp->C*ratio;
if(param_tmp->C > max_C)
info("WARNING: maximum C reached.\n");
for(i=0; i<nr_fold; i++)
// Interface functions
model* train(const problem *prob, const parameter *param)
int w_size = prob->n;
model *model_ = Malloc(model,1);
model_->nr_feature=n-1;
model_->nr_feature=n;
model_->param = *param;
model_->bias = prob->bias;
if(check_regression_model(model_))
model_->w = Malloc(double, w_size);
if(param->init_sol != NULL)
for(i=0;i<w_size;i++)
model_->w[i] = param->init_sol[i];
for(i=0;i<w_size;i++)
model_->nr_class = 2;
model_->label = NULL;
train_one(prob, param, model_->w, 0, 0);
else if(check_oneclass_model(model_))
model_->w = Malloc(double, w_size);
model_->nr_class = 2;
model_->label = NULL;
solve_oneclass_svm(prob, param, model_->w, &(model_->rho));
int *perm = Malloc(int,l);
// group training data of the same class
group_classes(prob,&nr_class,&label,&start,&count,perm);
model_->nr_class=nr_class;
model_->label = Malloc(int,nr_class);
for(i=0;i<nr_class;i++)
model_->label[i] = label[i];
// calculate weighted C
double *weighted_C = Malloc(double, nr_class);
for(i=0;i<nr_class;i++)
weighted_C[i] = param->C;
for(i=0;i<param->nr_weight;i++)
for(j=0;j<nr_class;j++)
if(param->weight_label[i] == label[j])
fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
weighted_C[j] *= param->weight[i];
// constructing the subproblem
feature_node **x = Malloc(feature_node *,l);
x[i] = prob->x[perm[i]];
sub_prob.x = Malloc(feature_node *,sub_prob.l);
sub_prob.y = Malloc(double,sub_prob.l);
for(k=0; k<sub_prob.l; k++)
sub_prob.x[k] = x[k];
// multi-class svm by Crammer and Singer
if(param->solver_type == MCSVM_CS)
model_->w=Malloc(double, n*nr_class);
for(i=0;i<nr_class;i++)
for(j=start[i];j<start[i]+count[i];j++)
Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
Solver.Solve(model_->w);
model_->w=Malloc(double, w_size);
int e0 = start[0]+count[0];
for(; k<sub_prob.l; k++)
if(param->init_sol != NULL)
for(i=0;i<w_size;i++)
model_->w[i] = param->init_sol[i];
for(i=0;i<w_size;i++)
train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
model_->w=Malloc(double, w_size*nr_class);
double *w=Malloc(double, w_size);
for(i=0;i<nr_class;i++)
int ei = si+count[i];
for(; k<sub_prob.l; k++)
if(param->init_sol != NULL)
for(j=0;j<w_size;j++)
w[j] = param->init_sol[j*nr_class+i];
for(j=0;j<w_size;j++)
train_one(&sub_prob, param, w, weighted_C[i], param->C);
for(j=0;j<w_size;j++)
model_->w[j*nr_class+i] = w[j];
void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
int *perm = Malloc(int,l);
fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
fold_start = Malloc(int,nr_fold+1);
for(i=0;i<l;i++) perm[i]=i;
int j = i+rand()%(l-i);
swap(perm[i],perm[j]);
for(i=0;i<=nr_fold;i++)
fold_start[i]=i*l/nr_fold;
for(i=0;i<nr_fold;i++)
int begin = fold_start[i];
int end = fold_start[i+1];
struct problem subprob;
subprob.bias = prob->bias;
subprob.n = prob->n;
subprob.l = l-(end-begin);
subprob.x = Malloc(struct feature_node*,subprob.l);
subprob.y = Malloc(double,subprob.l);
for(j=0;j<begin;j++)
subprob.x[k] = prob->x[perm[j]];
subprob.y[k] = prob->y[perm[j]];
subprob.x[k] = prob->x[perm[j]];
subprob.y[k] = prob->y[perm[j]];
struct model *submodel = train(&subprob,param);
for(j=begin;j<end;j++)
target[perm[j]] = predict(submodel,prob->x[perm[j]]);
free_and_destroy_model(&submodel);
void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
int *perm = Malloc(int, l);
struct problem *subprob = Malloc(problem,nr_fold);
fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
fold_start = Malloc(int,nr_fold+1);
for(i=0;i<l;i++) perm[i]=i;
int j = i+rand()%(l-i);
swap(perm[i],perm[j]);
for(i=0;i<=nr_fold;i++)
fold_start[i]=i*l/nr_fold;
for(i=0;i<nr_fold;i++)
int begin = fold_start[i];
int end = fold_start[i+1];
subprob[i].bias = prob->bias;
subprob[i].n = prob->n;
subprob[i].l = l-(end-begin);
subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
subprob[i].y = Malloc(double,subprob[i].l);
for(j=0;j<begin;j++)
subprob[i].x[k] = prob->x[perm[j]];
subprob[i].y[k] = prob->y[perm[j]];
subprob[i].x[k] = prob->x[perm[j]];
subprob[i].y[k] = prob->y[perm[j]];
struct parameter param_tmp = *param;
if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
start_C = calc_start_C(prob, &param_tmp);
double max_C = 1024;
start_C = min(start_C, max_C);
double best_C_tmp, best_score_tmp;
find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
*best_C = best_C_tmp;
*best_score = best_score_tmp;
else if(param->solver_type == L2R_L2LOSS_SVR)
double max_p = calc_max_p(prob);
int num_p_steps = 20;
double max_C = 1048576;
i = min((int)(start_p/(max_p/num_p_steps)), i);
param_tmp.p = i*max_p/num_p_steps;
start_C_tmp = calc_start_C(prob, &param_tmp);
start_C_tmp = start_C;
start_C_tmp = min(start_C_tmp, max_C);
double best_C_tmp, best_score_tmp;
find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
if(best_score_tmp < *best_score)
*best_p = param_tmp.p;
*best_C = best_C_tmp;
*best_score = best_score_tmp;
for(i=0; i<nr_fold; i++)
double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
n=model_->nr_feature+1;
n=model_->nr_feature;
double *w=model_->w;
int nr_class=model_->nr_class;
if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
const feature_node *lx=x;
for(; (idx=lx->index)!=-1; lx++)
// the dimension of testing data may exceed that of training
dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
if(check_oneclass_model(model_))
dec_values[0] -= model_->rho;
if(check_regression_model(model_))
return dec_values[0];
else if(check_oneclass_model(model_))
return (dec_values[0]>0)?1:-1;
return (dec_values[0]>0)?model_->label[0]:model_->label[1];
int dec_max_idx = 0;
for(i=1;i<nr_class;i++)
if(dec_values[i] > dec_values[dec_max_idx])
return model_->label[dec_max_idx];
double predict(const model *model_, const feature_node *x)
double *dec_values = Malloc(double, model_->nr_class);
double label=predict_values(model_, x, dec_values);
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
if(check_probability_model(model_))
int nr_class=model_->nr_class;
double label=predict_values(model_, x, prob_estimates);
prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
if(nr_class==2) // for binary classification
prob_estimates[1]=1.-prob_estimates[0];
for(i=0; i<nr_class; i++)
sum+=prob_estimates[i];
for(i=0; i<nr_class; i++)
prob_estimates[i]=prob_estimates[i]/sum;
static const char *solver_type_table[]=
"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
"L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL",
"", "", "", "", "", "", "",
"ONECLASS_SVM", NULL
int save_model(const char *model_file_name, const struct model *model_)
int nr_feature=model_->nr_feature;
const parameter& param = model_->param;
FILE *fp = fopen(model_file_name,"w");
if(fp==NULL) return -1;
char *old_locale = setlocale(LC_ALL, NULL);
old_locale = strdup(old_locale);
setlocale(LC_ALL, "C");
if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
nr_w=model_->nr_class;
fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
fprintf(fp, "nr_class %d\n", model_->nr_class);
fprintf(fp, "label");
for(i=0; i<model_->nr_class; i++)
fprintf(fp, " %d", model_->label[i]);
fprintf(fp, "nr_feature %d\n", nr_feature);
fprintf(fp, "bias %.17g\n", model_->bias);
if(check_oneclass_model(model_))
fprintf(fp, "rho %.17g\n", model_->rho);
for(i=0; i<w_size; i++)
for(j=0; j<nr_w; j++)
fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
setlocale(LC_ALL, old_locale);
if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
// FSCANF helps to handle fscanf failures.
// Its do-while block avoids the dangling-else ambiguity when
// the macro is used in an if/else statement.
#define FSCANF(_stream, _format, _var) do\
if (fscanf(_stream, _format, _var) != 1)\
fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
// EXIT_LOAD_MODEL should NOT end with a semicolon.
#define EXIT_LOAD_MODEL()\
setlocale(LC_ALL, old_locale);\
free(model_->label);\
struct model *load_model(const char *model_file_name)
FILE *fp = fopen(model_file_name,"r");
if(fp==NULL) return NULL;
model *model_ = Malloc(model,1);
parameter& param = model_->param;
// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
param.init_sol = NULL;
model_->label = NULL;
char *old_locale = setlocale(LC_ALL, NULL);
old_locale = strdup(old_locale);
setlocale(LC_ALL, "C");
FSCANF(fp,"%80s",cmd);
if(strcmp(cmd,"solver_type")==0)
FSCANF(fp,"%80s",cmd);
for(i=0;solver_type_table[i];i++)
if(strcmp(solver_type_table[i],cmd)==0)
param.solver_type=i;
if(solver_type_table[i] == NULL)
fprintf(stderr,"unknown solver type.\n");
else if(strcmp(cmd,"nr_class")==0)
FSCANF(fp,"%d",&nr_class);
model_->nr_class=nr_class;
else if(strcmp(cmd,"nr_feature")==0)
FSCANF(fp,"%d",&nr_feature);
model_->nr_feature=nr_feature;
else if(strcmp(cmd,"bias")==0)
FSCANF(fp,"%lf",&bias);
else if(strcmp(cmd,"rho")==0)
FSCANF(fp,"%lf",&rho);
else if(strcmp(cmd,"w")==0)
else if(strcmp(cmd,"label")==0)
int nr_class = model_->nr_class;
model_->label = Malloc(int,nr_class);
for(int i=0;i<nr_class;i++)
FSCANF(fp,"%d",&model_->label[i]);
fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
nr_feature=model_->nr_feature;
if(nr_class==2 && param.solver_type != MCSVM_CS)
model_->w=Malloc(double, w_size*nr_w);
for(i=0; i<w_size; i++)
for(j=0; j<nr_w; j++)
FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
setlocale(LC_ALL, old_locale);
if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
int get_nr_feature(const model *model_)
return model_->nr_feature;
int get_nr_class(const model *model_)
return model_->nr_class;
void get_labels(const model *model_, int* label)
if (model_->label != NULL)
for(int i=0;i<model_->nr_class;i++)
label[i] = model_->label[i];
// use inline here for better performance (around 20% faster than the non-inline one)
static inline double get_w_value(const struct model *model_, int idx, int label_idx)
int nr_class = model_->nr_class;
int solver_type = model_->param.solver_type;
const double *w = model_->w;
if(idx < 0 || idx > model_->nr_feature)
if(check_regression_model(model_) || check_oneclass_model(model_))
if(label_idx < 0 || label_idx >= nr_class)
if(nr_class == 2 && solver_type != MCSVM_CS)
return w[idx*nr_class+label_idx];
// feat_idx: starting from 1 to nr_feature
// label_idx: starting from 0 to nr_class-1 for classification models;
// for regression and one-class SVM models, label_idx is
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
if(feat_idx > model_->nr_feature)
return get_w_value(model_, feat_idx-1, label_idx);
double get_decfun_bias(const struct model *model_, int label_idx)
if(check_oneclass_model(model_))
fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n");
int bias_idx = model_->nr_feature;
double bias = model_->bias;
return bias*get_w_value(model_, bias_idx, label_idx);
double get_decfun_rho(const struct model *model_)
if(check_oneclass_model(model_))
fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n");
void free_model_content(struct model *model_ptr)
if(model_ptr->w != NULL)
if(model_ptr->label != NULL)
free(model_ptr->label);
void free_and_destroy_model(struct model **model_ptr_ptr)
struct model *model_ptr = *model_ptr_ptr;
if(model_ptr != NULL)
free_model_content(model_ptr);
void destroy_param(parameter* param)
if(param->weight_label != NULL)
free(param->weight_label);
if(param->weight != NULL)
free(param->weight);
if(param->init_sol != NULL)
free(param->init_sol);
const char *check_parameter(const problem *prob, const parameter *param)
if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
return "prob->bias >=0, but this is ignored in ONECLASS_SVM";
if(param->regularize_bias == 0)
if(prob->bias != 1.0)
return "To not regularize bias, must specify -B 1 along with -R";
if(param->solver_type != L2R_LR
&& param->solver_type != L2R_L2LOSS_SVC
&& param->solver_type != L1R_L2LOSS_SVC
&& param->solver_type != L1R_LR
&& param->solver_type != L2R_L2LOSS_SVR)
return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR";
if(param->solver_type != L2R_LR
&& param->solver_type != L2R_L2LOSS_SVC_DUAL
&& param->solver_type != L2R_L2LOSS_SVC
&& param->solver_type != L2R_L1LOSS_SVC_DUAL
&& param->solver_type != MCSVM_CS
&& param->solver_type != L1R_L2LOSS_SVC
&& param->solver_type != L1R_LR
&& param->solver_type != L2R_LR_DUAL
&& param->solver_type != L2R_L2LOSS_SVR
&& param->solver_type != L2R_L2LOSS_SVR_DUAL
&& param->solver_type != L2R_L1LOSS_SVR_DUAL
&& param->solver_type != ONECLASS_SVM)
return "unknown solver type";
if(param->init_sol != NULL
&& param->solver_type != L2R_LR
&& param->solver_type != L2R_L2LOSS_SVC
&& param->solver_type != L2R_L2LOSS_SVR)
return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";
int check_probability_model(const struct model *model_)
return (model_->param.solver_type==L2R_LR ||
model_->param.solver_type==L2R_LR_DUAL ||
model_->param.solver_type==L1R_LR);
int check_regression_model(const struct model *model_)
return (model_->param.solver_type==L2R_L2LOSS_SVR ||
model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
int check_oneclass_model(const struct model *model_)
return model_->param.solver_type == ONECLASS_SVM;
void set_print_string_function(void (*print_func)(const char*))
if (print_func == NULL)
liblinear_print_string = &print_string_stdout;
liblinear_print_string = print_func;
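// Illustrative usage sketch (not part of linear.cpp): training a two-class
// L2-regularized LR model on a tiny in-memory problem and predicting with it.
// Everything here uses only the public interface declared in linear.h; the
// function name and the toy data are assumptions of this sketch.
static void example_train_and_predict(void)
{
	// two instances, two features each; every row ends with index = -1
	feature_node x1[] = {{1, 1.0}, {2, 0.5}, {-1, 0}};
	feature_node x2[] = {{1, -1.0}, {2, 0.25}, {-1, 0}};
	feature_node *rows[] = {x1, x2};
	double labels[] = {+1, -1};

	problem prob;
	prob.l = 2; prob.n = 2; prob.bias = -1; // no bias feature appended
	prob.x = rows; prob.y = labels;

	parameter param;
	param.solver_type = L2R_LR;
	param.C = 1; param.eps = 0.01; param.p = 0.1; param.nu = 0.5;
	param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
	param.init_sol = NULL; param.regularize_bias = 1;

	if(check_parameter(&prob, &param) == NULL) // NULL means the settings are consistent
	{
		model *m = train(&prob, &param);
		double pred = predict(m, x1); // expected to match the first label, +1
		(void)pred;
		free_and_destroy_model(&m);
	}
}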