9 int liblinear_version = LIBLINEAR_VERSION;
10 typedef signed char schar;
11 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
13 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
16 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
18 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
21 memcpy((void *)dst,(void *)src,sizeof(T)*n);
24 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
26 static void print_string_stdout(const char *s)
31 static void print_null(const char *s) {}
33 static void (*liblinear_print_string) (const char *) = &print_string_stdout;
36 static void info(const char *fmt,...)
43 (*liblinear_print_string)(buf);
46 static void info(const char *fmt,...) {}
51 static double nrm2_sq(const feature_node *x)
56 ret += x->value*x->value;
62 static double dot(const double *s, const feature_node *x)
67 ret += s[x->index-1]*x->value;
73 static double sparse_dot(const feature_node *x1, const feature_node *x2)
76 while(x1->index != -1 && x2->index != -1)
78 if(x1->index == x2->index)
80 ret += x1->value * x2->value;
86 if(x1->index > x2->index)
95 static void axpy(const double a, const feature_node *x, double *y)
99 y[x->index-1] += a*x->value;
105 // L2-regularized empirical risk minimization
106 // min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss
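//
// Concretely, the subclasses below supply the loss through C_times_loss();
// for example (a summary of the implementations that follow):
//   logistic regression:  \xi(w^Tx_i) = log(1 + exp(-y_i w^Tx_i))
//   L2-loss SVC:          \xi(w^Tx_i) = max(0, 1 - y_i w^Tx_i)^2
//   L2-loss SVR:          \xi(w^Tx_i) = max(0, |w^Tx_i - y_i| - p)^2
// each scaled by the per-instance weight C_i.
//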
108 class l2r_erm_fun: public function
111 l2r_erm_fun(const problem *prob, const parameter *param, double *C);
114 double fun(double *w);
115 double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha);
116 int get_nr_variable(void);
119 virtual double C_times_loss(int i, double wx_i) = 0;
120 void Xv(double *v, double *Xv);
121 void XTv(double *v, double *XTv);
126 double *tmp; // a working array
131 l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C)
140 this->regularize_bias = param->regularize_bias;
143 l2r_erm_fun::~l2r_erm_fun()
149 double l2r_erm_fun::fun(double *w)
154 int w_size=get_nr_variable();
159 for(i=0;i<w_size;i++)
161 if(regularize_bias == 0)
162 wTw -= w[w_size-1]*w[w_size-1];
164 f += C_times_loss(i, wx[i]);
170 int l2r_erm_fun::get_nr_variable(void)
175 // On entry *f must be the function value of w
176 // On exit w is updated and *f is the new function value
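//
// In outline, the backtracking rule used here is: with search direction s,
// gradient g, and a fixed sufficient-decrease constant eta, accept the step
// size alpha once
//     f(w + alpha*s) - f(w) <= eta * alpha * g^T s,
// otherwise shrink alpha and re-evaluate, up to max_num_linesearch times.
//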
177 double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
185 int w_size = get_nr_variable();
186 int max_num_linesearch = 20;
190 for (i=0;i<w_size;i++)
196 if(regularize_bias == 0)
198 // bias not used in calculating (w + \alpha s)^T (w + \alpha s)
199 sTs -= s[w_size-1] * s[w_size-1];
200 wTs -= s[w_size-1] * w[w_size-1];
203 int num_linesearch = 0;
204 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
209 double inner_product = tmp[i] * alpha + wx[i];
210 loss += C_times_loss(i, inner_product);
212 *f = loss + (alpha * alpha * sTs + wTw) / 2.0 + alpha * wTs;
213 if (*f - fold <= eta * alpha * gTs)
216 wx[i] += alpha * tmp[i];
223 if (num_linesearch >= max_num_linesearch)
229 for (i=0;i<w_size;i++)
230 w[i] += alpha * s[i];
232 wTw += alpha * alpha * sTs + 2* alpha * wTs;
236 void l2r_erm_fun::Xv(double *v, double *Xv)
240 feature_node **x=prob->x;
243 Xv[i]=sparse_operator::dot(v, x[i]);
246 void l2r_erm_fun::XTv(double *v, double *XTv)
250 int w_size=get_nr_variable();
251 feature_node **x=prob->x;
253 for(i=0;i<w_size;i++)
256 sparse_operator::axpy(v[i], x[i], XTv);
259 class l2r_lr_fun: public l2r_erm_fun
262 l2r_lr_fun(const problem *prob, const parameter *param, double *C);
265 void grad(double *w, double *g);
266 void Hv(double *s, double *Hs);
268 void get_diag_preconditioner(double *M);
272 double C_times_loss(int i, double wx_i);
275 l2r_lr_fun::l2r_lr_fun(const problem *prob, const parameter *param, double *C):
276 l2r_erm_fun(prob, param, C)
282 l2r_lr_fun::~l2r_lr_fun()
287 double l2r_lr_fun::C_times_loss(int i, double wx_i)
289 double ywx_i = wx_i * prob->y[i];
291 return C[i]*log(1 + exp(-ywx_i));
293 return C[i]*(-ywx_i + log(1 + exp(ywx_i)));
296 void l2r_lr_fun::grad(double *w, double *g)
301 int w_size=get_nr_variable();
305 tmp[i] = 1/(1 + exp(-y[i]*wx[i]));
306 D[i] = tmp[i]*(1-tmp[i]);
307 tmp[i] = C[i]*(tmp[i]-1)*y[i];
311 for(i=0;i<w_size;i++)
313 if(regularize_bias == 0)
314 g[w_size-1] -= w[w_size-1];
317 void l2r_lr_fun::get_diag_preconditioner(double *M)
321 int w_size=get_nr_variable();
322 feature_node **x = prob->x;
324 for (i=0; i<w_size; i++)
326 if(regularize_bias == 0)
331 feature_node *xi = x[i];
332 while (xi->index!=-1)
334 M[xi->index-1] += xi->value*xi->value*C[i]*D[i];
340 void l2r_lr_fun::Hv(double *s, double *Hs)
344 int w_size=get_nr_variable();
345 feature_node **x=prob->x;
347 for(i=0;i<w_size;i++)
351 feature_node * const xi=x[i];
352 double xTs = sparse_operator::dot(s, xi);
356 sparse_operator::axpy(xTs, xi, Hs);
358 for(i=0;i<w_size;i++)
359 Hs[i] = s[i] + Hs[i];
360 if(regularize_bias == 0)
361 Hs[w_size-1] -= s[w_size-1];
364 class l2r_l2_svc_fun: public l2r_erm_fun
367 l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C);
370 void grad(double *w, double *g);
371 void Hv(double *s, double *Hs);
373 void get_diag_preconditioner(double *M);
376 void subXTv(double *v, double *XTv);
382 double C_times_loss(int i, double wx_i);
385 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, const parameter *param, double *C):
386 l2r_erm_fun(prob, param, C)
388 I = new int[prob->l];
391 l2r_l2_svc_fun::~l2r_l2_svc_fun()
396 double l2r_l2_svc_fun::C_times_loss(int i, double wx_i)
398 double d = 1 - prob->y[i] * wx_i;
405 void l2r_l2_svc_fun::grad(double *w, double *g)
410 int w_size=get_nr_variable();
415 tmp[i] = wx[i] * y[i];
418 tmp[sizeI] = C[i]*y[i]*(tmp[i]-1);
425 for(i=0;i<w_size;i++)
426 g[i] = w[i] + 2*g[i];
427 if(regularize_bias == 0)
428 g[w_size-1] -= w[w_size-1];
431 void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
434 int w_size=get_nr_variable();
435 feature_node **x = prob->x;
437 for (i=0; i<w_size; i++)
439 if(regularize_bias == 0)
442 for (i=0; i<sizeI; i++)
445 feature_node *xi = x[idx];
446 while (xi->index!=-1)
448 M[xi->index-1] += xi->value*xi->value*C[idx]*2;
454 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
457 int w_size=get_nr_variable();
458 feature_node **x=prob->x;
460 for(i=0;i<w_size;i++)
464 feature_node * const xi=x[I[i]];
465 double xTs = sparse_operator::dot(s, xi);
469 sparse_operator::axpy(xTs, xi, Hs);
471 for(i=0;i<w_size;i++)
472 Hs[i] = s[i] + 2*Hs[i];
473 if(regularize_bias == 0)
474 Hs[w_size-1] -= s[w_size-1];
477 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
480 int w_size=get_nr_variable();
481 feature_node **x=prob->x;
483 for(i=0;i<w_size;i++)
486 sparse_operator::axpy(v[i], x[I[i]], XTv);
489 class l2r_l2_svr_fun: public l2r_l2_svc_fun
492 l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C);
494 void grad(double *w, double *g);
497 double C_times_loss(int i, double wx_i);
501 l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C):
502 l2r_l2_svc_fun(prob, param, C)
505 this->regularize_bias = param->regularize_bias;
508 double l2r_l2_svr_fun::C_times_loss(int i, double wx_i)
510 double d = wx_i - prob->y[i];
512 return C[i]*(d+p)*(d+p);
514 return C[i]*(d-p)*(d-p);
518 void l2r_l2_svr_fun::grad(double *w, double *g)
523 int w_size=get_nr_variable();
531 // generate index set I
534 tmp[sizeI] = C[i]*(d+p);
540 tmp[sizeI] = C[i]*(d-p);
548 for(i=0;i<w_size;i++)
549 g[i] = w[i] + 2*g[i];
550 if(regularize_bias == 0)
551 g[w_size-1] -= w[w_size-1];
554 // A coordinate descent algorithm for
555 // multi-class support vector machines by Crammer and Singer
557 // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i \alpha^m_i
558 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
560 // where e^m_i = 0 if y_i = m,
561 // e^m_i = 1 if y_i != m,
562 // C^m_i = C if m = y_i,
563 // C^m_i = 0 if m != y_i,
564 // and w_m(\alpha) = \sum_i \alpha^m_i x_i
568 // eps is the stopping tolerance
570 // solution will be put in w
572 // See Appendix of LIBLINEAR paper, Fan et al. (2008)
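//
// In outline, each step of Solve() below fixes one instance i and minimizes
// over {\alpha^m_i : m in the active set} in closed form: with A_i = x_i^Tx_i
// and B_m = G_m - A_i \alpha^m_i (G_m the current gradient), solve_sub_problem()
// sorts {B_m}, determines a threshold beta, and sets
//     \alpha^m_i = min(C^m_i, (beta - B_m)/A_i),
// after which w_m += (new \alpha^m_i - old \alpha^m_i) x_i for every changed m.
//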
574 #define GETI(i) ((int) prob->y[i])
575 // To support weights for instances, use GETI(i) (i)
577 class Solver_MCSVM_CS
580 Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
582 void Solve(double *w);
584 void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
585 bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
594 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
596 this->w_size = prob->n;
598 this->nr_class = nr_class;
600 this->max_iter = max_iter;
602 this->B = new double[nr_class];
603 this->G = new double[nr_class];
604 this->C = weighted_C;
607 Solver_MCSVM_CS::~Solver_MCSVM_CS()
613 int compare_double(const void *a, const void *b)
615 if(*(double *)a > *(double *)b)
617 if(*(double *)a < *(double *)b)
622 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
627 clone(D, B, active_i);
630 qsort(D, active_i, sizeof(double), compare_double);
632 double beta = D[0] - A_i*C_yi;
633 for(r=1;r<active_i && beta<r*D[r];r++)
637 for(r=0;r<active_i;r++)
640 alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
642 alpha_new[r] = min((double)0, (beta - B[r])/A_i);
647 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
652 if(alpha_i == bound && G[m] < minG)
657 void Solver_MCSVM_CS::Solve(double *w)
661 double *alpha = new double[l*nr_class];
662 double *alpha_new = new double[nr_class];
663 int *index = new int[l];
664 double *QD = new double[l];
665 int *d_ind = new int[nr_class];
666 double *d_val = new double[nr_class];
667 int *alpha_index = new int[nr_class*l];
668 int *y_index = new int[l];
670 int *active_size_i = new int[l];
671 double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
672 bool start_from_all = true;
674 // Initial alpha can be set here. Note that
675 // sum_m alpha[i*nr_class+m] = 0, for all i
676 // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
677 // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
678 // If initial alpha isn't zero, uncomment the for loop below to initialize w
679 for(i=0;i<l*nr_class;i++)
682 for(i=0;i<w_size*nr_class;i++)
686 for(m=0;m<nr_class;m++)
687 alpha_index[i*nr_class+m] = m;
688 feature_node *xi = prob->x[i];
690 while(xi->index != -1)
692 double val = xi->value;
695 // Uncomment the for loop if initial alpha isn't zero
696 // for(m=0; m<nr_class; m++)
697 // w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
700 active_size_i[i] = nr_class;
701 y_index[i] = (int)prob->y[i];
705 while(iter < max_iter)
707 double stopping = -INF;
708 for(i=0;i<active_size;i++)
710 int j = i+rand()%(active_size-i);
711 swap(index[i], index[j]);
713 for(s=0;s<active_size;s++)
717 double *alpha_i = &alpha[i*nr_class];
718 int *alpha_index_i = &alpha_index[i*nr_class];
722 for(m=0;m<active_size_i[i];m++)
724 if(y_index[i] < active_size_i[i])
727 feature_node *xi = prob->x[i];
728 while(xi->index!= -1)
730 double *w_i = &w[(xi->index-1)*nr_class];
731 for(m=0;m<active_size_i[i];m++)
732 G[m] += w_i[alpha_index_i[m]]*(xi->value);
738 for(m=0;m<active_size_i[i];m++)
740 if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
745 if(y_index[i] < active_size_i[i])
746 if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
747 minG = G[y_index[i]];
749 for(m=0;m<active_size_i[i];m++)
751 if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
754 while(active_size_i[i]>m)
756 if(!be_shrunk(i, active_size_i[i], y_index[i],
757 alpha_i[alpha_index_i[active_size_i[i]]], minG))
759 swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
760 swap(G[m], G[active_size_i[i]]);
761 if(y_index[i] == active_size_i[i])
763 else if(y_index[i] == m)
764 y_index[i] = active_size_i[i];
772 if(active_size_i[i] <= 1)
775 swap(index[s], index[active_size]);
780 if(maxG-minG <= 1e-12)
783 stopping = max(maxG - minG, stopping);
785 for(m=0;m<active_size_i[i];m++)
786 B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
788 solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
790 for(m=0;m<active_size_i[i];m++)
792 double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
793 alpha_i[alpha_index_i[m]] = alpha_new[m];
796 d_ind[nz_d] = alpha_index_i[m];
803 while(xi->index != -1)
805 double *w_i = &w[(xi->index-1)*nr_class];
807 w_i[d_ind[m]] += d_val[m]*xi->value;
819 if(stopping < eps_shrink)
821 if(stopping < eps && start_from_all == true)
827 active_size_i[i] = nr_class;
829 eps_shrink = max(eps_shrink/2, eps);
830 start_from_all = true;
834 start_from_all = false;
837 info("\noptimization finished, #iter = %d\n",iter);
838 if (iter >= max_iter)
839 info("\nWARNING: reaching max number of iterations\n");
841 // calculate objective value
844 for(i=0;i<w_size*nr_class;i++)
847 for(i=0;i<l*nr_class;i++)
850 if(fabs(alpha[i]) > 0)
854 v -= alpha[i*nr_class+(int)prob->y[i]];
855 info("Objective value = %lf\n",v);
856 info("nSV = %d\n",nSV);
864 delete [] alpha_index;
866 delete [] active_size_i;
869 // A coordinate descent algorithm for
870 // L1-loss and L2-loss SVM dual problems
872 // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
873 // s.t. 0 <= \alpha_i <= upper_bound_i,
875 // where Qij = yi yj xi^T xj and
876 // D is a diagonal matrix
879 // upper_bound_i = Cp if y_i = 1
880 // upper_bound_i = Cn if y_i = -1
883 // upper_bound_i = INF
884 // D_ii = 1/(2*Cp) if y_i = 1
885 // D_ii = 1/(2*Cn) if y_i = -1
889 // eps is the stopping tolerance
891 // solution will be put in w
893 // See Algorithm 3 of Hsieh et al., ICML 2008
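//
// In outline, one coordinate update of the loop below is (U_i the upper bound,
// D_ii the diagonal term defined above, Q_ii = x_i^Tx_i):
//     G       = y_i w^Tx_i - 1 + D_ii*alpha_i
//     alpha_i = min(max(alpha_i - G/(Q_ii + D_ii), 0), U_i)
//     w      += (new alpha_i - old alpha_i) * y_i x_i
// QD[i] in the code stores Q_ii + D_ii, and shrinking is driven by the
// projected gradient PG.
//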
896 #define GETI(i) (y[i]+1)
897 // To support weights for instances, use GETI(i) (i)
899 static void solve_l2r_l1l2_svc(
900 const problem *prob, double *w, double eps,
901 double Cp, double Cn, int solver_type)
904 int w_size = prob->n;
907 double *QD = new double[l];
909 int *index = new int[l];
910 double *alpha = new double[l];
911 schar *y = new schar[l];
914 // PG: projected gradient, for shrinking and stopping
916 double PGmax_old = INF;
917 double PGmin_old = -INF;
918 double PGmax_new, PGmin_new;
920 // default solver_type: L2R_L2LOSS_SVC_DUAL
921 double diag[3] = {0.5/Cn, 0, 0.5/Cp};
922 double upper_bound[3] = {INF, 0, INF};
923 if(solver_type == L2R_L1LOSS_SVC_DUAL)
943 // Initial alpha can be set here. Note that
944 // 0 <= alpha[i] <= upper_bound[GETI(i)]
948 for(i=0; i<w_size; i++)
952 QD[i] = diag[GETI(i)];
954 feature_node * const xi = prob->x[i];
955 QD[i] += sparse_operator::nrm2_sq(xi);
956 sparse_operator::axpy(y[i]*alpha[i], xi, w);
961 while (iter < max_iter)
966 for (i=0; i<active_size; i++)
968 int j = i+rand()%(active_size-i);
969 swap(index[i], index[j]);
972 for (s=0; s<active_size; s++)
975 const schar yi = y[i];
976 feature_node * const xi = prob->x[i];
978 G = yi*sparse_operator::dot(w, xi)-1;
980 C = upper_bound[GETI(i)];
981 G += alpha[i]*diag[GETI(i)];
989 swap(index[s], index[active_size]);
996 else if (alpha[i] == C)
1001 swap(index[s], index[active_size]);
1011 PGmax_new = max(PGmax_new, PG);
1012 PGmin_new = min(PGmin_new, PG);
1014 if(fabs(PG) > 1.0e-12)
1016 double alpha_old = alpha[i];
1017 alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
1018 d = (alpha[i] - alpha_old)*yi;
1019 sparse_operator::axpy(d, xi, w);
1027 if(PGmax_new - PGmin_new <= eps)
1029 if(active_size == l)
1040 PGmax_old = PGmax_new;
1041 PGmin_old = PGmin_new;
1048 info("\noptimization finished, #iter = %d\n",iter);
1049 if (iter >= max_iter)
1050 info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
1052 // calculate objective value
1056 for(i=0; i<w_size; i++)
1060 v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
1064 info("Objective value = %lf\n",v/2);
1065 info("nSV = %d\n",nSV);
1074 // A coordinate descent algorithm for
1075 // L1-loss and L2-loss epsilon-SVR dual problem
1077 // min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
1078 // s.t. -upper_bound_i <= \beta_i <= upper_bound_i,
1080 // where Qij = xi^T xj and
1081 // diag(lambda) is a diagonal matrix with entries lambda_i
1084 // upper_bound_i = C
1087 // upper_bound_i = INF
1088 // lambda_i = 1/(2*C)
1092 // eps is the stopping tolerance
1094 // solution will be put in w
1096 // See Algorithm 4 of Ho and Lin, 2012
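//
// In outline, one coordinate update of the loop below is (U the upper bound,
// H = Q_ii + lambda_i, G = -y_i + lambda_i*beta_i + w^Tx_i, Gp = G+p, Gn = G-p):
//     d      = -Gp/H  if Gp < H*beta_i,   -Gn/H  if Gn > H*beta_i,   -beta_i otherwise
//     beta_i = min(max(beta_i + d, -U), U)
//     w     += (new beta_i - old beta_i) * x_i
//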
1100 // To support weights for instances, use GETI(i) (i)
1102 static void solve_l2r_l1l2_svr(
1103 const problem *prob, double *w, const parameter *param,
1107 double C = param->C;
1108 double p = param->p;
1109 int w_size = prob->n;
1110 double eps = param->eps;
1112 int max_iter = 1000;
1113 int active_size = l;
1114 int *index = new int[l];
1117 double Gmax_old = INF;
1118 double Gmax_new, Gnorm1_new;
1119 double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1120 double *beta = new double[l];
1121 double *QD = new double[l];
1122 double *y = prob->y;
1124 // L2R_L2LOSS_SVR_DUAL
1125 double lambda[1], upper_bound[1];
1127 upper_bound[0] = INF;
1129 if(solver_type == L2R_L1LOSS_SVR_DUAL)
1135 // Initial beta can be set here. Note that
1136 // -upper_bound <= beta[i] <= upper_bound
1140 for(i=0; i<w_size; i++)
1144 feature_node * const xi = prob->x[i];
1145 QD[i] = sparse_operator::nrm2_sq(xi);
1146 sparse_operator::axpy(beta[i], xi, w);
1152 while(iter < max_iter)
1157 for(i=0; i<active_size; i++)
1159 int j = i+rand()%(active_size-i);
1160 swap(index[i], index[j]);
1163 for(s=0; s<active_size; s++)
1166 G = -y[i] + lambda[GETI(i)]*beta[i];
1167 H = QD[i] + lambda[GETI(i)];
1169 feature_node * const xi = prob->x[i];
1170 G += sparse_operator::dot(w, xi);
1174 double violation = 0;
1181 else if(Gp>Gmax_old && Gn<-Gmax_old)
1184 swap(index[s], index[active_size]);
1189 else if(beta[i] >= upper_bound[GETI(i)])
1193 else if(Gp < -Gmax_old)
1196 swap(index[s], index[active_size]);
1201 else if(beta[i] <= -upper_bound[GETI(i)])
1205 else if(Gn > Gmax_old)
1208 swap(index[s], index[active_size]);
1213 else if(beta[i] > 0)
1214 violation = fabs(Gp);
1216 violation = fabs(Gn);
1218 Gmax_new = max(Gmax_new, violation);
1219 Gnorm1_new += violation;
1221 // obtain Newton direction d
1224 else if(Gn > H*beta[i])
1229 if(fabs(d) < 1.0e-12)
1232 double beta_old = beta[i];
1233 beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
1234 d = beta[i]-beta_old;
1237 sparse_operator::axpy(d, xi, w);
1241 Gnorm1_init = Gnorm1_new;
1246 if(Gnorm1_new <= eps*Gnorm1_init)
1248 if(active_size == l)
1259 Gmax_old = Gmax_new;
1262 info("\noptimization finished, #iter = %d\n", iter);
1263 if(iter >= max_iter)
1264 info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
1266 // calculate objective value
1269 for(i=0; i<w_size; i++)
1274 v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
1279 info("Objective value = %lf\n", v);
1280 info("nSV = %d\n",nSV);
1288 // A coordinate descent algorithm for
1289 // the dual of L2-regularized logistic regression problems
1291 // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1292 // s.t. 0 <= \alpha_i <= upper_bound_i,
1294 // where Qij = yi yj xi^T xj and
1295 // upper_bound_i = Cp if y_i = 1
1296 // upper_bound_i = Cn if y_i = -1
1300 // eps is the stopping tolerance
1302 // solution will be put in w
1304 // See Algorithm 5 of Yu et al., MLJ 2010
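//
// In outline, each coordinate update below minimizes the one-variable function
//     g(z) = z log z + (C-z) log(C-z) + 0.5*a*(z - alpha_old)^2 + sign*b*(z - alpha_old)
// over z in (0, C), with a = x_i^Tx_i and b = y_i w^Tx_i, by a safeguarded
// Newton iteration
//     z <- z - g'(z)/g''(z),   where  g''(z) = a + C/((C-z)*z),
// and then updates  w += sign*(z - alpha_old)*y_i*x_i.
//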
1307 #define GETI(i) (y[i]+1)
1308 // To support weights for instances, use GETI(i) (i)
1310 void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
1313 int w_size = prob->n;
1315 double *xTx = new double[l];
1316 int max_iter = 1000;
1317 int *index = new int[l];
1318 double *alpha = new double[2*l]; // store alpha and C - alpha
1319 schar *y = new schar[l];
1320 int max_inner_iter = 100; // for inner Newton
1321 double innereps = 1e-2;
1322 double innereps_min = min(1e-8, eps);
1323 double upper_bound[3] = {Cn, 0, Cp};
1337 // Initial alpha can be set here. Note that
1338 // 0 < alpha[i] < upper_bound[GETI(i)]
1339 // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1342 alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
1343 alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1346 for(i=0; i<w_size; i++)
1350 feature_node * const xi = prob->x[i];
1351 xTx[i] = sparse_operator::nrm2_sq(xi);
1352 sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
1356 while (iter < max_iter)
1360 int j = i+rand()%(l-i);
1361 swap(index[i], index[j]);
1363 int newton_iter = 0;
1368 const schar yi = y[i];
1369 double C = upper_bound[GETI(i)];
1370 double ywTx = 0, xisq = xTx[i];
1371 feature_node * const xi = prob->x[i];
1372 ywTx = yi*sparse_operator::dot(w, xi);
1373 double a = xisq, b = ywTx;
1375 // Decide to minimize g_1(z) or g_2(z)
1376 int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1377 if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1384 // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1385 double alpha_old = alpha[ind1];
1386 double z = alpha_old;
1389 double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1390 Gmax = max(Gmax, fabs(gp));
1392 // Newton method on the sub-problem
1393 const double eta = 0.1; // xi in the paper
1395 while (inner_iter <= max_inner_iter)
1397 if(fabs(gp) < innereps)
1399 double gpp = a + C/(C-z)/z;
1400 double tmpz = z - gp/gpp;
1403 else // tmpz in (0, C)
1405 gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1410 if(inner_iter > 0) // update w
1414 sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
1425 if(newton_iter <= l/10)
1426 innereps = max(innereps_min, 0.1*innereps);
1430 info("\noptimization finished, #iter = %d\n",iter);
1431 if (iter >= max_iter)
1432 info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
1434 // calculate objective value
1437 for(i=0; i<w_size; i++)
1441 v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1442 - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1443 info("Objective value = %lf\n", v);
1451 // A coordinate descent algorithm for
1452 // L1-regularized L2-loss support vector classification
1454 // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
1458 // eps is the stopping tolerance
1460 // solution will be put in w
1462 // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
1464 // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1465 // must have been added to the original data. (see -B and -R option)
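//
// In outline, the method cycles over features j: with G the loss gradient
// w.r.t. w_j, H an upper bound on its second derivative, Gp = G+1 and Gn = G-1,
// the one-variable Newton step is
//     d = -Gp/H  if Gp < H*w_j,   -Gn/H  if Gn > H*w_j,   -w_j  otherwise,
// followed by a backtracking line search requiring sufficient decrease of
//     |w_j + d| - |w_j| + (change in the loss term).
//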
1468 #define GETI(i) (y[i]+1)
1469 // To support weights for instances, use GETI(i) (i)
1471 static void solve_l1r_l2_svc(
1472 problem *prob_col, double *w, double eps,
1473 double Cp, double Cn, int regularize_bias)
1475 int l = prob_col->l;
1476 int w_size = prob_col->n;
1478 int max_iter = 1000;
1479 int active_size = w_size;
1480 int max_num_linesearch = 20;
1482 double sigma = 0.01;
1483 double d, G_loss, G, H;
1484 double Gmax_old = INF;
1485 double Gmax_new, Gnorm1_new;
1486 double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1487 double d_old, d_diff;
1488 double loss_old = 0, loss_new;
1489 double appxcond, cond;
1491 int *index = new int[w_size];
1492 schar *y = new schar[l];
1493 double *b = new double[l]; // b = 1-ywTx
1494 double *xj_sq = new double[w_size];
1497 double C[3] = {Cn,0,Cp};
1499 // Initial w can be set here.
1500 for(j=0; j<w_size; j++)
1506 if(prob_col->y[j] > 0)
1511 for(j=0; j<w_size; j++)
1516 while(x->index != -1)
1518 int ind = x->index-1;
1519 x->value *= y[ind]; // x->value stores yi*xij
1520 double val = x->value;
1522 xj_sq[j] += C[GETI(ind)]*val*val;
1527 while(iter < max_iter)
1532 for(j=0; j<active_size; j++)
1534 int i = j+rand()%(active_size-j);
1535 swap(index[i], index[j]);
1538 for(s=0; s<active_size; s++)
1545 while(x->index != -1)
1547 int ind = x->index-1;
1550 double val = x->value;
1551 double tmp = C[GETI(ind)]*val;
1552 G_loss -= tmp*b[ind];
1563 double violation = 0;
1564 double Gp = 0, Gn = 0;
1565 if(j == w_size-1 && regularize_bias == 0)
1566 violation = fabs(G);
1577 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1580 swap(index[s], index[active_size]);
1586 violation = fabs(Gp);
1588 violation = fabs(Gn);
1590 Gmax_new = max(Gmax_new, violation);
1591 Gnorm1_new += violation;
1593 // obtain Newton direction d
1594 if(j == w_size-1 && regularize_bias == 0)
1600 else if(Gn > H*w[j])
1606 if(fabs(d) < 1.0e-12)
1610 if(j == w_size-1 && regularize_bias == 0)
1613 delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1616 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1619 if(j == w_size-1 && regularize_bias == 0)
1620 cond = -sigma*delta;
1622 cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1624 appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1628 sparse_operator::axpy(d_diff, x, b);
1632 if(num_linesearch == 0)
1637 while(x->index != -1)
1639 int ind = x->index-1;
1641 loss_old += C[GETI(ind)]*b[ind]*b[ind];
1642 double b_new = b[ind] + d_diff*x->value;
1645 loss_new += C[GETI(ind)]*b_new*b_new;
1653 while(x->index != -1)
1655 int ind = x->index-1;
1656 double b_new = b[ind] + d_diff*x->value;
1659 loss_new += C[GETI(ind)]*b_new*b_new;
1664 cond = cond + loss_new - loss_old;
1677 // recompute b[] if line search takes too many steps
1678 if(num_linesearch >= max_num_linesearch)
1681 for(int i=0; i<l; i++)
1684 for(int i=0; i<w_size; i++)
1686 if(w[i]==0) continue;
1688 sparse_operator::axpy(-w[i], x, b);
1694 Gnorm1_init = Gnorm1_new;
1699 if(Gnorm1_new <= eps*Gnorm1_init)
1701 if(active_size == w_size)
1705 active_size = w_size;
1712 Gmax_old = Gmax_new;
1715 info("\noptimization finished, #iter = %d\n", iter);
1716 if(iter >= max_iter)
1717 info("\nWARNING: reaching max number of iterations\n");
1719 // calculate objective value
1723 for(j=0; j<w_size; j++)
1726 while(x->index != -1)
1728 x->value *= prob_col->y[x->index-1]; // restore x->value
1737 if (regularize_bias == 0)
1738 v -= fabs(w[w_size-1]);
1741 v += C[GETI(j)]*b[j]*b[j];
1743 info("Objective value = %lf\n", v);
1744 info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1752 // A coordinate descent algorithm for
1753 // L1-regularized logistic regression problems
1755 // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
1759 // eps is the stopping tolerance
1761 // solution will be put in w
1763 // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1765 // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
1766 // must have been added to the original data. (see -B and -R option)
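//
// In outline (a newGLMNET-type scheme, cf. Yuan et al. 2011): each outer Newton
// iteration builds a quadratic model of the logistic loss from Grad[] and the
// diagonal Hessian Hdiag[], an inner coordinate-descent loop solves the
// resulting L1-regularized QP for the trial point wpd, and a backtracking line
// search on the true objective (tracked through exp_wTx[]) decides how far to
// move from w toward wpd.
//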
1769 #define GETI(i) (y[i]+1)
1770 // To support weights for instances, use GETI(i) (i)
1772 static void solve_l1r_lr(
1773 const problem *prob_col, double *w, double eps,
1774 double Cp, double Cn, int regularize_bias)
1776 int l = prob_col->l;
1777 int w_size = prob_col->n;
1778 int j, s, newton_iter=0, iter=0;
1779 int max_newton_iter = 100;
1780 int max_iter = 1000;
1781 int max_num_linesearch = 20;
1786 double inner_eps = 1;
1787 double sigma = 0.01;
1788 double w_norm, w_norm_new;
1790 double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1791 double Gmax_old = INF;
1792 double Gmax_new, Gnorm1_new;
1793 double QP_Gmax_old = INF;
1794 double QP_Gmax_new, QP_Gnorm1_new;
1795 double delta, negsum_xTd, cond;
1797 int *index = new int[w_size];
1798 schar *y = new schar[l];
1799 double *Hdiag = new double[w_size];
1800 double *Grad = new double[w_size];
1801 double *wpd = new double[w_size];
1802 double *xjneg_sum = new double[w_size];
1803 double *xTd = new double[l];
1804 double *exp_wTx = new double[l];
1805 double *exp_wTx_new = new double[l];
1806 double *tau = new double[l];
1807 double *D = new double[l];
1810 double C[3] = {Cn,0,Cp};
1812 // Initial w can be set here.
1813 for(j=0; j<w_size; j++)
1818 if(prob_col->y[j] > 0)
1827 for(j=0; j<w_size; j++)
1829 w_norm += fabs(w[j]);
1834 while(x->index != -1)
1836 int ind = x->index-1;
1837 double val = x->value;
1838 exp_wTx[ind] += w[j]*val;
1840 xjneg_sum[j] += C[GETI(ind)]*val;
1844 if (regularize_bias == 0)
1845 w_norm -= fabs(w[w_size-1]);
1849 exp_wTx[j] = exp(exp_wTx[j]);
1850 double tau_tmp = 1/(1+exp_wTx[j]);
1851 tau[j] = C[GETI(j)]*tau_tmp;
1852 D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
1855 while(newton_iter < max_newton_iter)
1859 active_size = w_size;
1861 for(s=0; s<active_size; s++)
1869 while(x->index != -1)
1871 int ind = x->index-1;
1872 Hdiag[j] += x->value*x->value*D[ind];
1873 tmp += x->value*tau[ind];
1876 Grad[j] = -tmp + xjneg_sum[j];
1878 double violation = 0;
1879 if (j == w_size-1 && regularize_bias == 0)
1880 violation = fabs(Grad[j]);
1883 double Gp = Grad[j]+1;
1884 double Gn = Grad[j]-1;
1891 //outer-level shrinking
1892 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1895 swap(index[s], index[active_size]);
1901 violation = fabs(Gp);
1903 violation = fabs(Gn);
1905 Gmax_new = max(Gmax_new, violation);
1906 Gnorm1_new += violation;
1909 if(newton_iter == 0)
1910 Gnorm1_init = Gnorm1_new;
1912 if(Gnorm1_new <= eps*Gnorm1_init)
1917 QP_active_size = active_size;
1919 for(int i=0; i<l; i++)
1922 // optimize QP over wpd
1923 while(iter < max_iter)
1928 for(j=0; j<QP_active_size; j++)
1930 int i = j+rand()%(QP_active_size-j);
1931 swap(index[i], index[j]);
1934 for(s=0; s<QP_active_size; s++)
1940 G = Grad[j] + (wpd[j]-w[j])*nu;
1941 while(x->index != -1)
1943 int ind = x->index-1;
1944 G += x->value*D[ind]*xTd[ind];
1948 double violation = 0;
1949 if (j == w_size-1 && regularize_bias == 0)
1951 // bias term not shrunken
1952 violation = fabs(G);
1965 //inner-level shrinking
1966 else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1969 swap(index[s], index[QP_active_size]);
1975 violation = fabs(Gp);
1977 violation = fabs(Gn);
1979 // obtain solution of one-variable problem
1982 else if(Gn > H*wpd[j])
1987 QP_Gmax_new = max(QP_Gmax_new, violation);
1988 QP_Gnorm1_new += violation;
1990 if(fabs(z) < 1.0e-12)
1992 z = min(max(z,-10.0),10.0);
1997 sparse_operator::axpy(z, x, xTd);
2002 if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
2005 if(QP_active_size == active_size)
2007 //active set reactivation
2010 QP_active_size = active_size;
2016 QP_Gmax_old = QP_Gmax_new;
2019 if(iter >= max_iter)
2020 info("WARNING: reaching max number of inner iterations\n");
2024 for(j=0; j<w_size; j++)
2026 delta += Grad[j]*(wpd[j]-w[j]);
2028 w_norm_new += fabs(wpd[j]);
2030 if (regularize_bias == 0)
2031 w_norm_new -= fabs(wpd[w_size-1]);
2032 delta += (w_norm_new-w_norm);
2035 for(int i=0; i<l; i++)
2037 negsum_xTd += C[GETI(i)]*xTd[i];
2040 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
2042 cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
2044 for(int i=0; i<l; i++)
2046 double exp_xTd = exp(xTd[i]);
2047 exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
2048 cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
2053 w_norm = w_norm_new;
2054 for(j=0; j<w_size; j++)
2056 for(int i=0; i<l; i++)
2058 exp_wTx[i] = exp_wTx_new[i];
2059 double tau_tmp = 1/(1+exp_wTx[i]);
2060 tau[i] = C[GETI(i)]*tau_tmp;
2061 D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
2068 for(j=0; j<w_size; j++)
2070 wpd[j] = (w[j]+wpd[j])*0.5;
2072 w_norm_new += fabs(wpd[j]);
2074 if (regularize_bias == 0)
2075 w_norm_new -= fabs(wpd[w_size-1]);
2078 for(int i=0; i<l; i++)
2083 // Recompute some info due to too many line search steps
2084 if(num_linesearch >= max_num_linesearch)
2086 for(int i=0; i<l; i++)
2089 for(int i=0; i<w_size; i++)
2091 if(w[i]==0) continue;
2093 sparse_operator::axpy(w[i], x, exp_wTx);
2096 for(int i=0; i<l; i++)
2097 exp_wTx[i] = exp(exp_wTx[i]);
2104 Gmax_old = Gmax_new;
2106 info("iter %3d #CD cycles %d\n", newton_iter, iter);
2109 info("=========================\n");
2110 info("optimization finished, #iter = %d\n", newton_iter);
2111 if(newton_iter >= max_newton_iter)
2112 info("WARNING: reaching max number of iterations\n");
2114 // calculate objective value
2118 for(j=0; j<w_size; j++)
2124 if (regularize_bias == 0)
2125 v -= fabs(w[w_size-1]);
2128 v += C[GETI(j)]*log(1+1/exp_wTx[j]);
2130 v += C[GETI(j)]*log(1+exp_wTx[j]);
2132 info("Objective value = %lf\n", v);
2133 info("#nonzeros/#features = %d/%d\n", nnz, w_size);
2140 delete [] xjneg_sum;
2143 delete [] exp_wTx_new;
2149 enum HEAP_TYPE { MIN, MAX };
2154 heap(int max_size, HEAP_TYPE type)
2157 a = new feature_node[max_size];
2164 bool cmp(const feature_node& left, const feature_node& right)
2167 return left.value > right.value;
2169 return left.value < right.value;
2175 void push(feature_node node)
2197 while(i*2+1 < _size)
2201 if(r < _size && cmp(a[l], a[r]))
2218 // A two-level coordinate descent algorithm for
2219 // a scaled one-class SVM dual problem
2221 // min_\alpha 0.5(\alpha^T Q \alpha),
2222 // s.t. 0 <= \alpha_i <= 1 and
2223 // e^T \alpha = \nu l
2225 // where Qij = xi^T xj
2229 // eps is the stopping tolerance
2231 // solution will be put in w and rho
2233 // See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.
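//
// In outline, each outer iteration below computes G_i = w^Tx_i on the active
// set, collects the max_inner_iter most violating indices on each side with a
// min-heap / max-heap over -G, and updates every selected pair (i, j) jointly:
//     delta    = (G_i - G_j) / (Q_ii + Q_jj - 2*Q_ij)
//     alpha_i -= delta,  alpha_j += delta   (clipped so both stay in [0, 1],
//                                            with alpha_i + alpha_j unchanged)
//     w       += (new alpha_i - old alpha_i) * (x_i - x_j)
//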
2235 static void solve_oneclass_svm(const problem *prob, double *w, double *rho, double eps, double nu)
2238 int w_size = prob->n;
2239 int i, j, s, iter = 0;
2241 double Qij, quad_coef, delta, sum;
2243 double *QD = new double[l];
2244 double *G = new double[l];
2245 int *index = new int[l];
2246 double *alpha = new double[l];
2248 int max_iter = 1000;
2249 int active_size = l;
2251 double negGmax; // max { -grad(f)_i | alpha_i < 1 }
2252 double negGmin; // min { -grad(f)_i | alpha_i > 0 }
2254 int *most_violating_i = new int[l];
2255 int *most_violating_j = new int[l];
2257 int n = (int)(nu*l); // # of alpha's at upper bound
2262 for(i=n+1; i<l; i++)
2265 for(i=0; i<w_size; i++)
2269 feature_node * const xi = prob->x[i];
2270 QD[i] = sparse_operator::nrm2_sq(xi);
2271 sparse_operator::axpy(alpha[i], xi, w);
2276 while (iter < max_iter)
2281 for (s=0; s<active_size; s++)
2284 feature_node * const xi = prob->x[i];
2285 G[i] = sparse_operator::dot(w, xi);
2287 negGmax = max(negGmax, -G[i]);
2289 negGmin = min(negGmin, -G[i]);
2292 if (negGmax - negGmin < eps)
2294 if (active_size == l)
2304 for(s=0; s<active_size; s++)
2307 if ((alpha[i] == 1 && -G[i] > negGmax) ||
2308 (alpha[i] == 0 && -G[i] < negGmin))
2311 swap(index[s], index[active_size]);
2316 max_inner_iter = max(active_size/10, 1);
2317 struct heap min_heap = heap(max_inner_iter, heap::MIN);
2318 struct heap max_heap = heap(max_inner_iter, heap::MAX);
2319 struct feature_node node;
2320 for(s=0; s<active_size; s++)
2328 if (min_heap.size() < max_inner_iter)
2329 min_heap.push(node);
2330 else if (min_heap.top().value < node.value)
2333 min_heap.push(node);
2339 if (max_heap.size() < max_inner_iter)
2340 max_heap.push(node);
2341 else if (max_heap.top().value > node.value)
2344 max_heap.push(node);
2348 max_inner_iter = min(min_heap.size(), max_heap.size());
2349 while (max_heap.size() > max_inner_iter)
2351 while (min_heap.size() > max_inner_iter)
2354 for (s=max_inner_iter-1; s>=0; s--)
2356 most_violating_i[s] = min_heap.top().index;
2357 most_violating_j[s] = max_heap.top().index;
2362 for (s=0; s<max_inner_iter; s++)
2364 i = most_violating_i[s];
2365 j = most_violating_j[s];
2367 if ((alpha[i] == 0 && alpha[j] == 0) ||
2368 (alpha[i] == 1 && alpha[j] == 1))
2371 feature_node const * xi = prob->x[i];
2372 feature_node const * xj = prob->x[j];
2374 Gi = sparse_operator::dot(w, xi);
2375 Gj = sparse_operator::dot(w, xj);
2377 int violating_pair = 0;
2378 if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
2381 if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
2383 if (violating_pair == 0)
2386 Qij = sparse_operator::sparse_dot(xi, xj);
2387 quad_coef = QD[i] + QD[j] - 2*Qij;
2390 delta = (Gi - Gj) / quad_coef;
2391 old_alpha_i = alpha[i];
2392 sum = alpha[i] + alpha[j];
2393 alpha[i] = alpha[i] - delta;
2394 alpha[j] = alpha[j] + delta;
2427 delta = alpha[i] - old_alpha_i;
2428 sparse_operator::axpy(delta, xi, w);
2429 sparse_operator::axpy(-delta, xj, w);
2435 info("\noptimization finished, #iter = %d\n",iter);
2436 if (iter >= max_iter)
2437 info("\nWARNING: reaching max number of iterations\n\n");
2439 // calculate objective value
2441 for(i=0; i<w_size; i++)
2449 info("Objective value = %lf\n", v/2);
2450 info("nSV = %d\n", nSV);
2454 double ub = INF, lb = -INF, sum_free = 0;
2457 double G = sparse_operator::dot(w, prob->x[i]);
2460 else if (alpha[i] == 1)
2470 *rho = sum_free/nr_free;
2474 info("rho = %lf\n", *rho);
2480 delete [] most_violating_i;
2481 delete [] most_violating_j;
2484 // transpose matrix X from row format to column format
2485 static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
2491 size_t *col_ptr = new size_t [n+1];
2492 feature_node *x_space;
2495 prob_col->y = new double[l];
2496 prob_col->x = new feature_node*[n];
2499 prob_col->y[i] = prob->y[i];
2501 for(i=0; i<n+1; i++)
2505 feature_node *x = prob->x[i];
2506 while(x->index != -1)
2509 col_ptr[x->index]++;
2513 for(i=1; i<n+1; i++)
2514 col_ptr[i] += col_ptr[i-1] + 1;
2516 x_space = new feature_node[nnz+n];
2518 prob_col->x[i] = &x_space[col_ptr[i]];
2522 feature_node *x = prob->x[i];
2523 while(x->index != -1)
2525 int ind = x->index-1;
2526 x_space[col_ptr[ind]].index = i+1; // starts from 1
2527 x_space[col_ptr[ind]].value = x->value;
2533 x_space[col_ptr[i]].index = -1;
2535 *x_space_ret = x_space;
2540 // label: class labels, start: starting index of each class, count: #data of each class, perm: indices to the original data
2541 // perm, length l, must be allocated before calling this subroutine
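//
// For example, with l = 5 and y = {3, 1, 3, 3, 1} this produces
// label = {3, 1}, count = {3, 2}, start = {0, 3}, and perm = {0, 2, 3, 1, 4}
// (the y=3 instances first, then the y=1 instances). For -1/+1 data the
// special case below may swap the two labels first.
//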
2542 static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2545 int max_nr_class = 16;
2547 int *label = Malloc(int,max_nr_class);
2548 int *count = Malloc(int,max_nr_class);
2549 int *data_label = Malloc(int,l);
2554 int this_label = (int)prob->y[i];
2556 for(j=0;j<nr_class;j++)
2558 if(this_label == label[j])
2567 if(nr_class == max_nr_class)
2570 label = (int *)realloc(label,max_nr_class*sizeof(int));
2571 count = (int *)realloc(count,max_nr_class*sizeof(int));
2573 label[nr_class] = this_label;
2574 count[nr_class] = 1;
2580 // Labels are ordered by their first occurrence in the training set.
2581 // However, for two-class sets with -1/+1 labels where -1 appears first,
2582 // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
2584 if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2586 swap(label[0],label[1]);
2587 swap(count[0],count[1]);
2590 if(data_label[i] == 0)
2597 int *start = Malloc(int,nr_class);
2599 for(i=1;i<nr_class;i++)
2600 start[i] = start[i-1]+count[i-1];
2603 perm[start[data_label[i]]] = i;
2604 ++start[data_label[i]];
2607 for(i=1;i<nr_class;i++)
2608 start[i] = start[i-1]+count[i-1];
2610 *nr_class_ret = nr_class;
2617 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2619 double eps = param->eps;
2623 for(int i=0;i<prob->l;i++)
2626 neg = prob->l - pos;
2627 double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2629 function *fun_obj=NULL;
2630 switch(param->solver_type)
2634 double *C = new double[prob->l];
2635 for(int i = 0; i < prob->l; i++)
2642 fun_obj=new l2r_lr_fun(prob, param, C);
2643 NEWTON newton_obj(fun_obj, primal_solver_tol);
2644 newton_obj.set_print_string(liblinear_print_string);
2645 newton_obj.newton(w);
2650 case L2R_L2LOSS_SVC:
2652 double *C = new double[prob->l];
2653 for(int i = 0; i < prob->l; i++)
2660 fun_obj=new l2r_l2_svc_fun(prob, param, C);
2661 NEWTON newton_obj(fun_obj, primal_solver_tol);
2662 newton_obj.set_print_string(liblinear_print_string);
2663 newton_obj.newton(w);
2668 case L2R_L2LOSS_SVC_DUAL:
2669 solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2671 case L2R_L1LOSS_SVC_DUAL:
2672 solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2674 case L1R_L2LOSS_SVC:
2677 feature_node *x_space = NULL;
2678 transpose(prob, &x_space ,&prob_col);
2679 solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2680 delete [] prob_col.y;
2681 delete [] prob_col.x;
2688 feature_node *x_space = NULL;
2689 transpose(prob, &x_space ,&prob_col);
2690 solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
2691 delete [] prob_col.y;
2692 delete [] prob_col.x;
2697 solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2699 case L2R_L2LOSS_SVR:
2701 double *C = new double[prob->l];
2702 for(int i = 0; i < prob->l; i++)
2705 fun_obj=new l2r_l2_svr_fun(prob, param, C);
2706 NEWTON newton_obj(fun_obj, param->eps);
2707 newton_obj.set_print_string(liblinear_print_string);
2708 newton_obj.newton(w);
2714 case L2R_L1LOSS_SVR_DUAL:
2715 solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2717 case L2R_L2LOSS_SVR_DUAL:
2718 solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2721 fprintf(stderr, "ERROR: unknown solver_type\n");
2726 // Calculate the initial C for parameter selection
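//
// In outline, the thresholds computed here are
//     L2R_LR:          min_C = 1 / (l * max_i ||x_i||^2)
//     L2R_L2LOSS_SVC:  min_C = 1 / (2 * l * max_i ||x_i||^2)
//     L2R_L2LOSS_SVR:  min_C = delta2^2 * loss / (8 * sum_y^2 * max_i ||x_i||^2),
//                      with sum_y = \sum_i |y_i| and loss = \sum_i max(|y_i|-p, 0)^2,
// and the returned starting C is min_C rounded down to a power of two.
//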
2727 static double calc_start_C(const problem *prob, const parameter *param)
2730 double xTx, max_xTx;
2732 for(i=0; i<prob->l; i++)
2735 feature_node *xi=prob->x[i];
2736 while(xi->index != -1)
2738 double val = xi->value;
2747 if(param->solver_type == L2R_LR)
2748 min_C = 1.0 / (prob->l * max_xTx);
2749 else if(param->solver_type == L2R_L2LOSS_SVC)
2750 min_C = 1.0 / (2 * prob->l * max_xTx);
2751 else if(param->solver_type == L2R_L2LOSS_SVR)
2753 double sum_y, loss, y_abs;
2754 double delta2 = 0.1;
2755 sum_y = 0, loss = 0;
2756 for(i=0; i<prob->l; i++)
2758 y_abs = fabs(prob->y[i]);
2760 loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
2763 min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
2768 return pow( 2, floor(log(min_C) / log(2.0)) );
2771 static double calc_max_p(const problem *prob, const parameter *param)
2775 for(i = 0; i < prob->l; i++)
2776 max_p = max(max_p, fabs(prob->y[i]));
2781 static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
2785 double *target = Malloc(double, prob->l);
2787 // variables for warm start
2789 double **prev_w = Malloc(double*, nr_fold);
2790 for(i = 0; i < nr_fold; i++)
2792 int num_unchanged_w = 0;
2793 void (*default_print_string) (const char *) = liblinear_print_string;
2795 if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
2797 else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
2801 param_tmp->C = start_C;
2802 while(param_tmp->C <= max_C)
2804 // Output disabled for running CV at a particular C
2805 set_print_string_function(&print_null);
2807 for(i=0; i<nr_fold; i++)
2810 int begin = fold_start[i];
2811 int end = fold_start[i+1];
2813 param_tmp->init_sol = prev_w[i];
2814 struct model *submodel = train(&subprob[i],param_tmp);
2817 if(submodel->nr_class == 2)
2818 total_w_size = subprob[i].n;
2820 total_w_size = subprob[i].n * submodel->nr_class;
2822 if(prev_w[i] == NULL)
2824 prev_w[i] = Malloc(double, total_w_size);
2825 for(j=0; j<total_w_size; j++)
2826 prev_w[i][j] = submodel->w[j];
2828 else if(num_unchanged_w >= 0)
2830 double norm_w_diff = 0;
2831 for(j=0; j<total_w_size; j++)
2833 norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
2834 prev_w[i][j] = submodel->w[j];
2836 norm_w_diff = sqrt(norm_w_diff);
2838 if(norm_w_diff > 1e-15)
2839 num_unchanged_w = -1;
2843 for(j=0; j<total_w_size; j++)
2844 prev_w[i][j] = submodel->w[j];
2847 for(j=begin; j<end; j++)
2848 target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2850 free_and_destroy_model(&submodel);
2852 set_print_string_function(default_print_string);
2854 if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
2856 int total_correct = 0;
2857 for(i=0; i<prob->l; i++)
2858 if(target[i] == prob->y[i])
2860 double current_rate = (double)total_correct/prob->l;
2861 if(current_rate > *best_score)
2863 *best_C = param_tmp->C;
2864 *best_score = current_rate;
2867 info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
2869 else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
2871 double total_error = 0.0;
2872 for(i=0; i<prob->l; i++)
2874 double y = prob->y[i];
2875 double v = target[i];
2876 total_error += (v-y)*(v-y);
2878 double current_error = total_error/prob->l;
2879 if(current_error < *best_score)
2881 *best_C = param_tmp->C;
2882 *best_score = current_error;
2885 info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
2889 if(num_unchanged_w == 5)
2891 param_tmp->C = param_tmp->C*ratio;
2894 if(param_tmp->C > max_C)
2895 info("WARNING: maximum C reached.\n");
2897 for(i=0; i<nr_fold; i++)
2904 // Interface functions
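//
// A minimal caller-side sketch of how these entry points fit together
// (illustration only; values are placeholders, feature indices are 1-based,
// and every row is terminated by index = -1):
//
//   struct feature_node x1[] = { {1, +1.0}, {-1, 0} };
//   struct feature_node x2[] = { {1, -1.0}, {-1, 0} };
//   struct feature_node *rows[] = { x1, x2 };
//   double y[] = { +1, -1 };
//   struct problem prob = { 2, 1, y, rows, -1 };   // l, n, y, x, bias
//
//   struct parameter param;
//   param.solver_type = L2R_L2LOSS_SVC_DUAL;
//   param.C = 1; param.eps = 0.1; param.p = 0.1; param.nu = 0.5;
//   param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
//   param.init_sol = NULL; param.regularize_bias = 1;
//
//   if(check_parameter(&prob, &param) == NULL)
//   {
//       struct model *m = train(&prob, &param);
//       double y_hat = predict(m, x1);
//       free_and_destroy_model(&m);
//   }
//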
2906 model* train(const problem *prob, const parameter *param)
2911 int w_size = prob->n;
2912 model *model_ = Malloc(model,1);
2915 model_->nr_feature=n-1;
2917 model_->nr_feature=n;
2918 model_->param = *param;
2919 model_->bias = prob->bias;
2921 if(check_regression_model(model_))
2923 model_->w = Malloc(double, w_size);
2925 if(param->init_sol != NULL)
2926 for(i=0;i<w_size;i++)
2927 model_->w[i] = param->init_sol[i];
2929 for(i=0;i<w_size;i++)
2932 model_->nr_class = 2;
2933 model_->label = NULL;
2934 train_one(prob, param, model_->w, 0, 0);
2936 else if(check_oneclass_model(model_))
2938 model_->w = Malloc(double, w_size);
2939 model_->nr_class = 2;
2940 model_->label = NULL;
2941 solve_oneclass_svm(prob, model_->w, &(model_->rho), param->eps, param->nu);
2949 int *perm = Malloc(int,l);
2951 // group training data of the same class
2952 group_classes(prob,&nr_class,&label,&start,&count,perm);
2954 model_->nr_class=nr_class;
2955 model_->label = Malloc(int,nr_class);
2956 for(i=0;i<nr_class;i++)
2957 model_->label[i] = label[i];
2959 // calculate weighted C
2960 double *weighted_C = Malloc(double, nr_class);
2961 for(i=0;i<nr_class;i++)
2962 weighted_C[i] = param->C;
2963 for(i=0;i<param->nr_weight;i++)
2965 for(j=0;j<nr_class;j++)
2966 if(param->weight_label[i] == label[j])
2969 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2971 weighted_C[j] *= param->weight[i];
2974 // constructing the subproblem
2975 feature_node **x = Malloc(feature_node *,l);
2977 x[i] = prob->x[perm[i]];
2983 sub_prob.x = Malloc(feature_node *,sub_prob.l);
2984 sub_prob.y = Malloc(double,sub_prob.l);
2986 for(k=0; k<sub_prob.l; k++)
2987 sub_prob.x[k] = x[k];
2989 // multi-class svm by Crammer and Singer
2990 if(param->solver_type == MCSVM_CS)
2992 model_->w=Malloc(double, n*nr_class);
2993 for(i=0;i<nr_class;i++)
2994 for(j=start[i];j<start[i]+count[i];j++)
2996 Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2997 Solver.Solve(model_->w);
3003 model_->w=Malloc(double, w_size);
3005 int e0 = start[0]+count[0];
3009 for(; k<sub_prob.l; k++)
3012 if(param->init_sol != NULL)
3013 for(i=0;i<w_size;i++)
3014 model_->w[i] = param->init_sol[i];
3016 for(i=0;i<w_size;i++)
3019 train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
3023 model_->w=Malloc(double, w_size*nr_class);
3024 double *w=Malloc(double, w_size);
3025 for(i=0;i<nr_class;i++)
3028 int ei = si+count[i];
3035 for(; k<sub_prob.l; k++)
3038 if(param->init_sol != NULL)
3039 for(j=0;j<w_size;j++)
3040 w[j] = param->init_sol[j*nr_class+i];
3042 for(j=0;j<w_size;j++)
3045 train_one(&sub_prob, param, w, weighted_C[i], param->C);
3047 for(j=0;j<w_size;j++)
3048 model_->w[j*nr_class+i] = w[j];
3067 void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
3072 int *perm = Malloc(int,l);
3076 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
3078 fold_start = Malloc(int,nr_fold+1);
3079 for(i=0;i<l;i++) perm[i]=i;
3082 int j = i+rand()%(l-i);
3083 swap(perm[i],perm[j]);
3085 for(i=0;i<=nr_fold;i++)
3086 fold_start[i]=i*l/nr_fold;
3088 for(i=0;i<nr_fold;i++)
3090 int begin = fold_start[i];
3091 int end = fold_start[i+1];
3093 struct problem subprob;
3095 subprob.bias = prob->bias;
3096 subprob.n = prob->n;
3097 subprob.l = l-(end-begin);
3098 subprob.x = Malloc(struct feature_node*,subprob.l);
3099 subprob.y = Malloc(double,subprob.l);
3102 for(j=0;j<begin;j++)
3104 subprob.x[k] = prob->x[perm[j]];
3105 subprob.y[k] = prob->y[perm[j]];
3110 subprob.x[k] = prob->x[perm[j]];
3111 subprob.y[k] = prob->y[perm[j]];
3114 struct model *submodel = train(&subprob,param);
3115 for(j=begin;j<end;j++)
3116 target[perm[j]] = predict(submodel,prob->x[perm[j]]);
3117 free_and_destroy_model(&submodel);
3126 void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
3133 int *perm = Malloc(int, l);
3134 struct problem *subprob = Malloc(problem,nr_fold);
3139 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
3141 fold_start = Malloc(int,nr_fold+1);
3142 for(i=0;i<l;i++) perm[i]=i;
3145 int j = i+rand()%(l-i);
3146 swap(perm[i],perm[j]);
3148 for(i=0;i<=nr_fold;i++)
3149 fold_start[i]=i*l/nr_fold;
3151 for(i=0;i<nr_fold;i++)
3153 int begin = fold_start[i];
3154 int end = fold_start[i+1];
3157 subprob[i].bias = prob->bias;
3158 subprob[i].n = prob->n;
3159 subprob[i].l = l-(end-begin);
3160 subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
3161 subprob[i].y = Malloc(double,subprob[i].l);
3164 for(j=0;j<begin;j++)
3166 subprob[i].x[k] = prob->x[perm[j]];
3167 subprob[i].y[k] = prob->y[perm[j]];
3172 subprob[i].x[k] = prob->x[perm[j]];
3173 subprob[i].y[k] = prob->y[perm[j]];
3179 struct parameter param_tmp = *param;
3181 if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
3184 start_C = calc_start_C(prob, &param_tmp);
3185 double max_C = 1024;
3186 start_C = min(start_C, max_C);
3187 double best_C_tmp, best_score_tmp;
3189 find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
3191 *best_C = best_C_tmp;
3192 *best_score = best_score_tmp;
3194 else if(param->solver_type == L2R_L2LOSS_SVR)
3196 double max_p = calc_max_p(prob, &param_tmp);
3197 int num_p_steps = 20;
3198 double max_C = 1048576;
3203 i = min((int)(start_p/(max_p/num_p_steps)), i);
3206 param_tmp.p = i*max_p/num_p_steps;
3209 start_C_tmp = calc_start_C(prob, &param_tmp);
3211 start_C_tmp = start_C;
3212 start_C_tmp = min(start_C_tmp, max_C);
3213 double best_C_tmp, best_score_tmp;
3215 find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);
3217 if(best_score_tmp < *best_score)
3219 *best_p = param_tmp.p;
3220 *best_C = best_C_tmp;
3221 *best_score = best_score_tmp;
3228 for(i=0; i<nr_fold; i++)
3236 double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
3241 n=model_->nr_feature+1;
3243 n=model_->nr_feature;
3244 double *w=model_->w;
3245 int nr_class=model_->nr_class;
3248 if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
3253 const feature_node *lx=x;
3256 for(; (idx=lx->index)!=-1; lx++)
3258 // the dimension of testing data may exceed that of training
3261 dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
3263 if(check_oneclass_model(model_))
3264 dec_values[0] -= model_->rho;
3268 if(check_regression_model(model_))
3269 return dec_values[0];
3270 else if(check_oneclass_model(model_))
3271 return (dec_values[0]>0)?1:-1;
3273 return (dec_values[0]>0)?model_->label[0]:model_->label[1];
3277 int dec_max_idx = 0;
3278 for(i=1;i<nr_class;i++)
3280 if(dec_values[i] > dec_values[dec_max_idx])
3283 return model_->label[dec_max_idx];
3287 double predict(const model *model_, const feature_node *x)
3289 double *dec_values = Malloc(double, model_->nr_class);
3290 double label=predict_values(model_, x, dec_values);
3295 double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
3297 if(check_probability_model(model_))
3300 int nr_class=model_->nr_class;
3307 double label=predict_values(model_, x, prob_estimates);
3309 prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
3311 if(nr_class==2) // for binary classification
3312 prob_estimates[1]=1.-prob_estimates[0];
3316 for(i=0; i<nr_class; i++)
3317 sum+=prob_estimates[i];
3319 for(i=0; i<nr_class; i++)
3320 prob_estimates[i]=prob_estimates[i]/sum;
3329 static const char *solver_type_table[]=
3331 "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
3332 "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
3334 "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL",
3335 "", "", "", "", "", "", "",
3336 "ONECLASS_SVM", NULL
3339 int save_model(const char *model_file_name, const struct model *model_)
3342 int nr_feature=model_->nr_feature;
3344 const parameter& param = model_->param;
3351 FILE *fp = fopen(model_file_name,"w");
3352 if(fp==NULL) return -1;
3354 char *old_locale = setlocale(LC_ALL, NULL);
3357 old_locale = strdup(old_locale);
3359 setlocale(LC_ALL, "C");
3362 if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
3365 nr_w=model_->nr_class;
3367 fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
3368 fprintf(fp, "nr_class %d\n", model_->nr_class);
3372 fprintf(fp, "label");
3373 for(i=0; i<model_->nr_class; i++)
3374 fprintf(fp, " %d", model_->label[i]);
3378 fprintf(fp, "nr_feature %d\n", nr_feature);
3380 fprintf(fp, "bias %.17g\n", model_->bias);
3382 if(check_oneclass_model(model_))
3383 fprintf(fp, "rho %.17g\n", model_->rho);
3386 for(i=0; i<w_size; i++)
3389 for(j=0; j<nr_w; j++)
3390 fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
3394 setlocale(LC_ALL, old_locale);
3397 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
3402 // FSCANF helps to handle fscanf failures.
3403 // Its do-while block avoids the ambiguity when the macro is used as a single statement in an if-else construct.
3408 #define FSCANF(_stream, _format, _var)do\
3410 if (fscanf(_stream, _format, _var) != 1)\
3412 fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
3416 // EXIT_LOAD_MODEL should NOT end with a semicolon.
3417 #define EXIT_LOAD_MODEL()\
3419 setlocale(LC_ALL, old_locale);\
3420 free(model_->label);\
3425 struct model *load_model(const char *model_file_name)
3427 FILE *fp = fopen(model_file_name,"r");
3428 if(fp==NULL) return NULL;
3436 model *model_ = Malloc(model,1);
3437 parameter& param = model_->param;
3438 // parameters for training only won't be assigned, but arrays are assigned as NULL for safety
3439 param.nr_weight = 0;
3440 param.weight_label = NULL;
3441 param.weight = NULL;
3442 param.init_sol = NULL;
3444 model_->label = NULL;
3446 char *old_locale = setlocale(LC_ALL, NULL);
3449 old_locale = strdup(old_locale);
3451 setlocale(LC_ALL, "C");
3456 FSCANF(fp,"%80s",cmd);
3457 if(strcmp(cmd,"solver_type")==0)
3459 FSCANF(fp,"%80s",cmd);
3461 for(i=0;solver_type_table[i];i++)
3463 if(strcmp(solver_type_table[i],cmd)==0)
3465 param.solver_type=i;
3469 if(solver_type_table[i] == NULL)
3471 fprintf(stderr,"unknown solver type.\n");
3475 else if(strcmp(cmd,"nr_class")==0)
3477 FSCANF(fp,"%d",&nr_class);
3478 model_->nr_class=nr_class;
3480 else if(strcmp(cmd,"nr_feature")==0)
3482 FSCANF(fp,"%d",&nr_feature);
3483 model_->nr_feature=nr_feature;
3485 else if(strcmp(cmd,"bias")==0)
3487 FSCANF(fp,"%lf",&bias);
3490 else if(strcmp(cmd,"rho")==0)
3492 FSCANF(fp,"%lf",&rho);
3495 else if(strcmp(cmd,"w")==0)
3499 else if(strcmp(cmd,"label")==0)
3501 int nr_class = model_->nr_class;
3502 model_->label = Malloc(int,nr_class);
3503 for(int i=0;i<nr_class;i++)
3504 FSCANF(fp,"%d",&model_->label[i]);
3508 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
3513 nr_feature=model_->nr_feature;
3520 if(nr_class==2 && param.solver_type != MCSVM_CS)
3525 model_->w=Malloc(double, w_size*nr_w);
3526 for(i=0; i<w_size; i++)
3529 for(j=0; j<nr_w; j++)
3530 FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
3533 setlocale(LC_ALL, old_locale);
3536 if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
3541 int get_nr_feature(const model *model_)
3543 return model_->nr_feature;
3546 int get_nr_class(const model *model_)
3548 return model_->nr_class;
3551 void get_labels(const model *model_, int* label)
3553 if (model_->label != NULL)
3554 for(int i=0;i<model_->nr_class;i++)
3555 label[i] = model_->label[i];
3558 // use inline here for better performance (around 20% faster than the non-inline one)
3559 static inline double get_w_value(const struct model *model_, int idx, int label_idx)
3561 int nr_class = model_->nr_class;
3562 int solver_type = model_->param.solver_type;
3563 const double *w = model_->w;
3565 if(idx < 0 || idx > model_->nr_feature)
3567 if(check_regression_model(model_) || check_oneclass_model(model_))
3571 if(label_idx < 0 || label_idx >= nr_class)
3573 if(nr_class == 2 && solver_type != MCSVM_CS)
3581 return w[idx*nr_class+label_idx];
3585 // feat_idx: starting from 1 to nr_feature
3586 // label_idx: starting from 0 to nr_class-1 for classification models;
3587 //            for regression and one-class SVM models, label_idx is ignored.
3589 double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
3591 if(feat_idx > model_->nr_feature)
3593 return get_w_value(model_, feat_idx-1, label_idx);
3596 double get_decfun_bias(const struct model *model_, int label_idx)
3598 if(check_oneclass_model(model_))
3600 fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n");
3603 int bias_idx = model_->nr_feature;
3604 double bias = model_->bias;
3608 return bias*get_w_value(model_, bias_idx, label_idx);
3611 double get_decfun_rho(const struct model *model_)
3613 if(check_oneclass_model(model_))
3617 fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n");
3622 void free_model_content(struct model *model_ptr)
3624 if(model_ptr->w != NULL)
3626 if(model_ptr->label != NULL)
3627 free(model_ptr->label);
3630 void free_and_destroy_model(struct model **model_ptr_ptr)
3632 struct model *model_ptr = *model_ptr_ptr;
3633 if(model_ptr != NULL)
3635 free_model_content(model_ptr);
3640 void destroy_param(parameter* param)
3642 if(param->weight_label != NULL)
3643 free(param->weight_label);
3644 if(param->weight != NULL)
3645 free(param->weight);
3646 if(param->init_sol != NULL)
3647 free(param->init_sol);
3650 const char *check_parameter(const problem *prob, const parameter *param)
3661 if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
3662 return "prob->bias >=0, but this is ignored in ONECLASS_SVM";
3664 if(param->regularize_bias == 0)
3666 if(prob->bias != 1.0)
3667 return "To not regularize bias, must specify -B 1 along with -R";
3668 if(param->solver_type != L2R_LR
3669 && param->solver_type != L2R_L2LOSS_SVC
3670 && param->solver_type != L1R_L2LOSS_SVC
3671 && param->solver_type != L1R_LR
3672 && param->solver_type != L2R_L2LOSS_SVR)
3673 return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR";
3676 if(param->solver_type != L2R_LR
3677 && param->solver_type != L2R_L2LOSS_SVC_DUAL
3678 && param->solver_type != L2R_L2LOSS_SVC
3679 && param->solver_type != L2R_L1LOSS_SVC_DUAL
3680 && param->solver_type != MCSVM_CS
3681 && param->solver_type != L1R_L2LOSS_SVC
3682 && param->solver_type != L1R_LR
3683 && param->solver_type != L2R_LR_DUAL
3684 && param->solver_type != L2R_L2LOSS_SVR
3685 && param->solver_type != L2R_L2LOSS_SVR_DUAL
3686 && param->solver_type != L2R_L1LOSS_SVR_DUAL
3687 && param->solver_type != ONECLASS_SVM)
3688 return "unknown solver type";
3690 if(param->init_sol != NULL
3691 && param->solver_type != L2R_LR
3692 && param->solver_type != L2R_L2LOSS_SVC
3693 && param->solver_type != L2R_L2LOSS_SVR)
3694 return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";
3699 int check_probability_model(const struct model *model_)
3701 return (model_->param.solver_type==L2R_LR ||
3702 model_->param.solver_type==L2R_LR_DUAL ||
3703 model_->param.solver_type==L1R_LR);
3706 int check_regression_model(const struct model *model_)
3708 return (model_->param.solver_type==L2R_L2LOSS_SVR ||
3709 model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
3710 model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
3713 int check_oneclass_model(const struct model *model_)
3715 return model_->param.solver_type == ONECLASS_SVM;
3718 void set_print_string_function(void (*print_func)(const char*))
3720 if (print_func == NULL)
3721 liblinear_print_string = &print_string_stdout;
3723 liblinear_print_string = print_func;