#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <locale.h>
#include "linear.h"
#include "tron.h"
int liblinear_version = LIBLINEAR_VERSION;
typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

static void print_string_stdout(const char *s)
{
	fputs(s,stdout);
	fflush(stdout);
}
static void print_null(const char *s) {}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsnprintf(buf,sizeof(buf),fmt,ap); // bounded variant; plain vsprintf could overflow buf
	va_end(ap);
	(*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif
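// sparse_operator gathers the BLAS-like kernels used throughout this file:
// a sparse vector is an array of feature_node terminated by index == -1,
// with 1-based indices (hence the index-1 offsets into dense arrays below).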
class sparse_operator
{
public:
	static double nrm2_sq(const feature_node *x)
	{
		double ret = 0;
		while(x->index != -1)
		{
			ret += x->value*x->value;
			x++;
		}
		return (ret);
	}

	static double dot(const double *s, const feature_node *x)
	{
		double ret = 0;
		while(x->index != -1)
		{
			ret += s[x->index-1]*x->value;
			x++;
		}
		return (ret);
	}

	static void axpy(const double a, const feature_node *x, double *y)
	{
		while(x->index != -1)
		{
			y[x->index-1] += a*x->value;
			x++;
		}
	}
};

class l2r_lr_fun: public function
{
public:
	l2r_lr_fun(const problem *prob, double *C);
	~l2r_lr_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);
	void get_diag_preconditioner(double *M);

private:
	void Xv(double *v, double *Xv);
	void XTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	const problem *prob;
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)
{
	int l=prob->l;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	this->C = C;
}

l2r_lr_fun::~l2r_lr_fun()
{
	delete[] z;
	delete[] D;
}


double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;
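	// Evaluate C_i*log(1+exp(-y_i*z_i)) with a non-positive exponent in both
	// branches, so exp() cannot overflow for large |y_i*z_i|.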
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}

	return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

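	// z still holds Xw from the fun() call that precedes each grad() call;
	// overwrite it with C_i*(sigma(y_i*w^T x_i)-1)*y_i and cache D for Hv().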
	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_lr_fun::get_diag_preconditioner(double *M)
{
	int i;
	int l = prob->l;
	int w_size=get_nr_variable();
	feature_node **x = prob->x;

	for (i=0; i<w_size; i++)
		M[i] = 1;

	for (i=0; i<l; i++)
	{
		feature_node *s = x[i];
		while (s->index!=-1)
		{
			M[s->index-1] += s->value*s->value*C[i]*D[i];
			s++;
		}
	}
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

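	// Hs = s + X^T (diag(C .* D) (X s)), accumulated one row x_i at a time;
	// D was computed in grad().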
	for(i=0;i<w_size;i++)
		Hs[i] = 0;
	for(i=0;i<l;i++)
	{
		feature_node * const xi=x[i];
		double xTs = sparse_operator::dot(s, xi);

		xTs = C[i]*D[i]*xTs;

		sparse_operator::axpy(xTs, xi, Hs);
	}
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + Hs[i];
}

void l2r_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
		Xv[i]=sparse_operator::dot(v, x[i]);
}

void l2r_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
		sparse_operator::axpy(v[i], x[i], XTv);
}

class l2r_l2_svc_fun: public function
{
public:
	l2r_l2_svc_fun(const problem *prob, double *C);
	~l2r_l2_svc_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);
	void get_diag_preconditioner(double *M);

protected:
	void Xv(double *v, double *Xv);
	void subXTv(double *v, double *XTv);

	double *C;
	double *z;
	int *I;
	int sizeI;
	const problem *prob;
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)
{
	int l=prob->l;

	this->prob = prob;

	z = new double[l];
	I = new int[l];
	this->C = C;
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
	delete[] z;
	delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)
			f += C[i]*d*d;
	}

	return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

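	// fun() left z_i = y_i*(w^T x_i); instances with z_i < 1 violate the
	// margin and form the active index set I used by Hv() and subXTv().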
	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x = prob->x;

	for (i=0; i<w_size; i++)
		M[i] = 1;

	for (i=0; i<sizeI; i++)
	{
		int idx = I[i];
		feature_node *s = x[idx];
		while (s->index!=-1)
		{
			M[s->index-1] += s->value*s->value*C[idx]*2;
			s++;
		}
	}
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		Hs[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node * const xi=x[I[i]];
		double xTs = sparse_operator::dot(s, xi);

		xTs = C[I[i]]*xTs;

		sparse_operator::axpy(xTs, xi, Hs);
	}
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
}

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
		Xv[i]=sparse_operator::dot(v, x[i]);
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
		sparse_operator::axpy(v[i], x[I[i]], XTv);
}

class l2r_l2_svr_fun: public l2r_l2_svc_fun
{
public:
	l2r_l2_svr_fun(const problem *prob, double *C, double p);

	double fun(double *w);
	void grad(double *w, double *g);

private:
	double p;
};

l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
	l2r_l2_svc_fun(prob, C)
{
	this->p = p;
}

double l2r_l2_svr_fun::fun(double *w)
{
	int i;
	double f=0;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();
	double d;

	Xv(w, z);

	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2;
	for(i=0;i<l;i++)
	{
		d = z[i] - y[i];
		if(d < -p)
			f += C[i]*(d+p)*(d+p);
		else if(d > p)
			f += C[i]*(d-p)*(d-p);
	}

	return(f);
}

void l2r_l2_svr_fun::grad(double *w, double *g)
{
	int i;
	double *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();
	double d;

	sizeI = 0;
	for(i=0;i<l;i++)
	{
		d = z[i] - y[i];

		// generate index set I
		if(d < -p)
		{
			z[sizeI] = C[i]*(d+p);
			I[sizeI] = i;
			sizeI++;
		}
		else if(d > p)
		{
			z[sizeI] = C[i]*(d-p);
			I[sizeI] = i;
			sizeI++;
		}

	}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
//    s.t.     \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//  where e^m_i = 0 if y_i  = m,
//        e^m_i = 1 if y_i != m,
//  C^m_i = C if m  = y_i,
//  C^m_i = 0 if m != y_i,
//  and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)

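// A minimal usage sketch (an illustration, not code from this file): given a
// populated `problem` whose labels are already in 0..nr_class-1 (GETI below
// indexes the C array by label) and a weighted_C array of length nr_class:
//
//	double *w = new double[prob->n * nr_class];
//	Solver_MCSVM_CS solver(prob, nr_class, weighted_C, param->eps);
//	solver.Solve(w);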
#define GETI(i) ((int) prob->y[i])
// To support instance weights, redefine GETI(i) as (i)

class Solver_MCSVM_CS
{
	public:
		Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
		~Solver_MCSVM_CS();
		void Solve(double *w);
	private:
		void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
		bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
		double *B, *C, *G;
		int w_size, l;
		int nr_class;
		int max_iter;
		double eps;
		const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
	this->w_size = prob->n;
	this->l = prob->l;
	this->nr_class = nr_class;
	this->eps = eps;
	this->max_iter = max_iter;
	this->prob = prob;
	this->B = new double[nr_class];
	this->G = new double[nr_class];
	this->C = weighted_C;
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	delete[] B;
	delete[] G;
}

int compare_double(const void *a, const void *b)
{
	if(*(double *)a > *(double *)b)
		return -1;
	if(*(double *)a < *(double *)b)
		return 1;
	return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

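	// Closed-form solution of the per-instance sub-problem (see the Appendix
	// of the LIBLINEAR paper): sort a copy of B in decreasing order, then
	// locate the threshold beta for which the clipped values (beta - B_r)/A_i
	// sum to zero over the active classes.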
	clone(D, B, active_i);
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];
	beta /= r;

	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
	double bound = 0;
	if(m == yi)
		bound = C[GETI(i)];
	if(alpha_i == bound && G[m] < minG)
		return true;
	return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha = new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;

	// Initial alpha can be set here. Note that
	// sum_m alpha[i*nr_class+m] = 0, for all i=0,...,l-1
	// alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
	// alpha[i*nr_class+m] <= 0 if prob->y[i] != m
	// If initial alpha isn't zero, uncomment the for loop below to initialize w
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;

	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			double val = xi->value;
			QD[i] += val*val;

			// Uncomment the for loop if initial alpha isn't zero
			// for(m=0; m<nr_class; m++)
			//	w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = (int)prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
											alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+(int)prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//		upper_bound_i = Cp if y_i = 1
//		upper_bound_i = Cn if y_i = -1
//		D_ii = 0
// In L2-SVM case:
//		upper_bound_i = INF
//		D_ii = 1/(2*Cp) if y_i = 1
//		D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, redefine GETI(i) as (i)
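// Per-coordinate step (Algorithm 3 of Hsieh et al., 2008): with
// Qbar_ii = x_i^T x_i + D_ii, the one-variable problem in alpha_i has the
// closed-form solution
//	alpha_i <- min(max(alpha_i - G/Qbar_ii, 0), upper_bound_i),
// where G = y_i w^T x_i - 1 + D_ii alpha_i, and w = sum_j y_j alpha_j x_j is
// maintained incrementally by axpy() after every update.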

static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 <= alpha[i] <= upper_bound[GETI(i)]
	for(i=0; i<l; i++)
		alpha[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		QD[i] = diag[GETI(i)];

		feature_node * const xi = prob->x[i];
		QD[i] += sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(y[i]*alpha[i], xi, w);

		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			const schar yi = y[i];
			feature_node * const xi = prob->x[i];

			G = yi*sparse_operator::dot(w, xi)-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				sparse_operator::axpy(d, xi, w);
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}


// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
//  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
//    s.t.      -upper_bound_i <= \beta_i <= upper_bound_i,
//
//  where Qij = xi^T xj and
//  diag(lambda) is the diagonal regularization matrix defined below
//
// In L1-SVM case:
//		upper_bound_i = C
//		lambda_i = 0
// In L2-SVM case:
//		upper_bound_i = INF
//		lambda_i = 1/(2*C)
//
// Given:
// x, y, p, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

#undef GETI
#define GETI(i) (0)
// To support instance weights, redefine GETI(i) as (i)
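// The p-insensitive term p*|beta_i| makes the one-variable objective piecewise
// quadratic: the Newton direction below compares the right/left derivatives
// Gp = G+p and Gn = G-p against H*beta_i, and otherwise moves beta_i to zero,
// i.e. a soft-threshold step.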

static void solve_l2r_l1l2_svr(
	const problem *prob, double *w, const parameter *param,
	int solver_type)
{
	int l = prob->l;
	double C = param->C;
	double p = param->p;
	int w_size = prob->n;
	double eps = param->eps;
	int i, s, iter = 0;
	int max_iter = 1000;
	int active_size = l;
	int *index = new int[l];

	double d, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	double *beta = new double[l];
	double *QD = new double[l];
	double *y = prob->y;

	// L2R_L2LOSS_SVR_DUAL
	double lambda[1], upper_bound[1];
	lambda[0] = 0.5/C;
	upper_bound[0] = INF;

	if(solver_type == L2R_L1LOSS_SVR_DUAL)
	{
		lambda[0] = 0;
		upper_bound[0] = C;
	}

	// Initial beta can be set here. Note that
	// -upper_bound <= beta[i] <= upper_bound
	for(i=0; i<l; i++)
		beta[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		QD[i] = sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(beta[i], xi, w);

		index[i] = i;
	}


	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			i = index[s];
			G = -y[i] + lambda[GETI(i)]*beta[i];
			H = QD[i] + lambda[GETI(i)];

			feature_node * const xi = prob->x[i];
			G += sparse_operator::dot(w, xi);

			double Gp = G+p;
			double Gn = G-p;
			double violation = 0;
			if(beta[i] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old && Gn<-Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] >= upper_bound[GETI(i)])
			{
				if(Gp > 0)
					violation = Gp;
				else if(Gp < -Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] <= -upper_bound[GETI(i)])
			{
				if(Gn < 0)
					violation = -Gn;
				else if(Gn > Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp < H*beta[i])
				d = -Gp/H;
			else if(Gn > H*beta[i])
				d = -Gn/H;
			else
				d = -beta[i];

			if(fabs(d) < 1.0e-12)
				continue;

			double beta_old = beta[i];
			beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
			d = beta[i]-beta_old;

			if(d != 0)
				sparse_operator::axpy(d, xi, w);
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0; i<l; i++)
	{
		v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
		if(beta[i] != 0)
			nSV++;
	}

	info("Objective value = %lf\n", v);
	info("nSV = %d\n",nSV);

	delete [] beta;
	delete [] QD;
	delete [] index;
}


// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum_i (\alpha_i log(\alpha_i) + (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i)),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, redefine GETI(i) as (i)
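// Each dual variable is stored explicitly as the pair
// (alpha_i, upper_bound_i - alpha_i) so both logarithmic terms are tracked
// directly; the one-variable sub-problem g_t(z) below is solved by a damped
// Newton iteration kept inside the open interval (0, C).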

void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double *xTx = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[2*l]; // store alpha and C - alpha
	schar *y = new schar[l];
	int max_inner_iter = 100; // for inner Newton
	double innereps = 1e-2;
	double innereps_min = min(1e-8, eps);
	double upper_bound[3] = {Cn, 0, Cp};

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 < alpha[i] < upper_bound[GETI(i)]
	// alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
	for(i=0; i<l; i++)
	{
		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		xTx[i] = sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
		index[i] = i;
	}

	while (iter < max_iter)
	{
		for (i=0; i<l; i++)
		{
			int j = i+rand()%(l-i);
			swap(index[i], index[j]);
		}
		int newton_iter = 0;
		double Gmax = 0;
		for (s=0; s<l; s++)
		{
			i = index[s];
			const schar yi = y[i];
			double C = upper_bound[GETI(i)];
			double ywTx = 0, xisq = xTx[i];
			feature_node * const xi = prob->x[i];
			ywTx = yi*sparse_operator::dot(w, xi);
			double a = xisq, b = ywTx;

			// Decide to minimize g_1(z) or g_2(z)
			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
			{
				ind1 = 2*i+1;
				ind2 = 2*i;
				sign = -1;
			}

			//  g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
			double alpha_old = alpha[ind1];
			double z = alpha_old;
			if(C - z < 0.5 * C)
				z = 0.1*z;
			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
			Gmax = max(Gmax, fabs(gp));

			// Newton method on the sub-problem
			const double eta = 0.1; // xi in the paper
			int inner_iter = 0;
			while (inner_iter <= max_inner_iter)
			{
				if(fabs(gp) < innereps)
					break;
				double gpp = a + C/(C-z)/z;
				double tmpz = z - gp/gpp;
				if(tmpz <= 0)
					z *= eta;
				else // tmpz in (0, C)
					z = tmpz;
				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
				newton_iter++;
				inner_iter++;
			}

			if(inner_iter > 0) // update w
			{
				alpha[ind1] = z;
				alpha[ind2] = C-z;
				sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax < eps)
			break;

		if(newton_iter <= l/10)
			innereps = max(innereps_min, 0.1*innereps);

	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

	// calculate objective value

	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i] * w[i];
	v *= 0.5;
	for(i=0; i<l; i++)
		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
	info("Objective value = %lf\n", v);

	delete [] xTx;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, redefine GETI(i) as (i)
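// Note: this solver takes prob_col, a column (feature-major) copy of the
// data, so prob_col->x[j] enumerates the instances touching feature j and a
// coordinate descent step on w_j needs exactly one pass over that column.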
1385
1386 static void solve_l1r_l2_svc(
1387         problem *prob_col, double *w, double eps,
1388         double Cp, double Cn)
1389 {
1390         int l = prob_col->l;
1391         int w_size = prob_col->n;
1392         int j, s, iter = 0;
1393         int max_iter = 1000;
1394         int active_size = w_size;
1395         int max_num_linesearch = 20;
1396
1397         double sigma = 0.01;
1398         double d, G_loss, G, H;
1399         double Gmax_old = INF;
1400         double Gmax_new, Gnorm1_new;
1401         double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1402         double d_old, d_diff;
1403         double loss_old, loss_new;
1404         double appxcond, cond;
1405
1406         int *index = new int[w_size];
1407         schar *y = new schar[l];
1408         double *b = new double[l]; // b = 1-ywTx
1409         double *xj_sq = new double[w_size];
1410         feature_node *x;
1411
1412         double C[3] = {Cn,0,Cp};
1413
1414         // Initial w can be set here.
1415         for(j=0; j<w_size; j++)
1416                 w[j] = 0;
1417
1418         for(j=0; j<l; j++)
1419         {
1420                 b[j] = 1;
1421                 if(prob_col->y[j] > 0)
1422                         y[j] = 1;
1423                 else
1424                         y[j] = -1;
1425         }
1426         for(j=0; j<w_size; j++)
1427         {
1428                 index[j] = j;
1429                 xj_sq[j] = 0;
1430                 x = prob_col->x[j];
1431                 while(x->index != -1)
1432                 {
1433                         int ind = x->index-1;
1434                         x->value *= y[ind]; // x->value stores yi*xij
1435                         double val = x->value;
1436                         b[ind] -= w[j]*val;
1437                         xj_sq[j] += C[GETI(ind)]*val*val;
1438                         x++;
1439                 }
1440         }
1441
1442         while(iter < max_iter)
1443         {
1444                 Gmax_new = 0;
1445                 Gnorm1_new = 0;
1446
1447                 for(j=0; j<active_size; j++)
1448                 {
1449                         int i = j+rand()%(active_size-j);
1450                         swap(index[i], index[j]);
1451                 }
1452
1453                 for(s=0; s<active_size; s++)
1454                 {
1455                         j = index[s];
1456                         G_loss = 0;
1457                         H = 0;
1458
1459                         x = prob_col->x[j];
1460                         while(x->index != -1)
1461                         {
1462                                 int ind = x->index-1;
1463                                 if(b[ind] > 0)
1464                                 {
1465                                         double val = x->value;
1466                                         double tmp = C[GETI(ind)]*val;
1467                                         G_loss -= tmp*b[ind];
1468                                         H += tmp*val;
1469                                 }
1470                                 x++;
1471                         }
1472                         G_loss *= 2;
1473
1474                         G = G_loss;
1475                         H *= 2;
1476                         H = max(H, 1e-12);
1477
1478                         double Gp = G+1;
1479                         double Gn = G-1;
1480                         double violation = 0;
1481                         if(w[j] == 0)
1482                         {
1483                                 if(Gp < 0)
1484                                         violation = -Gp;
1485                                 else if(Gn > 0)
1486                                         violation = Gn;
1487                                 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1488                                 {
1489                                         active_size--;
1490                                         swap(index[s], index[active_size]);
1491                                         s--;
1492                                         continue;
1493                                 }
1494                         }
1495                         else if(w[j] > 0)
1496                                 violation = fabs(Gp);
1497                         else
1498                                 violation = fabs(Gn);
1499
1500                         Gmax_new = max(Gmax_new, violation);
1501                         Gnorm1_new += violation;
1502
1503                         // obtain Newton direction d
1504                         if(Gp < H*w[j])
1505                                 d = -Gp/H;
1506                         else if(Gn > H*w[j])
1507                                 d = -Gn/H;
1508                         else
1509                                 d = -w[j];
1510
1511                         if(fabs(d) < 1.0e-12)
1512                                 continue;
1513
1514                         double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
1515                         d_old = 0;
1516                         int num_linesearch;
1517                         for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1518                         {
1519                                 d_diff = d_old - d;
1520                                 cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
1521
1522                                 appxcond = xj_sq[j]*d*d + G_loss*d + cond;
1523                                 if(appxcond <= 0)
1524                                 {
1525                                         x = prob_col->x[j];
1526                                         sparse_operator::axpy(d_diff, x, b);
1527                                         break;
1528                                 }
1529
1530                                 if(num_linesearch == 0)
1531                                 {
1532                                         loss_old = 0;
1533                                         loss_new = 0;
1534                                         x = prob_col->x[j];
1535                                         while(x->index != -1)
1536                                         {
1537                                                 int ind = x->index-1;
1538                                                 if(b[ind] > 0)
1539                                                         loss_old += C[GETI(ind)]*b[ind]*b[ind];
1540                                                 double b_new = b[ind] + d_diff*x->value;
1541                                                 b[ind] = b_new;
1542                                                 if(b_new > 0)
1543                                                         loss_new += C[GETI(ind)]*b_new*b_new;
1544                                                 x++;
1545                                         }
1546                                 }
1547                                 else
1548                                 {
1549                                         loss_new = 0;
1550                                         x = prob_col->x[j];
1551                                         while(x->index != -1)
1552                                         {
1553                                                 int ind = x->index-1;
1554                                                 double b_new = b[ind] + d_diff*x->value;
1555                                                 b[ind] = b_new;
1556                                                 if(b_new > 0)
1557                                                         loss_new += C[GETI(ind)]*b_new*b_new;
1558                                                 x++;
1559                                         }
1560                                 }
1561
1562                                 cond = cond + loss_new - loss_old;
1563                                 if(cond <= 0)
1564                                         break;
1565                                 else
1566                                 {
1567                                         d_old = d;
1568                                         d *= 0.5;
1569                                         delta *= 0.5;
1570                                 }
1571                         }
1572
1573                         w[j] += d;
1574
1575                         // recompute b[] if line search takes too many steps
1576                         if(num_linesearch >= max_num_linesearch)
1577                         {
1578                                 info("#");
1579                                 for(int i=0; i<l; i++)
1580                                         b[i] = 1;
1581
1582                                 for(int i=0; i<w_size; i++)
1583                                 {
1584                                         if(w[i]==0) continue;
1585                                         x = prob_col->x[i];
1586                                         sparse_operator::axpy(-w[i], x, b);
1587                                 }
1588                         }
1589                 }
1590
1591                 if(iter == 0)
1592                         Gnorm1_init = Gnorm1_new;
1593                 iter++;
1594                 if(iter % 10 == 0)
1595                         info(".");
1596
1597                 if(Gnorm1_new <= eps*Gnorm1_init)
1598                 {
1599                         if(active_size == w_size)
1600                                 break;
1601                         else
1602                         {
1603                                 active_size = w_size;
1604                                 info("*");
1605                                 Gmax_old = INF;
1606                                 continue;
1607                         }
1608                 }
1609
1610                 Gmax_old = Gmax_new;
1611         }
1612
1613         info("\noptimization finished, #iter = %d\n", iter);
1614         if(iter >= max_iter)
1615                 info("\nWARNING: reached the maximum number of iterations\n");
1616
1617         // calculate objective value
1618
1619         double v = 0;
1620         int nnz = 0;
1621         for(j=0; j<w_size; j++)
1622         {
1623                 x = prob_col->x[j];
1624                 while(x->index != -1)
1625                 {
1626                         x->value *= prob_col->y[x->index-1]; // restore x->value
1627                         x++;
1628                 }
1629                 if(w[j] != 0)
1630                 {
1631                         v += fabs(w[j]);
1632                         nnz++;
1633                 }
1634         }
1635         for(j=0; j<l; j++)
1636                 if(b[j] > 0)
1637                         v += C[GETI(j)]*b[j]*b[j];
1638
1639         info("Objective value = %lf\n", v);
1640         info("#nonzeros/#features = %d/%d\n", nnz, w_size);
1641
1642         delete [] index;
1643         delete [] y;
1644         delete [] b;
1645         delete [] xj_sq;
1646 }
1647
1648 // A coordinate descent algorithm for
1649 // L1-regularized logistic regression problems
1650 //
1651 //  min_w \sum_j |w_j| + C \sum_i log(1 + exp(-y_i w^T x_i))
1652 //
1653 // Given:
1654 // x, y, Cp, Cn
1655 // eps is the stopping tolerance
1656 //
1657 // solution will be put in w
1658 //
1659 // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1660
1661 #undef GETI
1662 #define GETI(i) (y[i]+1)
1663 // To support instance weights, define GETI(i) as (i)
1664
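// A sketch of the one-variable subproblem used in the inner CD loop
// below (see "obtain solution of one-variable problem"): with G and H
// the gradient and Hessian-diagonal entry of the smooth loss term at
// the current w_j, the Newton direction z minimizing
//
//   |w_j + z| + G*z + 0.5*H*z^2
//
// has the closed-form soft-thresholding solution
//
//   z = -(G+1)/H   if G+1 < H*w_j
//   z = -(G-1)/H   if G-1 > H*w_j
//   z = -w_j       otherwise (the coordinate moves exactly to 0)
//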
1665 static void solve_l1r_lr(
1666         const problem *prob_col, double *w, double eps,
1667         double Cp, double Cn)
1668 {
1669         int l = prob_col->l;
1670         int w_size = prob_col->n;
1671         int j, s, newton_iter=0, iter=0;
1672         int max_newton_iter = 100;
1673         int max_iter = 1000;
1674         int max_num_linesearch = 20;
1675         int active_size;
1676         int QP_active_size;
1677
1678         double nu = 1e-12;
1679         double inner_eps = 1;
1680         double sigma = 0.01;
1681         double w_norm, w_norm_new;
1682         double z, G, H;
1683         double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
1684         double Gmax_old = INF;
1685         double Gmax_new, Gnorm1_new;
1686         double QP_Gmax_old = INF;
1687         double QP_Gmax_new, QP_Gnorm1_new;
1688         double delta, negsum_xTd, cond;
1689
1690         int *index = new int[w_size];
1691         schar *y = new schar[l];
1692         double *Hdiag = new double[w_size];
1693         double *Grad = new double[w_size];
1694         double *wpd = new double[w_size];
1695         double *xjneg_sum = new double[w_size];
1696         double *xTd = new double[l];
1697         double *exp_wTx = new double[l];
1698         double *exp_wTx_new = new double[l];
1699         double *tau = new double[l];
1700         double *D = new double[l];
1701         feature_node *x;
1702
1703         double C[3] = {Cn,0,Cp};
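        // C is indexed through GETI(i) = y[i]+1, so C[0] = Cn applies to
        // instances with y = -1 and C[2] = Cp to instances with y = +1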
1704
1705         // Initial w can be set here.
1706         for(j=0; j<w_size; j++)
1707                 w[j] = 0;
1708
1709         for(j=0; j<l; j++)
1710         {
1711                 if(prob_col->y[j] > 0)
1712                         y[j] = 1;
1713                 else
1714                         y[j] = -1;
1715
1716                 exp_wTx[j] = 0;
1717         }
1718
1719         w_norm = 0;
1720         for(j=0; j<w_size; j++)
1721         {
1722                 w_norm += fabs(w[j]);
1723                 wpd[j] = w[j];
1724                 index[j] = j;
1725                 xjneg_sum[j] = 0;
1726                 x = prob_col->x[j];
1727                 while(x->index != -1)
1728                 {
1729                         int ind = x->index-1;
1730                         double val = x->value;
1731                         exp_wTx[ind] += w[j]*val;
1732                         if(y[ind] == -1)
1733                                 xjneg_sum[j] += C[GETI(ind)]*val;
1734                         x++;
1735                 }
1736         }
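        // with exp_wTx[j] = exp(w^T x_j), tau[j] = C_j*sigma(-w^T x_j) is the
        // factor entering the gradient of the logistic loss, and
        // D[j] = C_j*sigma(w^T x_j)*sigma(-w^T x_j) the corresponding
        // Hessian-diagonal factor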
1737         for(j=0; j<l; j++)
1738         {
1739                 exp_wTx[j] = exp(exp_wTx[j]);
1740                 double tau_tmp = 1/(1+exp_wTx[j]);
1741                 tau[j] = C[GETI(j)]*tau_tmp;
1742                 D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;
1743         }
1744
1745         while(newton_iter < max_newton_iter)
1746         {
1747                 Gmax_new = 0;
1748                 Gnorm1_new = 0;
1749                 active_size = w_size;
1750
1751                 for(s=0; s<active_size; s++)
1752                 {
1753                         j = index[s];
1754                         Hdiag[j] = nu;
1755                         Grad[j] = 0;
1756
1757                         double tmp = 0;
1758                         x = prob_col->x[j];
1759                         while(x->index != -1)
1760                         {
1761                                 int ind = x->index-1;
1762                                 Hdiag[j] += x->value*x->value*D[ind];
1763                                 tmp += x->value*tau[ind];
1764                                 x++;
1765                         }
1766                         Grad[j] = -tmp + xjneg_sum[j];
1767
1768                         double Gp = Grad[j]+1;
1769                         double Gn = Grad[j]-1;
1770                         double violation = 0;
1771                         if(w[j] == 0)
1772                         {
1773                                 if(Gp < 0)
1774                                         violation = -Gp;
1775                                 else if(Gn > 0)
1776                                         violation = Gn;
1777                                 //outer-level shrinking
1778                                 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
1779                                 {
1780                                         active_size--;
1781                                         swap(index[s], index[active_size]);
1782                                         s--;
1783                                         continue;
1784                                 }
1785                         }
1786                         else if(w[j] > 0)
1787                                 violation = fabs(Gp);
1788                         else
1789                                 violation = fabs(Gn);
1790
1791                         Gmax_new = max(Gmax_new, violation);
1792                         Gnorm1_new += violation;
1793                 }
1794
1795                 if(newton_iter == 0)
1796                         Gnorm1_init = Gnorm1_new;
1797
1798                 if(Gnorm1_new <= eps*Gnorm1_init)
1799                         break;
1800
1801                 iter = 0;
1802                 QP_Gmax_old = INF;
1803                 QP_active_size = active_size;
1804
1805                 for(int i=0; i<l; i++)
1806                         xTd[i] = 0;
1807
1808                 // optimize QP over wpd
1809                 while(iter < max_iter)
1810                 {
1811                         QP_Gmax_new = 0;
1812                         QP_Gnorm1_new = 0;
1813
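                        // randomly permute the active coordinates (a Fisher-Yates shuffle)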
1814                         for(j=0; j<QP_active_size; j++)
1815                         {
1816                                 int i = j+rand()%(QP_active_size-j);
1817                                 swap(index[i], index[j]);
1818                         }
1819
1820                         for(s=0; s<QP_active_size; s++)
1821                         {
1822                                 j = index[s];
1823                                 H = Hdiag[j];
1824
1825                                 x = prob_col->x[j];
1826                                 G = Grad[j] + (wpd[j]-w[j])*nu;
1827                                 while(x->index != -1)
1828                                 {
1829                                         int ind = x->index-1;
1830                                         G += x->value*D[ind]*xTd[ind];
1831                                         x++;
1832                                 }
1833
1834                                 double Gp = G+1;
1835                                 double Gn = G-1;
1836                                 double violation = 0;
1837                                 if(wpd[j] == 0)
1838                                 {
1839                                         if(Gp < 0)
1840                                                 violation = -Gp;
1841                                         else if(Gn > 0)
1842                                                 violation = Gn;
1843                                         //inner-level shrinking
1844                                         else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
1845                                         {
1846                                                 QP_active_size--;
1847                                                 swap(index[s], index[QP_active_size]);
1848                                                 s--;
1849                                                 continue;
1850                                         }
1851                                 }
1852                                 else if(wpd[j] > 0)
1853                                         violation = fabs(Gp);
1854                                 else
1855                                         violation = fabs(Gn);
1856
1857                                 QP_Gmax_new = max(QP_Gmax_new, violation);
1858                                 QP_Gnorm1_new += violation;
1859
1860                                 // obtain solution of one-variable problem
1861                                 if(Gp < H*wpd[j])
1862                                         z = -Gp/H;
1863                                 else if(Gn > H*wpd[j])
1864                                         z = -Gn/H;
1865                                 else
1866                                         z = -wpd[j];
1867
1868                                 if(fabs(z) < 1.0e-12)
1869                                         continue;
1870                                 z = min(max(z,-10.0),10.0);
1871
1872                                 wpd[j] += z;
1873
1874                                 x = prob_col->x[j];
1875                                 sparse_operator::axpy(z, x, xTd);
1876                         }
1877
1878                         iter++;
1879
1880                         if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
1881                         {
1882                                 //inner stopping
1883                                 if(QP_active_size == active_size)
1884                                         break;
1885                                 //active set reactivation
1886                                 else
1887                                 {
1888                                         QP_active_size = active_size;
1889                                         QP_Gmax_old = INF;
1890                                         continue;
1891                                 }
1892                         }
1893
1894                         QP_Gmax_old = QP_Gmax_new;
1895                 }
1896
1897                 if(iter >= max_iter)
1898                         info("WARNING: reached the maximum number of inner iterations\n");
1899
1900                 delta = 0;
1901                 w_norm_new = 0;
1902                 for(j=0; j<w_size; j++)
1903                 {
1904                         delta += Grad[j]*(wpd[j]-w[j]);
1905                         if(wpd[j] != 0)
1906                                 w_norm_new += fabs(wpd[j]);
1907                 }
1908                 delta += (w_norm_new-w_norm);
1909
1910                 negsum_xTd = 0;
1911                 for(int i=0; i<l; i++)
1912                         if(y[i] == -1)
1913                                 negsum_xTd += C[GETI(i)]*xTd[i];
1914
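                // Backtracking line search on cond = f(w+d) - f(w) - sigma*delta,
                // where f is the full objective; per instance, the log term below
                // equals log(1+exp(a+d)) - log(1+exp(a)) - d with a = w^T x_i and
                // d = xTd[i], so together with negsum_xTd the loop accumulates
                // the exact change of the loss.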
1915                 int num_linesearch;
1916                 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
1917                 {
1918                         cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
1919
1920                         for(int i=0; i<l; i++)
1921                         {
1922                                 double exp_xTd = exp(xTd[i]);
1923                                 exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
1924                                 cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
1925                         }
1926
1927                         if(cond <= 0)
1928                         {
1929                                 w_norm = w_norm_new;
1930                                 for(j=0; j<w_size; j++)
1931                                         w[j] = wpd[j];
1932                                 for(int i=0; i<l; i++)
1933                                 {
1934                                         exp_wTx[i] = exp_wTx_new[i];
1935                                         double tau_tmp = 1/(1+exp_wTx[i]);
1936                                         tau[i] = C[GETI(i)]*tau_tmp;
1937                                         D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
1938                                 }
1939                                 break;
1940                         }
1941                         else
1942                         {
1943                                 w_norm_new = 0;
1944                                 for(j=0; j<w_size; j++)
1945                                 {
1946                                         wpd[j] = (w[j]+wpd[j])*0.5;
1947                                         if(wpd[j] != 0)
1948                                                 w_norm_new += fabs(wpd[j]);
1949                                 }
1950                                 delta *= 0.5;
1951                                 negsum_xTd *= 0.5;
1952                                 for(int i=0; i<l; i++)
1953                                         xTd[i] *= 0.5;
1954                         }
1955                 }
1956
1957                 // recompute exp_wTx[] from w if the line search takes too many steps
1958                 if(num_linesearch >= max_num_linesearch)
1959                 {
1960                         for(int i=0; i<l; i++)
1961                                 exp_wTx[i] = 0;
1962
1963                         for(int i=0; i<w_size; i++)
1964                         {
1965                                 if(w[i]==0) continue;
1966                                 x = prob_col->x[i];
1967                                 sparse_operator::axpy(w[i], x, exp_wTx);
1968                         }
1969
1970                         for(int i=0; i<l; i++)
1971                                 exp_wTx[i] = exp(exp_wTx[i]);
1972                 }
1973
1974                 if(iter == 1)
1975                         inner_eps *= 0.25;
1976
1977                 newton_iter++;
1978                 Gmax_old = Gmax_new;
1979
1980                 info("iter %3d  #CD cycles %d\n", newton_iter, iter);
1981         }
1982
1983         info("=========================\n");
1984         info("optimization finished, #iter = %d\n", newton_iter);
1985         if(newton_iter >= max_newton_iter)
1986                 info("WARNING: reached the maximum number of iterations\n");
1987
1988         // calculate objective value
1989
1990         double v = 0;
1991         int nnz = 0;
1992         for(j=0; j<w_size; j++)
1993                 if(w[j] != 0)
1994                 {
1995                         v += fabs(w[j]);
1996                         nnz++;
1997                 }
1998         for(j=0; j<l; j++)
1999                 if(y[j] == 1)
2000                         v += C[GETI(j)]*log(1+1/exp_wTx[j]);
2001                 else
2002                         v += C[GETI(j)]*log(1+exp_wTx[j]);
2003
2004         info("Objective value = %lf\n", v);
2005         info("#nonzeros/#features = %d/%d\n", nnz, w_size);
2006
2007         delete [] index;
2008         delete [] y;
2009         delete [] Hdiag;
2010         delete [] Grad;
2011         delete [] wpd;
2012         delete [] xjneg_sum;
2013         delete [] xTd;
2014         delete [] exp_wTx;
2015         delete [] exp_wTx_new;
2016         delete [] tau;
2017         delete [] D;
2018 }
2019
2020 // transpose matrix X from row format to column format
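// For illustration (hypothetical data): with l=2, n=2 and the rows
//   x[0] = (1:0.5)(-1)        x[1] = (1:1.0)(2:2.0)(-1)
// the per-column counts are {2,1}; the prefix sum below, which also
// reserves one slot per column for the terminating sentinel, yields
// col_ptr = {0,3,5}, so column 1 occupies x_space[0..2] and column 2
// occupies x_space[3..4], each ending with index = -1.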
2021 static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
2022 {
2023         int i;
2024         int l = prob->l;
2025         int n = prob->n;
2026         size_t nnz = 0;
2027         size_t *col_ptr = new size_t [n+1];
2028         feature_node *x_space;
2029         prob_col->l = l;
2030         prob_col->n = n;
2031         prob_col->y = new double[l];
2032         prob_col->x = new feature_node*[n];
2033
2034         for(i=0; i<l; i++)
2035                 prob_col->y[i] = prob->y[i];
2036
2037         for(i=0; i<n+1; i++)
2038                 col_ptr[i] = 0;
2039         for(i=0; i<l; i++)
2040         {
2041                 feature_node *x = prob->x[i];
2042                 while(x->index != -1)
2043                 {
2044                         nnz++;
2045                         col_ptr[x->index]++;
2046                         x++;
2047                 }
2048         }
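        // prefix sum over the counts; the extra +1 per column reserves the
        // slot for its index = -1 sentinel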
2049         for(i=1; i<n+1; i++)
2050                 col_ptr[i] += col_ptr[i-1] + 1;
2051
2052         x_space = new feature_node[nnz+n];
2053         for(i=0; i<n; i++)
2054                 prob_col->x[i] = &x_space[col_ptr[i]];
2055
2056         for(i=0; i<l; i++)
2057         {
2058                 feature_node *x = prob->x[i];
2059                 while(x->index != -1)
2060                 {
2061                         int ind = x->index-1;
2062                         x_space[col_ptr[ind]].index = i+1; // starts from 1
2063                         x_space[col_ptr[ind]].value = x->value;
2064                         col_ptr[ind]++;
2065                         x++;
2066                 }
2067         }
2068         for(i=0; i<n; i++)
2069                 x_space[col_ptr[i]].index = -1;
2070
2071         *x_space_ret = x_space;
2072
2073         delete [] col_ptr;
2074 }
2075
2076 // label: label values, start: beginning index of each class, count: #data in each class, perm: indices into the original data
2077 // perm, of length l, must be allocated before calling this subroutine
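// For example (hypothetical labels): y = {2,1,2,2,1} gives nr_class = 2,
// label = {2,1} (order of first occurrence), count = {3,2}, start = {0,3},
// and perm = {0,2,3,1,4}, i.e. the three instances of class 2 come first,
// followed by the two instances of class 1.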
2078 static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2079 {
2080         int l = prob->l;
2081         int max_nr_class = 16;
2082         int nr_class = 0;
2083         int *label = Malloc(int,max_nr_class);
2084         int *count = Malloc(int,max_nr_class);
2085         int *data_label = Malloc(int,l);
2086         int i;
2087
2088         for(i=0;i<l;i++)
2089         {
2090                 int this_label = (int)prob->y[i];
2091                 int j;
2092                 for(j=0;j<nr_class;j++)
2093                 {
2094                         if(this_label == label[j])
2095                         {
2096                                 ++count[j];
2097                                 break;
2098                         }
2099                 }
2100                 data_label[i] = j;
2101                 if(j == nr_class)
2102                 {
2103                         if(nr_class == max_nr_class)
2104                         {
2105                                 max_nr_class *= 2;
2106                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
2107                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
2108                         }
2109                         label[nr_class] = this_label;
2110                         count[nr_class] = 1;
2111                         ++nr_class;
2112                 }
2113         }
2114
2115         //
2116         // Labels are ordered by their first occurrence in the training set.
2117 // However, for two-class sets with -1/+1 labels in which -1 appears first,
2118 // we swap the labels so that internally the binary SVM's positive data correspond to the +1 instances.
2119         //
2120         if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2121         {
2122                 swap(label[0],label[1]);
2123                 swap(count[0],count[1]);
2124                 for(i=0;i<l;i++)
2125                 {
2126                         if(data_label[i] == 0)
2127                                 data_label[i] = 1;
2128                         else
2129                                 data_label[i] = 0;
2130                 }
2131         }
2132
2133         int *start = Malloc(int,nr_class);
2134         start[0] = 0;
2135         for(i=1;i<nr_class;i++)
2136                 start[i] = start[i-1]+count[i-1];
2137         for(i=0;i<l;i++)
2138         {
2139                 perm[start[data_label[i]]] = i;
2140                 ++start[data_label[i]];
2141         }
2142         start[0] = 0;
2143         for(i=1;i<nr_class;i++)
2144                 start[i] = start[i-1]+count[i-1];
2145
2146         *nr_class_ret = nr_class;
2147         *label_ret = label;
2148         *start_ret = start;
2149         *count_ret = count;
2150         free(data_label);
2151 }
2152
2153 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
2154 {
2155         //inner and outer tolerances for TRON
2156         double eps = param->eps;
2157         double eps_cg = 0.1;
2158         if(param->init_sol != NULL)
2159                 eps_cg = 0.5;
2160
2161         int pos = 0;
2162         int neg = 0;
2163         for(int i=0;i<prob->l;i++)
2164                 if(prob->y[i] > 0)
2165                         pos++;
2166         neg = prob->l - pos;
2167         double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
2168
2169         function *fun_obj=NULL;
2170         switch(param->solver_type)
2171         {
2172                 case L2R_LR:
2173                 {
2174                         double *C = new double[prob->l];
2175                         for(int i = 0; i < prob->l; i++)
2176                         {
2177                                 if(prob->y[i] > 0)
2178                                         C[i] = Cp;
2179                                 else
2180                                         C[i] = Cn;
2181                         }
2182                         fun_obj=new l2r_lr_fun(prob, C);
2183                         TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2184                         tron_obj.set_print_string(liblinear_print_string);
2185                         tron_obj.tron(w);
2186                         delete fun_obj;
2187                         delete[] C;
2188                         break;
2189                 }
2190                 case L2R_L2LOSS_SVC:
2191                 {
2192                         double *C = new double[prob->l];
2193                         for(int i = 0; i < prob->l; i++)
2194                         {
2195                                 if(prob->y[i] > 0)
2196                                         C[i] = Cp;
2197                                 else
2198                                         C[i] = Cn;
2199                         }
2200                         fun_obj=new l2r_l2_svc_fun(prob, C);
2201                         TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
2202                         tron_obj.set_print_string(liblinear_print_string);
2203                         tron_obj.tron(w);
2204                         delete fun_obj;
2205                         delete[] C;
2206                         break;
2207                 }
2208                 case L2R_L2LOSS_SVC_DUAL:
2209                         solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
2210                         break;
2211                 case L2R_L1LOSS_SVC_DUAL:
2212                         solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
2213                         break;
2214                 case L1R_L2LOSS_SVC:
2215                 {
2216                         problem prob_col;
2217                         feature_node *x_space = NULL;
2218                         transpose(prob, &x_space, &prob_col);
2219                         solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
2220                         delete [] prob_col.y;
2221                         delete [] prob_col.x;
2222                         delete [] x_space;
2223                         break;
2224                 }
2225                 case L1R_LR:
2226                 {
2227                         problem prob_col;
2228                         feature_node *x_space = NULL;
2229                         transpose(prob, &x_space, &prob_col);
2230                         solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
2231                         delete [] prob_col.y;
2232                         delete [] prob_col.x;
2233                         delete [] x_space;
2234                         break;
2235                 }
2236                 case L2R_LR_DUAL:
2237                         solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
2238                         break;
2239                 case L2R_L2LOSS_SVR:
2240                 {
2241                         double *C = new double[prob->l];
2242                         for(int i = 0; i < prob->l; i++)
2243                                 C[i] = param->C;
2244
2245                         fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
2246                         TRON tron_obj(fun_obj, param->eps);
2247                         tron_obj.set_print_string(liblinear_print_string);
2248                         tron_obj.tron(w);
2249                         delete fun_obj;
2250                         delete[] C;
2251                         break;
2252
2253                 }
2254                 case L2R_L1LOSS_SVR_DUAL:
2255                         solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
2256                         break;
2257                 case L2R_L2LOSS_SVR_DUAL:
2258                         solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
2259                         break;
2260                 default:
2261                         fprintf(stderr, "ERROR: unknown solver_type\n");
2262                         break;
2263         }
2264 }
2265
2266 // Calculate the initial C for parameter selection
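// The estimate is 1/(l*max_i ||x_i||^2) for L2R_LR and
// 1/(2*l*max_i ||x_i||^2) for L2R_L2LOSS_SVC, rounded down to a power of
// two. E.g. (hypothetical numbers), l = 100 and max_i ||x_i||^2 = 4 give
// min_C = 1/400 for L2R_LR, so 2^-9 is returned.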
2267 static double calc_start_C(const problem *prob, const parameter *param)
2268 {
2269         int i;
2270         double xTx,max_xTx;
2271         max_xTx = 0;
2272         for(i=0; i<prob->l; i++)
2273         {
2274                 xTx = 0;
2275                 feature_node *xi=prob->x[i];
2276                 while(xi->index != -1)
2277                 {
2278                         double val = xi->value;
2279                         xTx += val*val;
2280                         xi++;
2281                 }
2282                 if(xTx > max_xTx)
2283                         max_xTx = xTx;
2284         }
2285
2286         double min_C = 1.0;
2287         if(param->solver_type == L2R_LR)
2288                 min_C = 1.0 / (prob->l * max_xTx);
2289         else if(param->solver_type == L2R_L2LOSS_SVC)
2290                 min_C = 1.0 / (2 * prob->l * max_xTx);
2291
2292         return pow( 2, floor(log(min_C) / log(2.0)) );
2293 }
2294
2295
2296 //
2297 // Interface functions
2298 //
2299 model* train(const problem *prob, const parameter *param)
2300 {
2301         int i,j;
2302         int l = prob->l;
2303         int n = prob->n;
2304         int w_size = prob->n;
2305         model *model_ = Malloc(model,1);
2306
2307         if(prob->bias>=0)
2308                 model_->nr_feature=n-1;
2309         else
2310                 model_->nr_feature=n;
2311         model_->param = *param;
2312         model_->bias = prob->bias;
2313
2314         if(check_regression_model(model_))
2315         {
2316                 model_->w = Malloc(double, w_size);
2317                 for(i=0; i<w_size; i++)
2318                         model_->w[i] = 0;
2319                 model_->nr_class = 2;
2320                 model_->label = NULL;
2321                 train_one(prob, param, model_->w, 0, 0);
2322         }
2323         else
2324         {
2325                 int nr_class;
2326                 int *label = NULL;
2327                 int *start = NULL;
2328                 int *count = NULL;
2329                 int *perm = Malloc(int,l);
2330
2331                 // group training data of the same class
2332                 group_classes(prob,&nr_class,&label,&start,&count,perm);
2333
2334                 model_->nr_class=nr_class;
2335                 model_->label = Malloc(int,nr_class);
2336                 for(i=0;i<nr_class;i++)
2337                         model_->label[i] = label[i];
2338
2339                 // calculate weighted C
2340                 double *weighted_C = Malloc(double, nr_class);
2341                 for(i=0;i<nr_class;i++)
2342                         weighted_C[i] = param->C;
2343                 for(i=0;i<param->nr_weight;i++)
2344                 {
2345                         for(j=0;j<nr_class;j++)
2346                                 if(param->weight_label[i] == label[j])
2347                                         break;
2348                         if(j == nr_class)
2349                                 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2350                         else
2351                                 weighted_C[j] *= param->weight[i];
2352                 }
2353
2354                 // constructing the subproblem
2355                 feature_node **x = Malloc(feature_node *,l);
2356                 for(i=0;i<l;i++)
2357                         x[i] = prob->x[perm[i]];
2358
2359                 int k;
2360                 problem sub_prob;
2361                 sub_prob.l = l;
2362                 sub_prob.n = n;
2363                 sub_prob.x = Malloc(feature_node *,sub_prob.l);
2364                 sub_prob.y = Malloc(double,sub_prob.l);
2365
2366                 for(k=0; k<sub_prob.l; k++)
2367                         sub_prob.x[k] = x[k];
2368
2369                 // multi-class svm by Crammer and Singer
2370                 if(param->solver_type == MCSVM_CS)
2371                 {
2372                         model_->w=Malloc(double, n*nr_class);
2373                         for(i=0;i<nr_class;i++)
2374                                 for(j=start[i];j<start[i]+count[i];j++)
2375                                         sub_prob.y[j] = i;
2376                         Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
2377                         Solver.Solve(model_->w);
2378                 }
2379                 else
2380                 {
2381                         if(nr_class == 2)
2382                         {
2383                                 model_->w=Malloc(double, w_size);
2384
2385                                 int e0 = start[0]+count[0];
2386                                 k=0;
2387                                 for(; k<e0; k++)
2388                                         sub_prob.y[k] = +1;
2389                                 for(; k<sub_prob.l; k++)
2390                                         sub_prob.y[k] = -1;
2391
2392                                 if(param->init_sol != NULL)
2393                                         for(i=0;i<w_size;i++)
2394                                                 model_->w[i] = param->init_sol[i];
2395                                 else
2396                                         for(i=0;i<w_size;i++)
2397                                                 model_->w[i] = 0;
2398
2399                                 train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
2400                         }
2401                         else
2402                         {
2403                                 model_->w=Malloc(double, w_size*nr_class);
2404                                 double *w=Malloc(double, w_size);
2405                                 for(i=0;i<nr_class;i++)
2406                                 {
2407                                         int si = start[i];
2408                                         int ei = si+count[i];
2409
2410                                         k=0;
2411                                         for(; k<si; k++)
2412                                                 sub_prob.y[k] = -1;
2413                                         for(; k<ei; k++)
2414                                                 sub_prob.y[k] = +1;
2415                                         for(; k<sub_prob.l; k++)
2416                                                 sub_prob.y[k] = -1;
2417
2418                                         if(param->init_sol != NULL)
2419                                                 for(j=0;j<w_size;j++)
2420                                                         w[j] = param->init_sol[j*nr_class+i];
2421                                         else
2422                                                 for(j=0;j<w_size;j++)
2423                                                         w[j] = 0;
2424
2425                                         train_one(&sub_prob, param, w, weighted_C[i], param->C);
2426
2427                                         for(j=0;j<w_size;j++)
2428                                                 model_->w[j*nr_class+i] = w[j];
2429                                 }
2430                                 free(w);
2431                         }
2432
2433                 }
2434
2435                 free(x);
2436                 free(label);
2437                 free(start);
2438                 free(count);
2439                 free(perm);
2440                 free(sub_prob.x);
2441                 free(sub_prob.y);
2442                 free(weighted_C);
2443         }
2444         return model_;
2445 }
2446
2447 void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
2448 {
2449         int i;
2450         int *fold_start;
2451         int l = prob->l;
2452         int *perm = Malloc(int,l);
2453         if (nr_fold > l)
2454         {
2455                 nr_fold = l;
2456                 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2457         }
2458         fold_start = Malloc(int,nr_fold+1);
2459         for(i=0;i<l;i++) perm[i]=i;
2460         for(i=0;i<l;i++)
2461         {
2462                 int j = i+rand()%(l-i);
2463                 swap(perm[i],perm[j]);
2464         }
2465         for(i=0;i<=nr_fold;i++)
2466                 fold_start[i]=i*l/nr_fold;
2467
2468         for(i=0;i<nr_fold;i++)
2469         {
2470                 int begin = fold_start[i];
2471                 int end = fold_start[i+1];
2472                 int j,k;
2473                 struct problem subprob;
2474
2475                 subprob.bias = prob->bias;
2476                 subprob.n = prob->n;
2477                 subprob.l = l-(end-begin);
2478                 subprob.x = Malloc(struct feature_node*,subprob.l);
2479                 subprob.y = Malloc(double,subprob.l);
2480
2481                 k=0;
2482                 for(j=0;j<begin;j++)
2483                 {
2484                         subprob.x[k] = prob->x[perm[j]];
2485                         subprob.y[k] = prob->y[perm[j]];
2486                         ++k;
2487                 }
2488                 for(j=end;j<l;j++)
2489                 {
2490                         subprob.x[k] = prob->x[perm[j]];
2491                         subprob.y[k] = prob->y[perm[j]];
2492                         ++k;
2493                 }
2494                 struct model *submodel = train(&subprob,param);
2495                 for(j=begin;j<end;j++)
2496                         target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2497                 free_and_destroy_model(&submodel);
2498                 free(subprob.x);
2499                 free(subprob.y);
2500         }
2501         free(fold_start);
2502         free(perm);
2503 }
2504
2505 void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
2506 {
2507         // variables for CV
2508         int i;
2509         int *fold_start;
2510         int l = prob->l;
2511         int *perm = Malloc(int, l);
2512         double *target = Malloc(double, prob->l);
2513         struct problem *subprob = Malloc(problem,nr_fold);
2514
2515         // variables for warm start
2516         double ratio = 2;
2517         double **prev_w = Malloc(double*, nr_fold);
2518         for(i = 0; i < nr_fold; i++)
2519                 prev_w[i] = NULL;
2520         int num_unchanged_w = 0;
2521         struct parameter param1 = *param;
2522         void (*default_print_string) (const char *) = liblinear_print_string;
2523
2524         if (nr_fold > l)
2525         {
2526                 nr_fold = l;
2527                 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2528         }
2529         fold_start = Malloc(int,nr_fold+1);
2530         for(i=0;i<l;i++) perm[i]=i;
2531         for(i=0;i<l;i++)
2532         {
2533                 int j = i+rand()%(l-i);
2534                 swap(perm[i],perm[j]);
2535         }
2536         for(i=0;i<=nr_fold;i++)
2537                 fold_start[i]=i*l/nr_fold;
2538
2539         for(i=0;i<nr_fold;i++)
2540         {
2541                 int begin = fold_start[i];
2542                 int end = fold_start[i+1];
2543                 int j,k;
2544
2545                 subprob[i].bias = prob->bias;
2546                 subprob[i].n = prob->n;
2547                 subprob[i].l = l-(end-begin);
2548                 subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
2549                 subprob[i].y = Malloc(double,subprob[i].l);
2550
2551                 k=0;
2552                 for(j=0;j<begin;j++)
2553                 {
2554                         subprob[i].x[k] = prob->x[perm[j]];
2555                         subprob[i].y[k] = prob->y[perm[j]];
2556                         ++k;
2557                 }
2558                 for(j=end;j<l;j++)
2559                 {
2560                         subprob[i].x[k] = prob->x[perm[j]];
2561                         subprob[i].y[k] = prob->y[perm[j]];
2562                         ++k;
2563                 }
2564
2565         }
2566
2567         *best_rate = 0;
2568         if(start_C <= 0)
2569                 start_C = calc_start_C(prob,param);
2570         param1.C = start_C;
2571
2572         while(param1.C <= max_C)
2573         {
2574                 // output is disabled while running CV at a particular C
2575                 set_print_string_function(&print_null);
2576
2577                 for(i=0; i<nr_fold; i++)
2578                 {
2579                         int j;
2580                         int begin = fold_start[i];
2581                         int end = fold_start[i+1];
2582
2583                         param1.init_sol = prev_w[i];
2584                         struct model *submodel = train(&subprob[i],&param1);
2585
2586                         int total_w_size;
2587                         if(submodel->nr_class == 2)
2588                                 total_w_size = subprob[i].n;
2589                         else
2590                                 total_w_size = subprob[i].n * submodel->nr_class;
2591
2592                         if(prev_w[i] == NULL)
2593                         {
2594                                 prev_w[i] = Malloc(double, total_w_size);
2595                                 for(j=0; j<total_w_size; j++)
2596                                         prev_w[i][j] = submodel->w[j];
2597                         }
2598                         else if(num_unchanged_w >= 0)
2599                         {
2600                                 double norm_w_diff = 0;
2601                                 for(j=0; j<total_w_size; j++)
2602                                 {
2603                                         norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
2604                                         prev_w[i][j] = submodel->w[j];
2605                                 }
2606                                 norm_w_diff = sqrt(norm_w_diff);
2607
2608                                 if(norm_w_diff > 1e-15)
2609                                         num_unchanged_w = -1;
2610                         }
2611                         else
2612                         {
2613                                 for(j=0; j<total_w_size; j++)
2614                                         prev_w[i][j] = submodel->w[j];
2615                         }
2616
2617                         for(j=begin; j<end; j++)
2618                                 target[perm[j]] = predict(submodel,prob->x[perm[j]]);
2619
2620                         free_and_destroy_model(&submodel);
2621                 }
2622                 set_print_string_function(default_print_string);
2623
2624                 int total_correct = 0;
2625                 for(i=0; i<prob->l; i++)
2626                         if(target[i] == prob->y[i])
2627                                 ++total_correct;
2628                 double current_rate = (double)total_correct/prob->l;
2629                 if(current_rate > *best_rate)
2630                 {
2631                         *best_C = param1.C;
2632                         *best_rate = current_rate;
2633                 }
2634
2635                 info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
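                // early stopping: if the models of all folds are numerically
                // unchanged over three consecutive values of C, larger C is
                // assumed not to change the solution and the search stops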
2636                 num_unchanged_w++;
2637                 if(num_unchanged_w == 3)
2638                         break;
2639                 param1.C = param1.C*ratio;
2640         }
2641
2642         if(param1.C > max_C && max_C > start_C)
2643                 info("WARNING: maximum C reached.\n");
2644         free(fold_start);
2645         free(perm);
2646         free(target);
2647         for(i=0; i<nr_fold; i++)
2648         {
2649                 free(subprob[i].x);
2650                 free(subprob[i].y);
2651                 free(prev_w[i]);
2652         }
2653         free(prev_w);
2654         free(subprob);
2655 }
2656
2657 double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
2658 {
2659         int idx;
2660         int n;
2661         if(model_->bias>=0)
2662                 n=model_->nr_feature+1;
2663         else
2664                 n=model_->nr_feature;
2665         double *w=model_->w;
2666         int nr_class=model_->nr_class;
2667         int i;
2668         int nr_w;
2669         if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
2670                 nr_w = 1;
2671         else
2672                 nr_w = nr_class;
2673
2674         const feature_node *lx=x;
2675         for(i=0;i<nr_w;i++)
2676                 dec_values[i] = 0;
2677         for(; (idx=lx->index)!=-1; lx++)
2678         {
2679                 // the dimension of testing data may exceed that of training
2680                 if(idx<=n)
2681                         for(i=0;i<nr_w;i++)
2682                                 dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
2683         }
2684
2685         if(nr_class==2)
2686         {
2687                 if(check_regression_model(model_))
2688                         return dec_values[0];
2689                 else
2690                         return (dec_values[0]>0)?model_->label[0]:model_->label[1];
2691         }
2692         else
2693         {
2694                 int dec_max_idx = 0;
2695                 for(i=1;i<nr_class;i++)
2696                 {
2697                         if(dec_values[i] > dec_values[dec_max_idx])
2698                                 dec_max_idx = i;
2699                 }
2700                 return model_->label[dec_max_idx];
2701         }
2702 }
2703
2704 double predict(const model *model_, const feature_node *x)
2705 {
2706         double *dec_values = Malloc(double, model_->nr_class);
2707         double label=predict_values(model_, x, dec_values);
2708         free(dec_values);
2709         return label;
2710 }
2711
2712 double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
2713 {
2714         if(check_probability_model(model_))
2715         {
2716                 int i;
2717                 int nr_class=model_->nr_class;
2718                 int nr_w;
2719                 if(nr_class==2)
2720                         nr_w = 1;
2721                 else
2722                         nr_w = nr_class;
2723
2724                 double label=predict_values(model_, x, prob_estimates);
2725                 for(i=0;i<nr_w;i++)
2726                         prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
2727
2728                 if(nr_class==2) // for binary classification
2729                         prob_estimates[1]=1.-prob_estimates[0];
2730                 else
2731                 {
2732                         double sum=0;
2733                         for(i=0; i<nr_class; i++)
2734                                 sum+=prob_estimates[i];
2735
2736                         for(i=0; i<nr_class; i++)
2737                                 prob_estimates[i]=prob_estimates[i]/sum;
2738                 }
2739
2740                 return label;
2741         }
2742         else
2743                 return 0;
2744 }
2745
2746 static const char *solver_type_table[]=
2747 {
2748         "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
2749         "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
2750         "", "", "",
2751         "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
2752 };
2753
2754 int save_model(const char *model_file_name, const struct model *model_)
2755 {
2756         int i;
2757         int nr_feature=model_->nr_feature;
2758         int n;
2759         const parameter& param = model_->param;
2760
2761         if(model_->bias>=0)
2762                 n=nr_feature+1;
2763         else
2764                 n=nr_feature;
2765         int w_size = n;
2766         FILE *fp = fopen(model_file_name,"w");
2767         if(fp==NULL) return -1;
2768
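        // write numbers in the "C" locale so that "%.17g" always uses '.' as
        // the decimal separator; the caller's locale is restored afterwards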
2769         char *old_locale = setlocale(LC_ALL, NULL);
2770         if (old_locale)
2771         {
2772                 old_locale = strdup(old_locale);
2773         }
2774         setlocale(LC_ALL, "C");
2775
2776         int nr_w;
2777         if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
2778                 nr_w=1;
2779         else
2780                 nr_w=model_->nr_class;
2781
2782         fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
2783         fprintf(fp, "nr_class %d\n", model_->nr_class);
2784
2785         if(model_->label)
2786         {
2787                 fprintf(fp, "label");
2788                 for(i=0; i<model_->nr_class; i++)
2789                         fprintf(fp, " %d", model_->label[i]);
2790                 fprintf(fp, "\n");
2791         }
2792
2793         fprintf(fp, "nr_feature %d\n", nr_feature);
2794
2795         fprintf(fp, "bias %.17g\n", model_->bias);
2796
2797         fprintf(fp, "w\n");
2798         for(i=0; i<w_size; i++)
2799         {
2800                 int j;
2801                 for(j=0; j<nr_w; j++)
2802                         fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
2803                 fprintf(fp, "\n");
2804         }
2805
2806         setlocale(LC_ALL, old_locale);
2807         free(old_locale);
2808
2809         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2810         else return 0;
2811 }
2812
2813 //
2814 // FSCANF helps to handle fscanf failures.
2815 // Its do-while block makes FSCANF() expand to a single statement, so
2816 // if (...)
2817 //    FSCANF();
2818 // parses unambiguously
2819 //
2820 #define FSCANF(_stream, _format, _var)do\
2821 {\
2822         if (fscanf(_stream, _format, _var) != 1)\
2823         {\
2824                 fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
2825                 EXIT_LOAD_MODEL()\
2826         }\
2827 }while(0)
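// Without the do-while wrapper a bare block macro would break if/else
// chains: in
//   if (...)
//      FSCANF(...);
//   else ...
// the ';' after the expanded block would orphan the else.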
2828 // EXIT_LOAD_MODEL should NOT end with a semicolon.
2829 #define EXIT_LOAD_MODEL()\
2830 {\
2831         setlocale(LC_ALL, old_locale);\
2832         free(model_->label);\
2833         free(model_);\
2834         free(old_locale);\
2835         return NULL;\
2836 }
2837 struct model *load_model(const char *model_file_name)
2838 {
2839         FILE *fp = fopen(model_file_name,"r");
2840         if(fp==NULL) return NULL;
2841
2842         int i;
2843         int nr_feature;
2844         int n;
2845         int nr_class;
2846         double bias;
2847         model *model_ = Malloc(model,1);
2848         parameter& param = model_->param;
2849         // parameters used only for training are not read from the file; their array pointers are set to NULL for safety
2850         param.nr_weight = 0;
2851         param.weight_label = NULL;
2852         param.weight = NULL;
2853         param.init_sol = NULL;
2854
2855         model_->label = NULL;
2856
2857         char *old_locale = setlocale(LC_ALL, NULL);
2858         if (old_locale)
2859         {
2860                 old_locale = strdup(old_locale);
2861         }
2862         setlocale(LC_ALL, "C");
2863
2864         char cmd[81];
2865         while(1)
2866         {
2867                 FSCANF(fp,"%80s",cmd);
2868                 if(strcmp(cmd,"solver_type")==0)
2869                 {
2870                         FSCANF(fp,"%80s",cmd);
2871                         int i;
2872                         for(i=0;solver_type_table[i];i++)
2873                         {
2874                                 if(strcmp(solver_type_table[i],cmd)==0)
2875                                 {
2876                                         param.solver_type=i;
2877                                         break;
2878                                 }
2879                         }
2880                         if(solver_type_table[i] == NULL)
2881                         {
2882                                 fprintf(stderr,"unknown solver type.\n");
2883                                 EXIT_LOAD_MODEL()
2884                         }
2885                 }
2886                 else if(strcmp(cmd,"nr_class")==0)
2887                 {
2888                         FSCANF(fp,"%d",&nr_class);
2889                         model_->nr_class=nr_class;
2890                 }
2891                 else if(strcmp(cmd,"nr_feature")==0)
2892                 {
2893                         FSCANF(fp,"%d",&nr_feature);
2894                         model_->nr_feature=nr_feature;
2895                 }
2896                 else if(strcmp(cmd,"bias")==0)
2897                 {
2898                         FSCANF(fp,"%lf",&bias);
2899                         model_->bias=bias;
2900                 }
2901                 else if(strcmp(cmd,"w")==0)
2902                 {
2903                         break;
2904                 }
2905                 else if(strcmp(cmd,"label")==0)
2906                 {
2907                         int nr_class = model_->nr_class;
2908                         model_->label = Malloc(int,nr_class);
2909                         for(int i=0;i<nr_class;i++)
2910                                 FSCANF(fp,"%d",&model_->label[i]);
2911                 }
2912                 else
2913                 {
2914                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2915                         EXIT_LOAD_MODEL()
2916                 }
2917         }
2918
2919         nr_feature=model_->nr_feature;
2920         if(model_->bias>=0)
2921                 n=nr_feature+1;
2922         else
2923                 n=nr_feature;
2924         int w_size = n;
2925         int nr_w;
2926         if(nr_class==2 && param.solver_type != MCSVM_CS)
2927                 nr_w = 1;
2928         else
2929                 nr_w = nr_class;
2930
2931         model_->w=Malloc(double, w_size*nr_w);
2932         for(i=0; i<w_size; i++)
2933         {
2934                 int j;
2935                 for(j=0; j<nr_w; j++)
2936                         FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
2937         }
2938
2939         setlocale(LC_ALL, old_locale);
2940         free(old_locale);
2941
2942         if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
2943
2944         return model_;
2945 }
2946
2947 int get_nr_feature(const model *model_)
2948 {
2949         return model_->nr_feature;
2950 }
2951
2952 int get_nr_class(const model *model_)
2953 {
2954         return model_->nr_class;
2955 }
2956
2957 void get_labels(const model *model_, int* label)
2958 {
2959         if (model_->label != NULL)
2960                 for(int i=0;i<model_->nr_class;i++)
2961                         label[i] = model_->label[i];
2962 }
2963
2964 // use inline here for better performance (around 20% faster than the non-inline one)
2965 static inline double get_w_value(const struct model *model_, int idx, int label_idx)
2966 {
2967         int nr_class = model_->nr_class;
2968         int solver_type = model_->param.solver_type;
2969         const double *w = model_->w;
2970
2971         if(idx < 0 || idx > model_->nr_feature)
2972                 return 0;
2973         if(check_regression_model(model_))
2974                 return w[idx];
2975         else
2976         {
2977                 if(label_idx < 0 || label_idx >= nr_class)
2978                         return 0;
2979                 if(nr_class == 2 && solver_type != MCSVM_CS)
2980                 {
2981                         if(label_idx == 0)
2982                                 return w[idx];
2983                         else
2984                                 return -w[idx];
2985                 }
2986                 else
2987                         return w[idx*nr_class+label_idx];
2988         }
2989 }
2990
2991 // feat_idx: ranging from 1 to nr_feature
2992 // label_idx: ranging from 0 to nr_class-1 for classification models;
2993 //            for regression models, label_idx is ignored.
2994 double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
2995 {
2996         if(feat_idx > model_->nr_feature)
2997                 return 0;
2998         return get_w_value(model_, feat_idx-1, label_idx);
2999 }
3000
3001 double get_decfun_bias(const struct model *model_, int label_idx)
3002 {
3003         int bias_idx = model_->nr_feature;
3004         double bias = model_->bias;
3005         if(bias <= 0)
3006                 return 0;
3007         else
3008                 return bias*get_w_value(model_, bias_idx, label_idx);
3009 }
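
// A hypothetical sketch (not part of the API) showing how these two
// accessors recover the decision value of a binary classifier for a
// dense example stored in x[1..nr_feature]:
//
//   double dec = get_decfun_bias(model_, 0);
//   for(int j=1; j<=get_nr_feature(model_); j++)
//           dec += get_decfun_coef(model_, j, 0)*x[j];
//   // dec > 0 predicts model_->label[0]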
3010
3011 void free_model_content(struct model *model_ptr)
3012 {
3013         if(model_ptr->w != NULL)
3014                 free(model_ptr->w);
3015         if(model_ptr->label != NULL)
3016                 free(model_ptr->label);
3017 }
3018
3019 void free_and_destroy_model(struct model **model_ptr_ptr)
3020 {
3021         struct model *model_ptr = *model_ptr_ptr;
3022         if(model_ptr != NULL)
3023         {
3024                 free_model_content(model_ptr);
3025                 free(model_ptr);
3026         }
3027 }
3028
3029 void destroy_param(parameter* param)
3030 {
3031         if(param->weight_label != NULL)
3032                 free(param->weight_label);
3033         if(param->weight != NULL)
3034                 free(param->weight);
3035         if(param->init_sol != NULL)
3036                 free(param->init_sol);
3037 }
3038
3039 const char *check_parameter(const problem *prob, const parameter *param)
3040 {
3041         if(param->eps <= 0)
3042                 return "eps <= 0";
3043
3044         if(param->C <= 0)
3045                 return "C <= 0";
3046
3047         if(param->p < 0)
3048                 return "p < 0";
3049
3050         if(param->solver_type != L2R_LR
3051                 && param->solver_type != L2R_L2LOSS_SVC_DUAL
3052                 && param->solver_type != L2R_L2LOSS_SVC
3053                 && param->solver_type != L2R_L1LOSS_SVC_DUAL
3054                 && param->solver_type != MCSVM_CS
3055                 && param->solver_type != L1R_L2LOSS_SVC
3056                 && param->solver_type != L1R_LR
3057                 && param->solver_type != L2R_LR_DUAL
3058                 && param->solver_type != L2R_L2LOSS_SVR
3059                 && param->solver_type != L2R_L2LOSS_SVR_DUAL
3060                 && param->solver_type != L2R_L1LOSS_SVR_DUAL)
3061                 return "unknown solver type";
3062
3063         if(param->init_sol != NULL
3064                 && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
3065                 return "Initial-solution specification supported only for solvers L2R_LR and L2R_L2LOSS_SVC";
3066
3067         return NULL;
3068 }
3069
3070 int check_probability_model(const struct model *model_)
3071 {
3072         return (model_->param.solver_type==L2R_LR ||
3073                         model_->param.solver_type==L2R_LR_DUAL ||
3074                         model_->param.solver_type==L1R_LR);
3075 }
3076
3077 int check_regression_model(const struct model *model_)
3078 {
3079         return (model_->param.solver_type==L2R_L2LOSS_SVR ||
3080                         model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
3081                         model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
3082 }
3083
3084 void set_print_string_function(void (*print_func)(const char*))
3085 {
3086         if (print_func == NULL)
3087                 liblinear_print_string = &print_string_stdout;
3088         else
3089                 liblinear_print_string = print_func;
3090 }
3091