int liblinear_version = LIBLINEAR_VERSION;

typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
    dst = new T[n];
    memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))

static void print_string_stdout(const char *s)
{
    fputs(s,stdout);
    fflush(stdout);
}
static void print_null(const char *s) {}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
    char buf[BUFSIZ];
    va_list ap;
    va_start(ap,fmt);
    vsprintf(buf,fmt,ap);
    va_end(ap);
    (*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif

class sparse_operator
{
public:
    static double nrm2_sq(const feature_node *x)
    {
        double ret = 0;
        while(x->index != -1)
        {
            ret += x->value*x->value;
            x++;
        }
        return ret;
    }

    static double dot(const double *s, const feature_node *x)
    {
        double ret = 0;
        while(x->index != -1)
        {
            ret += s[x->index-1]*x->value;
            x++;
        }
        return ret;
    }

    static void axpy(const double a, const feature_node *x, double *y)
    {
        while(x->index != -1)
        {
            y[x->index-1] += a*x->value;
            x++;
        }
    }
};
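
// Example (a sketch, not part of the library): feature vectors are arrays of
// feature_node terminated by index == -1, with 1-based indices. Under that
// convention the helpers above behave as follows:
//
//   feature_node x[] = {{1,0.5},{3,2.0},{-1,0}};   // sparse x = (0.5, 0, 2.0)
//   double s[3] = {1,1,1}, y[3] = {0,0,0};
//   double sq = sparse_operator::nrm2_sq(x);       // 0.25 + 4.0 = 4.25
//   double d  = sparse_operator::dot(s, x);        // 0.5 + 2.0  = 2.5
//   sparse_operator::axpy(2.0, x, y);              // y becomes (1.0, 0, 4.0)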
class l2r_lr_fun: public function
    l2r_lr_fun(const problem *prob, double *C);

    double fun(double *w);
    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    int get_nr_variable(void);
    void get_diag_preconditioner(double *M);

    void Xv(double *v, double *Xv);
    void XTv(double *v, double *XTv);

l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C)

l2r_lr_fun::~l2r_lr_fun()

double l2r_lr_fun::fun(double *w)
    int w_size=get_nr_variable();
    for(i=0;i<w_size;i++)
    double yz = y[i]*z[i];
    f += C[i]*log(1 + exp(-yz));
    f += C[i]*(-yz+log(1 + exp(yz)));

void l2r_lr_fun::grad(double *w, double *g)
    int w_size=get_nr_variable();
    z[i] = 1/(1 + exp(-y[i]*z[i]));
    D[i] = z[i]*(1-z[i]);
    z[i] = C[i]*(z[i]-1)*y[i];
    for(i=0;i<w_size;i++)

int l2r_lr_fun::get_nr_variable(void)

void l2r_lr_fun::get_diag_preconditioner(double *M)
    int w_size=get_nr_variable();
    feature_node **x = prob->x;
    for (i=0; i<w_size; i++)
    feature_node *s = x[i];
    M[s->index-1] += s->value*s->value*C[i]*D[i];

void l2r_lr_fun::Hv(double *s, double *Hs)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    feature_node * const xi=x[i];
    double xTs = sparse_operator::dot(s, xi);
    sparse_operator::axpy(xTs, xi, Hs);
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + Hs[i];

void l2r_lr_fun::Xv(double *v, double *Xv)
    feature_node **x=prob->x;
    Xv[i]=sparse_operator::dot(v, x[i]);

void l2r_lr_fun::XTv(double *v, double *XTv)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    sparse_operator::axpy(v[i], x[i], XTv);
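
// Summary of the pieces above in the TRON solver's notation (with
// sigma(t) = 1/(1+exp(-t)) and per-instance costs C_i):
//
//   f(w)  = 0.5 w^T w + sum_i C_i log(1 + exp(-y_i w^T x_i))
//   g(w)  = w + X^T z,           z_i  = C_i (sigma(y_i w^T x_i) - 1) y_i
//   Hv(s) = s + X^T (C.D.(X s)), D_ii = sigma_i (1 - sigma_i)
//
// which is why grad() caches D[] for the subsequent Hv() calls.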
class l2r_l2_svc_fun: public function
    l2r_l2_svc_fun(const problem *prob, double *C);

    double fun(double *w);
    void grad(double *w, double *g);
    void Hv(double *s, double *Hs);

    int get_nr_variable(void);
    void get_diag_preconditioner(double *M);

    void Xv(double *v, double *Xv);
    void subXTv(double *v, double *XTv);

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C)

l2r_l2_svc_fun::~l2r_l2_svc_fun()

double l2r_l2_svc_fun::fun(double *w)
    int w_size=get_nr_variable();
    for(i=0;i<w_size;i++)

void l2r_l2_svc_fun::grad(double *w, double *g)
    int w_size=get_nr_variable();
    z[sizeI] = C[i]*y[i]*(z[i]-1);
    for(i=0;i<w_size;i++)
        g[i] = w[i] + 2*g[i];

int l2r_l2_svc_fun::get_nr_variable(void)

void l2r_l2_svc_fun::get_diag_preconditioner(double *M)
    int w_size=get_nr_variable();
    feature_node **x = prob->x;
    for (i=0; i<w_size; i++)
    for (i=0; i<sizeI; i++)
    feature_node *s = x[idx];
    M[s->index-1] += s->value*s->value*C[idx]*2;

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    feature_node * const xi=x[I[i]];
    double xTs = sparse_operator::dot(s, xi);
    sparse_operator::axpy(xTs, xi, Hs);
    for(i=0;i<w_size;i++)
        Hs[i] = s[i] + 2*Hs[i];

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
    feature_node **x=prob->x;
    Xv[i]=sparse_operator::dot(v, x[i]);

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
    int w_size=get_nr_variable();
    feature_node **x=prob->x;
    for(i=0;i<w_size;i++)
    sparse_operator::axpy(v[i], x[I[i]], XTv);

class l2r_l2_svr_fun: public l2r_l2_svc_fun
    l2r_l2_svr_fun(const problem *prob, double *C, double p);

    double fun(double *w);
    void grad(double *w, double *g);

l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p):
    l2r_l2_svc_fun(prob, C)

double l2r_l2_svr_fun::fun(double *w)
    int w_size=get_nr_variable();
    for(i=0;i<w_size;i++)
    f += C[i]*(d+p)*(d+p);
    f += C[i]*(d-p)*(d-p);

void l2r_l2_svr_fun::grad(double *w, double *g)
    int w_size=get_nr_variable();
    // generate index set I
    z[sizeI] = C[i]*(d+p);
    z[sizeI] = C[i]*(d-p);
    for(i=0;i<w_size;i++)
        g[i] = w[i] + 2*g[i];
// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i \alpha^m_i
//    s.t.  \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//  where e^m_i = 0 if y_i  = m,
//        e^m_i = 1 if y_i != m,
//        C^m_i = C if m  = y_i,
//        C^m_i = 0 if m != y_i,
//  and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Appendix of LIBLINEAR paper, Fan et al. (2008)

#define GETI(i) ((int) prob->y[i])
// To support weights for instances, use GETI(i) (i)
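
// A sketch of the instance-weight variant mentioned above: with
// #define GETI(i) (i), C[] is indexed per instance instead of per class,
// so the caller would pass an l-length array, e.g.
//
//   double *inst_C = new double[prob->l];
//   for(int i=0;i<prob->l;i++)
//       inst_C[i] = base_C * weight_i;   // hypothetical per-instance weights
//
// and every C[GETI(i)] below then picks up that instance's own bound.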
class Solver_MCSVM_CS
    Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
    void Solve(double *w);
    void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
    bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
    this->w_size = prob->n;
    this->nr_class = nr_class;
    this->max_iter = max_iter;
    this->B = new double[nr_class];
    this->G = new double[nr_class];
    this->C = weighted_C;

Solver_MCSVM_CS::~Solver_MCSVM_CS()

int compare_double(const void *a, const void *b)
    if(*(double *)a > *(double *)b)
    if(*(double *)a < *(double *)b)

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
    clone(D, B, active_i);
    qsort(D, active_i, sizeof(double), compare_double);
    double beta = D[0] - A_i*C_yi;
    for(r=1;r<active_i && beta<r*D[r];r++)
    for(r=0;r<active_i;r++)
        alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
        alpha_new[r] = min((double)0, (beta - B[r])/A_i);

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
    if(alpha_i == bound && G[m] < minG)

void Solver_MCSVM_CS::Solve(double *w)
    double *alpha = new double[l*nr_class];
    double *alpha_new = new double[nr_class];
    int *index = new int[l];
    double *QD = new double[l];
    int *d_ind = new int[nr_class];
    double *d_val = new double[nr_class];
    int *alpha_index = new int[nr_class*l];
    int *y_index = new int[l];
    int *active_size_i = new int[l];
    double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
    bool start_from_all = true;

    // Initial alpha can be set here. Note that
    // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1
    // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m
    // alpha[i*nr_class+m] <= 0 if prob->y[i] != m
    // If initial alpha isn't zero, uncomment the for loop below to initialize w
    for(i=0;i<l*nr_class;i++)
    for(i=0;i<w_size*nr_class;i++)
    for(m=0;m<nr_class;m++)
        alpha_index[i*nr_class+m] = m;
    feature_node *xi = prob->x[i];
    while(xi->index != -1)
        double val = xi->value;
        // Uncomment the for loop if initial alpha isn't zero
        // for(m=0; m<nr_class; m++)
        //     w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val;
    active_size_i[i] = nr_class;
    y_index[i] = (int)prob->y[i];

    while(iter < max_iter)
        double stopping = -INF;
        for(i=0;i<active_size;i++)
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        for(s=0;s<active_size;s++)
            double *alpha_i = &alpha[i*nr_class];
            int *alpha_index_i = &alpha_index[i*nr_class];
            for(m=0;m<active_size_i[i];m++)
            if(y_index[i] < active_size_i[i])
            feature_node *xi = prob->x[i];
            while(xi->index!= -1)
                double *w_i = &w[(xi->index-1)*nr_class];
                for(m=0;m<active_size_i[i];m++)
                    G[m] += w_i[alpha_index_i[m]]*(xi->value);
            for(m=0;m<active_size_i[i];m++)
                if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
            if(y_index[i] < active_size_i[i])
                if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
                    minG = G[y_index[i]];
            for(m=0;m<active_size_i[i];m++)
                if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
                    while(active_size_i[i]>m)
                        if(!be_shrunk(i, active_size_i[i], y_index[i],
                                      alpha_i[alpha_index_i[active_size_i[i]]], minG))
                            swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
                            swap(G[m], G[active_size_i[i]]);
                            if(y_index[i] == active_size_i[i])
                            else if(y_index[i] == m)
                                y_index[i] = active_size_i[i];
            if(active_size_i[i] <= 1)
                swap(index[s], index[active_size]);
            if(maxG-minG <= 1e-12)
            stopping = max(maxG - minG, stopping);
            for(m=0;m<active_size_i[i];m++)
                B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]];
            solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
            for(m=0;m<active_size_i[i];m++)
                double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
                alpha_i[alpha_index_i[m]] = alpha_new[m];
                d_ind[nz_d] = alpha_index_i[m];
            while(xi->index != -1)
                double *w_i = &w[(xi->index-1)*nr_class];
                w_i[d_ind[m]] += d_val[m]*xi->value;
        if(stopping < eps_shrink)
            if(stopping < eps && start_from_all == true)
            active_size_i[i] = nr_class;
            eps_shrink = max(eps_shrink/2, eps);
            start_from_all = true;
        start_from_all = false;

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\n");

    // calculate objective value
    for(i=0;i<w_size*nr_class;i++)
    for(i=0;i<l*nr_class;i++)
        if(fabs(alpha[i]) > 0)
    v -= alpha[i*nr_class+(int)prob->y[i]];
    info("Objective value = %lf\n",v);
    info("nSV = %d\n",nSV);

    delete [] alpha_index;
    delete [] active_size_i;
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
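
// One coordinate update of the solver below, written out (a summary of the
// code, not an addition to it): with Q_ii = x_i^T x_i + D_ii precomputed in QD,
//
//   G       = y_i w^T x_i - 1 + D_ii alpha_i      // projected gradient PG
//   alpha_i = min(max(alpha_i - G/Q_ii, 0), C)    // clipped Newton step
//   w      += (alpha_i - alpha_i_old) y_i x_i     // rank-one update via axpy
//
// so each step costs O(nnz(x_i)) and w always equals sum_i alpha_i y_i x_i.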
static void solve_l2r_l1l2_svc(
    const problem *prob, double *w, double eps,
    double Cp, double Cn, int solver_type)
    int w_size = prob->n;
    double *QD = new double[l];
    int *index = new int[l];
    double *alpha = new double[l];
    schar *y = new schar[l];

    // PG: projected gradient, for shrinking and stopping
    double PGmax_old = INF;
    double PGmin_old = -INF;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {INF, 0, INF};
    if(solver_type == L2R_L1LOSS_SVC_DUAL)
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;

    // Initial alpha can be set here. Note that
    // 0 <= alpha[i] <= upper_bound[GETI(i)]
    for(i=0; i<w_size; i++)
    QD[i] = diag[GETI(i)];
    feature_node * const xi = prob->x[i];
    QD[i] += sparse_operator::nrm2_sq(xi);
    sparse_operator::axpy(y[i]*alpha[i], xi, w);

    while (iter < max_iter)
        for (i=0; i<active_size; i++)
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        for (s=0; s<active_size; s++)
            const schar yi = y[i];
            feature_node * const xi = prob->x[i];
            G = yi*sparse_operator::dot(w, xi)-1;
            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];
            swap(index[s], index[active_size]);
            else if (alpha[i] == C)
                swap(index[s], index[active_size]);
            PGmax_new = max(PGmax_new, PG);
            PGmin_new = min(PGmin_new, PG);
            if(fabs(PG) > 1.0e-12)
                double alpha_old = alpha[i];
                alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;
                sparse_operator::axpy(d, xi, w);
        if(PGmax_new - PGmin_new <= eps)
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

    // calculate objective value
    for(i=0; i<w_size; i++)
    v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
    info("Objective value = %lf\n",v/2);
    info("nSV = %d\n",nSV);
// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
//  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l |\beta_i| + \sum_{i=1}^l yi\beta_i,
//    s.t.      -upper_bound_i <= \beta_i <= upper_bound_i,
//
//  where Qij = xi^T xj and
//  diag(lambda) is a diagonal matrix
//
// In L1-loss case:
//     upper_bound_i = C
//     lambda_i = 0
// In L2-loss case:
//     upper_bound_i = INF
//     lambda_i = 1/(2*C)
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

#undef GETI
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)
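
// The corresponding single-variable step for the SVR dual (a summary): with
// G = x_i^T w - y_i + lambda beta_i, H = x_i^T x_i + lambda, Gp = G+p, Gn = G-p,
//
//   d = -Gp/H     if Gp < H*beta_i    // move within the beta_i > 0 piece
//   d = -Gn/H     if Gn > H*beta_i    // move within the beta_i < 0 piece
//   d = -beta_i   otherwise           // jump to the kink at 0
//
// followed by clipping beta_i + d into [-upper_bound, upper_bound].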
static void solve_l2r_l1l2_svr(
    const problem *prob, double *w, const parameter *param,
    int solver_type)
    double C = param->C;
    double p = param->p;
    int w_size = prob->n;
    double eps = param->eps;
    int max_iter = 1000;
    int active_size = l;
    int *index = new int[l];

    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double *beta = new double[l];
    double *QD = new double[l];
    double *y = prob->y;

    // L2R_L2LOSS_SVR_DUAL
    double lambda[1], upper_bound[1];
    lambda[0] = 0.5/C;
    upper_bound[0] = INF;
    if(solver_type == L2R_L1LOSS_SVR_DUAL)
        lambda[0] = 0;
        upper_bound[0] = C;

    // Initial beta can be set here. Note that
    // -upper_bound <= beta[i] <= upper_bound
    for(i=0; i<w_size; i++)
    feature_node * const xi = prob->x[i];
    QD[i] = sparse_operator::nrm2_sq(xi);
    sparse_operator::axpy(beta[i], xi, w);

    while(iter < max_iter)
        for(i=0; i<active_size; i++)
            int j = i+rand()%(active_size-i);
            swap(index[i], index[j]);
        for(s=0; s<active_size; s++)
            G = -y[i] + lambda[GETI(i)]*beta[i];
            H = QD[i] + lambda[GETI(i)];
            feature_node * const xi = prob->x[i];
            G += sparse_operator::dot(w, xi);

            double violation = 0;
            else if(Gp>Gmax_old && Gn<-Gmax_old)
                swap(index[s], index[active_size]);
            else if(beta[i] >= upper_bound[GETI(i)])
            else if(Gp < -Gmax_old)
                swap(index[s], index[active_size]);
            else if(beta[i] <= -upper_bound[GETI(i)])
            else if(Gn > Gmax_old)
                swap(index[s], index[active_size]);
            else if(beta[i] > 0)
                violation = fabs(Gp);
                violation = fabs(Gn);

            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            else if(Gn > H*beta[i])
            if(fabs(d) < 1.0e-12)

            double beta_old = beta[i];
            beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
            d = beta[i]-beta_old;
            sparse_operator::axpy(d, xi, w);
        Gnorm1_init = Gnorm1_new;
        if(Gnorm1_new <= eps*Gnorm1_init)
            if(active_size == l)
        Gmax_old = Gmax_new;

    info("\noptimization finished, #iter = %d\n", iter);
    if(iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");

    // calculate objective value
    for(i=0; i<w_size; i++)
    v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
    info("Objective value = %lf\n", v);
    info("nSV = %d\n",nSV);
// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log(\alpha_i) + (upper_bound_i - \alpha_i) log(upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
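
// The per-coordinate sub-problem solved below (a summary): after choosing
// between the two symmetric variables ind1/ind2, minimize over z in (0, C)
//
//   g(z) = z log(z) + (C-z) log(C-z) + 0.5 a (z-alpha_old)^2 + sign*b (z-alpha_old)
//
// by Newton's method, using
//
//   g'(z)  = a(z-alpha_old) + sign*b + log(z/(C-z))
//   g''(z) = a + C/((C-z)z)
//
// and stopping the inner loop once |g'(z)| < innereps.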
void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
    int w_size = prob->n;
    double *xTx = new double[l];
    int max_iter = 1000;
    int *index = new int[l];
    double *alpha = new double[2*l]; // store alpha and C - alpha
    schar *y = new schar[l];
    int max_inner_iter = 100; // for inner Newton
    double innereps = 1e-2;
    double innereps_min = min(1e-8, eps);
    double upper_bound[3] = {Cn, 0, Cp};

    // Initial alpha can be set here. Note that
    // 0 < alpha[i] < upper_bound[GETI(i)]
    // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
    alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
    alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];

    for(i=0; i<w_size; i++)
    feature_node * const xi = prob->x[i];
    xTx[i] = sparse_operator::nrm2_sq(xi);
    sparse_operator::axpy(y[i]*alpha[2*i], xi, w);

    while (iter < max_iter)
        int j = i+rand()%(l-i);
        swap(index[i], index[j]);
        int newton_iter = 0;
        const schar yi = y[i];
        double C = upper_bound[GETI(i)];
        double ywTx = 0, xisq = xTx[i];
        feature_node * const xi = prob->x[i];
        ywTx = yi*sparse_operator::dot(w, xi);
        double a = xisq, b = ywTx;

        // Decide to minimize g_1(z) or g_2(z)
        int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
        if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)

        // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
        double alpha_old = alpha[ind1];
        double z = alpha_old;
        double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
        Gmax = max(Gmax, fabs(gp));

        // Newton method on the sub-problem
        const double eta = 0.1; // xi in the paper
        while (inner_iter <= max_inner_iter)
            if(fabs(gp) < innereps)
            double gpp = a + C/(C-z)/z;
            double tmpz = z - gp/gpp;
            else // tmpz in (0, C)
            gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
        if(inner_iter > 0) // update w
            sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
        if(newton_iter <= l/10)
            innereps = max(innereps_min, 0.1*innereps);

    info("\noptimization finished, #iter = %d\n",iter);
    if (iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");

    // calculate objective value
    for(i=0; i<w_size; i++)
    v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
        - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
    info("Objective value = %lf\n", v);
// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
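
// The one-variable step used below (a summary of the code): with G the loss
// gradient, H its curvature bound, and Gp = G+1, Gn = G-1 from the |w_j| term,
//
//   d = -Gp/H    if Gp < H*w_j     // stay on the w_j > 0 piece
//   d = -Gn/H    if Gn > H*w_j     // stay on the w_j < 0 piece
//   d = -w_j     otherwise         // the optimum sits at the kink w_j = 0
//
// followed by a backtracking line search that halves d until the sufficient
// decrease condition with sigma = 0.01 holds.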
static void solve_l1r_l2_svc(
    problem *prob_col, double *w, double eps,
    double Cp, double Cn)
    int l = prob_col->l;
    int w_size = prob_col->n;
    int max_iter = 1000;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double d_old, d_diff;
    double loss_old, loss_new;
    double appxcond, cond;

    int *index = new int[w_size];
    schar *y = new schar[l];
    double *b = new double[l]; // b = 1-ywTx
    double *xj_sq = new double[w_size];

    double C[3] = {Cn,0,Cp};

    // Initial w can be set here.
    for(j=0; j<w_size; j++)
    if(prob_col->y[j] > 0)
    for(j=0; j<w_size; j++)
        while(x->index != -1)
            int ind = x->index-1;
            x->value *= y[ind]; // x->value stores yi*xij
            double val = x->value;
            xj_sq[j] += C[GETI(ind)]*val*val;

    while(iter < max_iter)
        for(j=0; j<active_size; j++)
            int i = j+rand()%(active_size-j);
            swap(index[i], index[j]);
        for(s=0; s<active_size; s++)
            while(x->index != -1)
                int ind = x->index-1;
                double val = x->value;
                double tmp = C[GETI(ind)]*val;
                G_loss -= tmp*b[ind];

            double violation = 0;
            else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                swap(index[s], index[active_size]);
                violation = fabs(Gp);
                violation = fabs(Gn);

            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            else if(Gn > H*w[j])
            if(fabs(d) < 1.0e-12)

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                sparse_operator::axpy(d_diff, x, b);
                if(num_linesearch == 0)
                    while(x->index != -1)
                        int ind = x->index-1;
                        loss_old += C[GETI(ind)]*b[ind]*b[ind];
                        double b_new = b[ind] + d_diff*x->value;
                        loss_new += C[GETI(ind)]*b_new*b_new;
                    while(x->index != -1)
                        int ind = x->index-1;
                        double b_new = b[ind] + d_diff*x->value;
                        loss_new += C[GETI(ind)]*b_new*b_new;
                cond = cond + loss_new - loss_old;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
                for(int i=0; i<l; i++)
                for(int i=0; i<w_size; i++)
                    if(w[i]==0) continue;
                    sparse_operator::axpy(-w[i], x, b);

        Gnorm1_init = Gnorm1_new;
        if(Gnorm1_new <= eps*Gnorm1_init)
            if(active_size == w_size)
            active_size = w_size;
        Gmax_old = Gmax_new;

    info("\noptimization finished, #iter = %d\n", iter);
    if(iter >= max_iter)
        info("\nWARNING: reaching max number of iterations\n");

    // calculate objective value
    for(j=0; j<w_size; j++)
        while(x->index != -1)
            x->value *= prob_col->y[x->index-1]; // restore x->value
    v += C[GETI(j)]*b[j]*b[j];
    info("Objective value = %lf\n", v);
    info("#nonzeros/#features = %d/%d\n", nnz, w_size);
// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
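
// Outline of the CDN method implemented below (a summary of the code): each
// outer Newton iteration minimizes the quadratic model of the regularized
// logistic loss at the current w,
//
//   q(d) = Grad^T d + 0.5 d^T (X^T D X + nu I) d + |w+d|_1 - |w|_1,
//
// where Grad[] and the curvatures D[] are maintained through the cached
// exp_wTx[] and tau[] arrays. The inner coordinate descent loop optimizes q
// over wpd, and a backtracking line search on the true objective accepts the
// step or halves it via wpd = (w+wpd)/2.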
static void solve_l1r_lr(
    const problem *prob_col, double *w, double eps,
    double Cp, double Cn)
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, newton_iter=0, iter=0;
    int max_newton_iter = 100;
    int max_iter = 1000;
    int max_num_linesearch = 20;

    double inner_eps = 1;
    double sigma = 0.01;
    double w_norm, w_norm_new;
    double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
    double Gmax_old = INF;
    double Gmax_new, Gnorm1_new;
    double QP_Gmax_old = INF;
    double QP_Gmax_new, QP_Gnorm1_new;
    double delta, negsum_xTd, cond;

    int *index = new int[w_size];
    schar *y = new schar[l];
    double *Hdiag = new double[w_size];
    double *Grad = new double[w_size];
    double *wpd = new double[w_size];
    double *xjneg_sum = new double[w_size];
    double *xTd = new double[l];
    double *exp_wTx = new double[l];
    double *exp_wTx_new = new double[l];
    double *tau = new double[l];
    double *D = new double[l];

    double C[3] = {Cn,0,Cp};

    // Initial w can be set here.
    for(j=0; j<w_size; j++)
    if(prob_col->y[j] > 0)
    for(j=0; j<w_size; j++)
        w_norm += fabs(w[j]);
        while(x->index != -1)
            int ind = x->index-1;
            double val = x->value;
            exp_wTx[ind] += w[j]*val;
            xjneg_sum[j] += C[GETI(ind)]*val;
    exp_wTx[j] = exp(exp_wTx[j]);
    double tau_tmp = 1/(1+exp_wTx[j]);
    tau[j] = C[GETI(j)]*tau_tmp;
    D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp;

    while(newton_iter < max_newton_iter)
        active_size = w_size;
        for(s=0; s<active_size; s++)
            while(x->index != -1)
                int ind = x->index-1;
                Hdiag[j] += x->value*x->value*D[ind];
                tmp += x->value*tau[ind];
            Grad[j] = -tmp + xjneg_sum[j];

            double Gp = Grad[j]+1;
            double Gn = Grad[j]-1;
            double violation = 0;
            // outer-level shrinking
            else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                swap(index[s], index[active_size]);
                violation = fabs(Gp);
                violation = fabs(Gn);

            Gmax_new = max(Gmax_new, violation);
            Gnorm1_new += violation;

        if(newton_iter == 0)
            Gnorm1_init = Gnorm1_new;
        if(Gnorm1_new <= eps*Gnorm1_init)

        QP_active_size = active_size;
        for(int i=0; i<l; i++)

        // optimize QP over wpd
        while(iter < max_iter)
            for(j=0; j<QP_active_size; j++)
                int i = j+rand()%(QP_active_size-j);
                swap(index[i], index[j]);
            for(s=0; s<QP_active_size; s++)
                G = Grad[j] + (wpd[j]-w[j])*nu;
                while(x->index != -1)
                    int ind = x->index-1;
                    G += x->value*D[ind]*xTd[ind];

                double violation = 0;
                // inner-level shrinking
                else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
                    swap(index[s], index[QP_active_size]);
                    violation = fabs(Gp);
                    violation = fabs(Gn);

                QP_Gmax_new = max(QP_Gmax_new, violation);
                QP_Gnorm1_new += violation;

                // obtain solution of one-variable problem
                else if(Gn > H*wpd[j])
                if(fabs(z) < 1.0e-12)
                z = min(max(z,-10.0),10.0);
                sparse_operator::axpy(z, x, xTd);

            if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
                if(QP_active_size == active_size)
                // active set reactivation
                QP_active_size = active_size;
            QP_Gmax_old = QP_Gmax_new;

        if(iter >= max_iter)
            info("WARNING: reaching max number of inner iterations\n");

        for(j=0; j<w_size; j++)
            delta += Grad[j]*(wpd[j]-w[j]);
            w_norm_new += fabs(wpd[j]);
        delta += (w_norm_new-w_norm);

        for(int i=0; i<l; i++)
            negsum_xTd += C[GETI(i)]*xTd[i];
        for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
            for(int i=0; i<l; i++)
                double exp_xTd = exp(xTd[i]);
                exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
                cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
            w_norm = w_norm_new;
            for(j=0; j<w_size; j++)
            for(int i=0; i<l; i++)
                exp_wTx[i] = exp_wTx_new[i];
                double tau_tmp = 1/(1+exp_wTx[i]);
                tau[i] = C[GETI(i)]*tau_tmp;
                D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
            for(j=0; j<w_size; j++)
                wpd[j] = (w[j]+wpd[j])*0.5;
                w_norm_new += fabs(wpd[j]);
            for(int i=0; i<l; i++)

        // Recompute some info due to too many line search steps
        if(num_linesearch >= max_num_linesearch)
            for(int i=0; i<l; i++)
            for(int i=0; i<w_size; i++)
                if(w[i]==0) continue;
                sparse_operator::axpy(w[i], x, exp_wTx);
            for(int i=0; i<l; i++)
                exp_wTx[i] = exp(exp_wTx[i]);

        Gmax_old = Gmax_new;
        info("iter %3d  #CD cycles %d\n", newton_iter, iter);

    info("=========================\n");
    info("optimization finished, #iter = %d\n", newton_iter);
    if(newton_iter >= max_newton_iter)
        info("WARNING: reaching max number of iterations\n");

    // calculate objective value
    for(j=0; j<w_size; j++)
        v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        v += C[GETI(j)]*log(1+exp_wTx[j]);
    info("Objective value = %lf\n", v);
    info("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] xjneg_sum;
    delete [] exp_wTx_new;
// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
    size_t *col_ptr = new size_t [n+1];
    feature_node *x_space;
    prob_col->y = new double[l];
    prob_col->x = new feature_node*[n];
    prob_col->y[i] = prob->y[i];
    for(i=0; i<n+1; i++)
    feature_node *x = prob->x[i];
    while(x->index != -1)
        col_ptr[x->index]++;
    for(i=1; i<n+1; i++)
        col_ptr[i] += col_ptr[i-1] + 1;
    x_space = new feature_node[nnz+n];
    prob_col->x[i] = &x_space[col_ptr[i]];
    feature_node *x = prob->x[i];
    while(x->index != -1)
        int ind = x->index-1;
        x_space[col_ptr[ind]].index = i+1; // starts from 1
        x_space[col_ptr[ind]].value = x->value;
    x_space[col_ptr[i]].index = -1;
    *x_space_ret = x_space;
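
// A small worked example of the layout change (illustration only): row format
//
//   x[0] = {(1,0.5),(3,1.0),(-1)}        // instance 1
//   x[1] = {(2,2.0),(3,-1.0),(-1)}       // instance 2
//
// becomes column format, where indices now name instances (starting from 1):
//
//   prob_col->x[0] = {(1,0.5),(-1)}              // feature 1
//   prob_col->x[1] = {(2,2.0),(-1)}              // feature 2
//   prob_col->x[2] = {(1,1.0),(2,-1.0),(-1)}     // feature 3
//
// hence the nnz+n allocation: one -1 terminator per feature column.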
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
    int max_nr_class = 16;
    int *label = Malloc(int,max_nr_class);
    int *count = Malloc(int,max_nr_class);
    int *data_label = Malloc(int,l);
    int this_label = (int)prob->y[i];
    for(j=0;j<nr_class;j++)
        if(this_label == label[j])
    if(nr_class == max_nr_class)
        label = (int *)realloc(label,max_nr_class*sizeof(int));
        count = (int *)realloc(count,max_nr_class*sizeof(int));
    label[nr_class] = this_label;
    count[nr_class] = 1;

    // Labels are ordered by their first occurrence in the training set.
    // However, for two-class sets with -1/+1 labels where -1 appears first,
    // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
    if (nr_class == 2 && label[0] == -1 && label[1] == 1)
        swap(label[0],label[1]);
        swap(count[0],count[1]);
        if(data_label[i] == 0)

    int *start = Malloc(int,nr_class);
    for(i=1;i<nr_class;i++)
        start[i] = start[i-1]+count[i-1];
    perm[start[data_label[i]]] = i;
    ++start[data_label[i]];
    for(i=1;i<nr_class;i++)
        start[i] = start[i-1]+count[i-1];

    *nr_class_ret = nr_class;
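
// Illustration: for y = {20, 10, 20, 30} the routine yields
//
//   nr_class = 3, label = {20, 10, 30}, count = {2, 1, 1},
//   start = {0, 2, 3}, perm = {0, 2, 1, 3}
//
// so perm[start[k]] .. perm[start[k]+count[k]-1] index exactly the instances
// of class label[k] (labels ordered by first occurrence).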
static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
    // inner and outer tolerances for TRON
    double eps = param->eps;
    double eps_cg = 0.1;
    if(param->init_sol != NULL)
        eps_cg = 0.5;

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob->l;i++)
        if(prob->y[i] > 0)
            pos++;
    neg = prob->l - pos;
    double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;

    function *fun_obj=NULL;
    switch(param->solver_type)
        case L2R_LR:
            double *C = new double[prob->l];
            for(int i = 0; i < prob->l; i++)
                if(prob->y[i] > 0)
                    C[i] = Cp;
                else
                    C[i] = Cn;
            fun_obj=new l2r_lr_fun(prob, C);
            TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
            tron_obj.set_print_string(liblinear_print_string);
            tron_obj.tron(w);
            break;
        case L2R_L2LOSS_SVC:
            double *C = new double[prob->l];
            for(int i = 0; i < prob->l; i++)
                if(prob->y[i] > 0)
                    C[i] = Cp;
                else
                    C[i] = Cn;
            fun_obj=new l2r_l2_svc_fun(prob, C);
            TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
            tron_obj.set_print_string(liblinear_print_string);
            tron_obj.tron(w);
            break;
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
            problem prob_col;
            feature_node *x_space = NULL;
            transpose(prob, &x_space, &prob_col);
            solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn);
            delete [] prob_col.y;
            delete [] prob_col.x;
            delete [] x_space;
            break;
        case L1R_LR:
            problem prob_col;
            feature_node *x_space = NULL;
            transpose(prob, &x_space, &prob_col);
            solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn);
            delete [] prob_col.y;
            delete [] prob_col.x;
            delete [] x_space;
            break;
        case L2R_LR_DUAL:
            solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
            break;
        case L2R_L2LOSS_SVR:
            double *C = new double[prob->l];
            for(int i = 0; i < prob->l; i++)
                C[i] = param->C;
            fun_obj=new l2r_l2_svr_fun(prob, C, param->p);
            TRON tron_obj(fun_obj, param->eps);
            tron_obj.set_print_string(liblinear_print_string);
            tron_obj.tron(w);
            break;
        case L2R_L1LOSS_SVR_DUAL:
            solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
            break;
        case L2R_L2LOSS_SVR_DUAL:
            solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
            break;
        default:
            fprintf(stderr, "ERROR: unknown solver_type\n");

// Calculate the initial C for parameter selection
static double calc_start_C(const problem *prob, const parameter *param)
    double min_C = 1.0;
    double max_xTx = 0;
    for(i=0; i<prob->l; i++)
        xTx = 0;
        feature_node *xi=prob->x[i];
        while(xi->index != -1)
            double val = xi->value;
            xTx += val*val;
        if(xTx > max_xTx)
            max_xTx = xTx;

    if(param->solver_type == L2R_LR)
        min_C = 1.0 / (prob->l * max_xTx);
    else if(param->solver_type == L2R_L2LOSS_SVC)
        min_C = 1.0 / (2 * prob->l * max_xTx);

    return pow(2, floor(log(min_C) / log(2.0)));
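
// Example of the final rounding (illustration): if min_C works out to 0.3,
// the function returns pow(2, floor(log2(0.3))) = pow(2, -2) = 0.25, i.e. the
// largest power of two not exceeding min_C, so find_parameter_C() can search
// on a clean power-of-two grid.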
//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
    int w_size = prob->n;
    model *model_ = Malloc(model,1);

    if(prob->bias>=0)
        model_->nr_feature=n-1;
    else
        model_->nr_feature=n;
    model_->param = *param;
    model_->bias = prob->bias;

    if(check_regression_model(model_))
        model_->w = Malloc(double, w_size);
        for(i=0; i<w_size; i++)
            model_->w[i] = 0;
        model_->nr_class = 2;
        model_->label = NULL;
        train_one(prob, param, model_->w, 0, 0);
    else
        int *perm = Malloc(int,l);

        // group training data of the same class
        group_classes(prob,&nr_class,&label,&start,&count,perm);

        model_->nr_class=nr_class;
        model_->label = Malloc(int,nr_class);
        for(i=0;i<nr_class;i++)
            model_->label[i] = label[i];

        // calculate weighted C
        double *weighted_C = Malloc(double, nr_class);
        for(i=0;i<nr_class;i++)
            weighted_C[i] = param->C;
        for(i=0;i<param->nr_weight;i++)
            for(j=0;j<nr_class;j++)
                if(param->weight_label[i] == label[j])
                    break;
            if(j == nr_class)
                fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
            else
                weighted_C[j] *= param->weight[i];

        // constructing the subproblem
        feature_node **x = Malloc(feature_node *,l);
        for(i=0;i<l;i++)
            x[i] = prob->x[perm[i]];

        problem sub_prob;
        sub_prob.l = l;
        sub_prob.n = n;
        sub_prob.x = Malloc(feature_node *,sub_prob.l);
        sub_prob.y = Malloc(double,sub_prob.l);

        for(k=0; k<sub_prob.l; k++)
            sub_prob.x[k] = x[k];

        // multi-class svm by Crammer and Singer
        if(param->solver_type == MCSVM_CS)
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
        else
            if(nr_class == 2)
                model_->w=Malloc(double, w_size);

                int e0 = start[0]+count[0];
                k=0;
                for(; k<e0; k++)
                    sub_prob.y[k] = +1;
                for(; k<sub_prob.l; k++)
                    sub_prob.y[k] = -1;

                if(param->init_sol != NULL)
                    for(i=0;i<w_size;i++)
                        model_->w[i] = param->init_sol[i];
                else
                    for(i=0;i<w_size;i++)
                        model_->w[i] = 0;

                train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
            else
                model_->w=Malloc(double, w_size*nr_class);
                double *w=Malloc(double, w_size);
                for(i=0;i<nr_class;i++)
                    int si = start[i];
                    int ei = si+count[i];

                    k=0;
                    for(; k<si; k++)
                        sub_prob.y[k] = -1;
                    for(; k<ei; k++)
                        sub_prob.y[k] = +1;
                    for(; k<sub_prob.l; k++)
                        sub_prob.y[k] = -1;

                    if(param->init_sol != NULL)
                        for(j=0;j<w_size;j++)
                            w[j] = param->init_sol[j*nr_class+i];
                    else
                        for(j=0;j<w_size;j++)
                            w[j] = 0;

                    train_one(&sub_prob, param, w, weighted_C[i], param->C);

                    for(j=0;j<w_size;j++)
                        model_->w[j*nr_class+i] = w[j];
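
// Typical use of the training interface (a sketch; error handling and the
// bias term omitted, using the feature_node convention shown earlier):
//
//   struct feature_node x1[] = {{1,1.0},{2,0.5},{-1,0}};
//   struct feature_node x2[] = {{1,-1.0},{2,2.0},{-1,0}};
//   struct feature_node *rows[] = {x1, x2};
//   double labels[] = {+1, -1};
//   struct problem prob = {2, 2, labels, rows, -1};   // l, n, y, x, bias
//   struct parameter param;
//   param.solver_type = L2R_L2LOSS_SVC_DUAL;
//   param.C = 1; param.eps = 0.1; param.p = 0.1;
//   param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
//   param.init_sol = NULL;
//   struct model *m = train(&prob, &param);
//   double yhat = predict(m, x1);
//   free_and_destroy_model(&m);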
void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target)
    int *perm = Malloc(int,l);
    if (nr_fold > l)
        fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
    fold_start = Malloc(int,nr_fold+1);
    for(i=0;i<l;i++) perm[i]=i;
        int j = i+rand()%(l-i);
        swap(perm[i],perm[j]);
    for(i=0;i<=nr_fold;i++)
        fold_start[i]=i*l/nr_fold;
    for(i=0;i<nr_fold;i++)
        int begin = fold_start[i];
        int end = fold_start[i+1];
        struct problem subprob;

        subprob.bias = prob->bias;
        subprob.n = prob->n;
        subprob.l = l-(end-begin);
        subprob.x = Malloc(struct feature_node*,subprob.l);
        subprob.y = Malloc(double,subprob.l);

        for(j=0;j<begin;j++)
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
        for(j=end;j<l;j++)
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];

        struct model *submodel = train(&subprob,param);
        for(j=begin;j<end;j++)
            target[perm[j]] = predict(submodel,prob->x[perm[j]]);
        free_and_destroy_model(&submodel);
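
// Sketch of a cross-validation call (assumes prob/param set up as above):
//
//   double *target = Malloc(double, prob.l);
//   cross_validation(&prob, &param, 5, target);   // 5-fold CV
//   int correct = 0;
//   for(int i=0;i<prob.l;i++)
//       if(target[i] == prob.y[i]) ++correct;
//   info("CV accuracy = %g%%\n", 100.0*correct/prob.l);
//   free(target);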
void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
    int *perm = Malloc(int, l);
    double *target = Malloc(double, prob->l);
    struct problem *subprob = Malloc(problem,nr_fold);

    // variables for warm start
    double **prev_w = Malloc(double*, nr_fold);
    for(i = 0; i < nr_fold; i++)
        prev_w[i] = NULL;
    int num_unchanged_w = 0;
    struct parameter param1 = *param;
    void (*default_print_string) (const char *) = liblinear_print_string;

    if (nr_fold > l)
        fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
    fold_start = Malloc(int,nr_fold+1);
    for(i=0;i<l;i++) perm[i]=i;
        int j = i+rand()%(l-i);
        swap(perm[i],perm[j]);
    for(i=0;i<=nr_fold;i++)
        fold_start[i]=i*l/nr_fold;
    for(i=0;i<nr_fold;i++)
        int begin = fold_start[i];
        int end = fold_start[i+1];

        subprob[i].bias = prob->bias;
        subprob[i].n = prob->n;
        subprob[i].l = l-(end-begin);
        subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
        subprob[i].y = Malloc(double,subprob[i].l);

        for(j=0;j<begin;j++)
            subprob[i].x[k] = prob->x[perm[j]];
            subprob[i].y[k] = prob->y[perm[j]];
        for(j=end;j<l;j++)
            subprob[i].x[k] = prob->x[perm[j]];
            subprob[i].y[k] = prob->y[perm[j]];

    if(start_C <= 0)
        start_C = calc_start_C(prob,param);
    param1.C = start_C;

    while(param1.C <= max_C)
        // Output disabled for running CV at a particular C
        set_print_string_function(&print_null);
        for(i=0; i<nr_fold; i++)
            int begin = fold_start[i];
            int end = fold_start[i+1];

            param1.init_sol = prev_w[i];
            struct model *submodel = train(&subprob[i],&param1);

            int total_w_size;
            if(submodel->nr_class == 2)
                total_w_size = subprob[i].n;
            else
                total_w_size = subprob[i].n * submodel->nr_class;

            if(prev_w[i] == NULL)
                prev_w[i] = Malloc(double, total_w_size);
                for(j=0; j<total_w_size; j++)
                    prev_w[i][j] = submodel->w[j];
            else if(num_unchanged_w >= 0)
                double norm_w_diff = 0;
                for(j=0; j<total_w_size; j++)
                    norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
                    prev_w[i][j] = submodel->w[j];
                norm_w_diff = sqrt(norm_w_diff);
                if(norm_w_diff > 1e-15)
                    num_unchanged_w = -1;
            else
                for(j=0; j<total_w_size; j++)
                    prev_w[i][j] = submodel->w[j];

            for(j=begin; j<end; j++)
                target[perm[j]] = predict(submodel,prob->x[perm[j]]);
            free_and_destroy_model(&submodel);
        set_print_string_function(default_print_string);

        int total_correct = 0;
        for(i=0; i<prob->l; i++)
            if(target[i] == prob->y[i])
                ++total_correct;
        double current_rate = (double)total_correct/prob->l;
        if(current_rate > *best_rate)
            *best_C = param1.C;
            *best_rate = current_rate;
        info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
        if(num_unchanged_w == 3)
            break;
        param1.C = param1.C*ratio;

    if(param1.C > max_C && max_C > start_C)
        info("warning: maximum C reached.\n");
    for(i=0; i<nr_fold; i++)
double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
    int n;
    if(model_->bias>=0)
        n=model_->nr_feature+1;
    else
        n=model_->nr_feature;
    double *w=model_->w;
    int nr_class=model_->nr_class;
    int nr_w;
    if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
        nr_w = 1;
    else
        nr_w = nr_class;

    const feature_node *lx=x;
    for(; (idx=lx->index)!=-1; lx++)
        // the dimension of testing data may exceed that of training
        if(idx<=n)
            for(i=0;i<nr_w;i++)
                dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;

    if(nr_class==2)
        if(check_regression_model(model_))
            return dec_values[0];
        else
            return (dec_values[0]>0)?model_->label[0]:model_->label[1];
    else
        int dec_max_idx = 0;
        for(i=1;i<nr_class;i++)
            if(dec_values[i] > dec_values[dec_max_idx])
                dec_max_idx = i;
        return model_->label[dec_max_idx];

double predict(const model *model_, const feature_node *x)
    double *dec_values = Malloc(double, model_->nr_class);
    double label=predict_values(model_, x, dec_values);
    free(dec_values);
    return label;

double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
    if(check_probability_model(model_))
        int nr_class=model_->nr_class;
        double label=predict_values(model_, x, prob_estimates);
        for(i=0;i<nr_w;i++)
            prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

        if(nr_class==2) // for binary classification
            prob_estimates[1]=1.-prob_estimates[0];
        else
            double sum=0;
            for(i=0; i<nr_class; i++)
                sum+=prob_estimates[i];
            for(i=0; i<nr_class; i++)
                prob_estimates[i]=prob_estimates[i]/sum;
        return label;

static const char *solver_type_table[]=
{
    "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
    "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL",
    "", "", "",
    "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
    int nr_feature=model_->nr_feature;
    const parameter& param = model_->param;

    FILE *fp = fopen(model_file_name,"w");
    if(fp==NULL) return -1;

    char *old_locale = setlocale(LC_ALL, NULL);
    if (old_locale)
        old_locale = strdup(old_locale);
    setlocale(LC_ALL, "C");

    int nr_w;
    if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
        nr_w=1;
    else
        nr_w=model_->nr_class;

    fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
    fprintf(fp, "nr_class %d\n", model_->nr_class);

    if(model_->label)
        fprintf(fp, "label");
        for(i=0; i<model_->nr_class; i++)
            fprintf(fp, " %d", model_->label[i]);

    fprintf(fp, "nr_feature %d\n", nr_feature);
    fprintf(fp, "bias %.17g\n", model_->bias);

    fprintf(fp, "w\n");
    for(i=0; i<w_size; i++)
        for(j=0; j<nr_w; j++)
            fprintf(fp, "%.17g ", model_->w[i*nr_w+j]);
        fprintf(fp, "\n");

    setlocale(LC_ALL, old_locale);
    free(old_locale);

    if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
    else return 0;
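
// The resulting text file therefore looks like (illustrative values):
//
//   solver_type L2R_L2LOSS_SVC_DUAL
//   nr_class 2
//   label 1 -1
//   nr_feature 2
//   bias -1
//   w
//   0.43
//   -0.27
//
// one w line per feature (nr_w columns per line in the multi-class case),
// printed under the "C" locale so that load_model can parse it anywhere.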
//
// FSCANF helps to handle fscanf failures.
// Its do-while block avoids the ambiguity when
// it is used as a statement in an if-else block.
//
#define FSCANF(_stream, _format, _var)do\
{\
    if (fscanf(_stream, _format, _var) != 1)\
    {\
        fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
        EXIT_LOAD_MODEL()\
    }\
}while(0)
// EXIT_LOAD_MODEL should NOT end with a semicolon.
#define EXIT_LOAD_MODEL()\
{\
    setlocale(LC_ALL, old_locale);\
    free(model_->label);\
    free(model_);\
    free(old_locale);\
    return NULL;\
}

struct model *load_model(const char *model_file_name)
    FILE *fp = fopen(model_file_name,"r");
    if(fp==NULL) return NULL;

    int nr_feature;
    int nr_class;
    double bias;
    model *model_ = Malloc(model,1);
    parameter& param = model_->param;
    // parameters for training only won't be assigned, but arrays are assigned as NULL for safety
    param.nr_weight = 0;
    param.weight_label = NULL;
    param.weight = NULL;
    param.init_sol = NULL;

    model_->label = NULL;

    char *old_locale = setlocale(LC_ALL, NULL);
    if (old_locale)
        old_locale = strdup(old_locale);
    setlocale(LC_ALL, "C");

    char cmd[81];
    while(1)
        FSCANF(fp,"%80s",cmd);
        if(strcmp(cmd,"solver_type")==0)
            FSCANF(fp,"%80s",cmd);
            int i;
            for(i=0;solver_type_table[i];i++)
                if(strcmp(solver_type_table[i],cmd)==0)
                    param.solver_type=i;
                    break;
            if(solver_type_table[i] == NULL)
                fprintf(stderr,"unknown solver type.\n");
                EXIT_LOAD_MODEL()
        else if(strcmp(cmd,"nr_class")==0)
            FSCANF(fp,"%d",&nr_class);
            model_->nr_class=nr_class;
        else if(strcmp(cmd,"nr_feature")==0)
            FSCANF(fp,"%d",&nr_feature);
            model_->nr_feature=nr_feature;
        else if(strcmp(cmd,"bias")==0)
            FSCANF(fp,"%lf",&bias);
            model_->bias=bias;
        else if(strcmp(cmd,"w")==0)
            break;
        else if(strcmp(cmd,"label")==0)
            int nr_class = model_->nr_class;
            model_->label = Malloc(int,nr_class);
            for(int i=0;i<nr_class;i++)
                FSCANF(fp,"%d",&model_->label[i]);
        else
            fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
            EXIT_LOAD_MODEL()

    nr_feature=model_->nr_feature;
    int nr_w;
    if(nr_class==2 && param.solver_type != MCSVM_CS)
        nr_w = 1;
    else
        nr_w = nr_class;

    model_->w=Malloc(double, w_size*nr_w);
    for(i=0; i<w_size; i++)
        for(j=0; j<nr_w; j++)
            FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);

    setlocale(LC_ALL, old_locale);
    free(old_locale);

    if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
    return model_;

int get_nr_feature(const model *model_)
    return model_->nr_feature;

int get_nr_class(const model *model_)
    return model_->nr_class;

void get_labels(const model *model_, int* label)
    if (model_->label != NULL)
        for(int i=0;i<model_->nr_class;i++)
            label[i] = model_->label[i];

// use inline here for better performance (around 20% faster than the non-inline one)
static inline double get_w_value(const struct model *model_, int idx, int label_idx)
    int nr_class = model_->nr_class;
    int solver_type = model_->param.solver_type;
    const double *w = model_->w;

    if(idx < 0 || idx > model_->nr_feature)
        return 0;
    if(check_regression_model(model_))
        return w[idx];
    else
        if(label_idx < 0 || label_idx >= nr_class)
            return 0;
        if(nr_class == 2 && solver_type != MCSVM_CS)
            if(label_idx == 0)
                return w[idx];
            else
                return -w[idx];
        else
            return w[idx*nr_class+label_idx];

// feat_idx: starting from 1 to nr_feature
// label_idx: starting from 0 to nr_class-1 for classification models;
//            for regression models, label_idx is ignored.
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
    if(feat_idx > model_->nr_feature)
        return 0;
    return get_w_value(model_, feat_idx-1, label_idx);

double get_decfun_bias(const struct model *model_, int label_idx)
    int bias_idx = model_->nr_feature;
    double bias = model_->bias;
    if(bias <= 0)
        return 0;
    else
        return bias*get_w_value(model_, bias_idx, label_idx);
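
// Usage sketch: the decision value for class k of a trained model m can be
// reassembled from these accessors as
//
//   double dec = 0;
//   for(int j=1; j<=get_nr_feature(m); j++)
//       dec += get_decfun_coef(m, j, k) * x_dense[j-1];   // x_dense: hypothetical dense copy of x
//   dec += get_decfun_bias(m, k);
//
// matching what predict_values() computes from the sparse representation.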
void free_model_content(struct model *model_ptr)
    if(model_ptr->w != NULL)
        free(model_ptr->w);
    if(model_ptr->label != NULL)
        free(model_ptr->label);

void free_and_destroy_model(struct model **model_ptr_ptr)
    struct model *model_ptr = *model_ptr_ptr;
    if(model_ptr != NULL)
        free_model_content(model_ptr);
        free(model_ptr);

void destroy_param(parameter* param)
    if(param->weight_label != NULL)
        free(param->weight_label);
    if(param->weight != NULL)
        free(param->weight);
    if(param->init_sol != NULL)
        free(param->init_sol);

const char *check_parameter(const problem *prob, const parameter *param)
    if(param->solver_type != L2R_LR
    && param->solver_type != L2R_L2LOSS_SVC_DUAL
    && param->solver_type != L2R_L2LOSS_SVC
    && param->solver_type != L2R_L1LOSS_SVC_DUAL
    && param->solver_type != MCSVM_CS
    && param->solver_type != L1R_L2LOSS_SVC
    && param->solver_type != L1R_LR
    && param->solver_type != L2R_LR_DUAL
    && param->solver_type != L2R_L2LOSS_SVR
    && param->solver_type != L2R_L2LOSS_SVR_DUAL
    && param->solver_type != L2R_L1LOSS_SVR_DUAL)
        return "unknown solver type";

    if(param->init_sol != NULL
    && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
        return "Initial-solution specification supported only for solvers L2R_LR and L2R_L2LOSS_SVC";

    return NULL;

int check_probability_model(const struct model *model_)
    return (model_->param.solver_type==L2R_LR ||
            model_->param.solver_type==L2R_LR_DUAL ||
            model_->param.solver_type==L1R_LR);

int check_regression_model(const struct model *model_)
    return (model_->param.solver_type==L2R_L2LOSS_SVR ||
            model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
            model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);

void set_print_string_function(void (*print_func)(const char*))
    if (print_func == NULL)
        liblinear_print_string = &print_string_stdout;
    else
        liblinear_print_string = print_func;