]> granicus.if.org Git - libass/blob - libass/ass.c
Rename some preprocessor directives from CONFIG_* to HAVE_* where appropriate;
[libass] / libass / ass.c
1 // -*- c-basic-offset: 8; indent-tabs-mode: t -*-
2 // vim:ts=8:sw=8:noet:ai:
3 /*
4  * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov@gmail.com>
5  *
6  * This file is part of libass.
7  *
8  * libass is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * libass is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with libass; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22
23 #include "config.h"
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <assert.h>
29 #include <errno.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <unistd.h>
33 #include <inttypes.h>
34
35 #ifdef HAVE_ICONV
36 #include <iconv.h>
37 #endif
38
39 #include "ass.h"
40 #include "ass_utils.h"
41 #include "ass_library.h"
42 #include "mputils.h"
43
/* Section of the script that the line parser is currently inside. */
typedef enum {
	PST_UNKNOWN = 0,
	PST_INFO,
	PST_STYLES,
	PST_EVENTS,
	PST_FONTS
} parser_state_t;

/* Parser state kept between lines; accessed through track->parser_priv. */
struct parser_priv_s {
	parser_state_t state; // section being parsed
	char *fontname;       // name of the embedded font being collected, 0 if none
	char *fontdata;       // encoded font data gathered so far
	int fontdata_size;    // bytes allocated for fontdata
	int fontdata_used;    // bytes of fontdata already filled
};

/* Number of elements the style / event arrays grow by when they are full. */
#define ASS_STYLES_ALLOC 20
#define ASS_EVENTS_ALLOC 200
56
57 void ass_free_track(ass_track_t* track) {
58         int i;
59         
60         if (track->parser_priv) {
61                 if (track->parser_priv->fontname)
62                         free(track->parser_priv->fontname);
63                 if (track->parser_priv->fontdata)
64                         free(track->parser_priv->fontdata);
65                 free(track->parser_priv);
66         }
67         if (track->style_format)
68                 free(track->style_format);
69         if (track->event_format)
70                 free(track->event_format);
71         if (track->styles) {
72                 for (i = 0; i < track->n_styles; ++i)
73                         ass_free_style(track, i);
74                 free(track->styles);
75         }
76         if (track->events) {
77                 for (i = 0; i < track->n_events; ++i)
78                         ass_free_event(track, i);
79                 free(track->events);
80         }
81 }
82
83 /// \brief Allocate a new style struct
84 /// \param track track
85 /// \return style id
86 int ass_alloc_style(ass_track_t* track) {
87         int sid;
88         
89         assert(track->n_styles <= track->max_styles);
90
91         if (track->n_styles == track->max_styles) {
92                 track->max_styles += ASS_STYLES_ALLOC;
93                 track->styles = (ass_style_t*)realloc(track->styles, sizeof(ass_style_t)*track->max_styles);
94         }
95         
96         sid = track->n_styles++;
97         memset(track->styles + sid, 0, sizeof(ass_style_t));
98         return sid;
99 }
100
101 /// \brief Allocate a new event struct
102 /// \param track track
103 /// \return event id
104 int ass_alloc_event(ass_track_t* track) {
105         int eid;
106         
107         assert(track->n_events <= track->max_events);
108
109         if (track->n_events == track->max_events) {
110                 track->max_events += ASS_EVENTS_ALLOC;
111                 track->events = (ass_event_t*)realloc(track->events, sizeof(ass_event_t)*track->max_events);
112         }
113         
114         eid = track->n_events++;
115         memset(track->events + eid, 0, sizeof(ass_event_t));
116         return eid;
117 }
118
119 void ass_free_event(ass_track_t* track, int eid) {
120         ass_event_t* event = track->events + eid;
121         if (event->Name)
122                 free(event->Name);
123         if (event->Effect)
124                 free(event->Effect);
125         if (event->Text)
126                 free(event->Text);
127         if (event->render_priv)
128                 free(event->render_priv);
129 }
130
131 void ass_free_style(ass_track_t* track, int sid) {
132         ass_style_t* style = track->styles + sid;
133         if (style->Name)
134                 free(style->Name);
135         if (style->FontName)
136                 free(style->FontName);
137 }
138
139 // ==============================================================================================
140
/// Advance *str past any leading spaces and tabs.
static void skip_spaces(char** str) {
	char* cur;

	for (cur = *str; *cur == ' ' || *cur == '\t'; ++cur) {}
	*str = cur;
}
147
/// Move *str backwards over spaces and tabs, examining no character below
/// limit. If every character down to limit is blank, *str ends at limit - 1.
static void rskip_spaces(char** str, char* limit) {
	char* cur = *str;

	while (cur >= limit) {
		if (*cur != ' ' && *cur != '\t')
			break;
		--cur;
	}
	*str = cur;
}
154
155 /**
156  * \brief find style by name
157  * \param track track
158  * \param name style name
159  * \return index in track->styles
160  * Returnes 0 if no styles found => expects at least 1 style.
161  * Parsing code always adds "Default" style in the end.
162  */
163 static int lookup_style(ass_track_t* track, char* name) {
164         int i;
165         if (*name == '*') ++name; // FIXME: what does '*' really mean ?
166         for (i = track->n_styles - 1; i >= 0; --i) {
167                 // FIXME: mb strcasecmp ?
168                 if (strcmp(track->styles[i].Name, name) == 0)
169                         return i;
170         }
171         i = track->default_style;
172         mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_NoStyleNamedXFoundUsingY, track, name, track->styles[i].Name);
173         return i; // use the first style
174 }
175
/// Run strtocolor() on the string and return the colour it stored;
/// strtocolor()'s own return value is discarded.
static uint32_t string2color(char* p) {
	char* cursor = p;
	uint32_t color;

	(void)strtocolor(&cursor, &color);
	return color;
}
181
182 static long long string2timecode(char* p) {
183         unsigned h, m, s, ms;
184         long long tm;
185         int res = sscanf(p, "%1d:%2d:%2d.%2d", &h, &m, &s, &ms);
186         if (res < 4) {
187                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_BadTimestamp);
188                 return 0;
189         }
190         tm = ((h * 60 + m) * 60 + s) * 1000 + ms * 10;
191         return tm;
192 }
193
/**
 * \brief converts numpad-style align to align.
 * \param val numpad-style alignment
 * \return horizontal part (1..3 for val in 1..9) plus 4 times the vertical
 * part, where numpad rows 0, 1, 2 map to vertical parts 0, 2, 1
 */
static int numpad2align(int val) {
	int row = (val - 1) / 3; // 0, 1 or 2 for vertical alignment
	int column = (val - 1) % 3 + 1; // horizontal alignment

	if (row != 0)
		row = 3 - row;
	return column + row * 4;
}
205
// Read the next token from str into token; "break" out of the enclosing
// loop when next_token() returns NULL (no tokens left).
#define NEXT(str,token) \
	token = next_token(&str); \
	if (!token) break;

// ANYVAL, STRVAL and STYLEVAL each expand to the head of an "else if" branch:
// they close the previous branch with "}" and leave their own branch open.
// They are meant to be chained between an opening "if (0) {" and a closing
// "}", with tname (field name), token (field value) and target (struct being
// filled) in scope. Field names are compared case-insensitively.

// target->name = func(token)
#define ANYVAL(name,func) \
	} else if (strcasecmp(tname, #name) == 0) { \
		target->name = func(token); \
		mp_msg(MSGT_ASS, MSGL_DBG2, "%s = %s\n", #name, token);

// target->name = copy of token; the previous string is freed first
#define STRVAL(name) \
	} else if (strcasecmp(tname, #name) == 0) { \
		if (target->name != NULL) free(target->name); \
		target->name = strdup(token); \
		mp_msg(MSGT_ASS, MSGL_DBG2, "%s = %s\n", #name, token);
		
// Typed shorthands for ANYVAL
#define COLORVAL(name) ANYVAL(name,string2color)
#define INTVAL(name) ANYVAL(name,atoi)
#define FPVAL(name) ANYVAL(name,atof)
#define TIMEVAL(name) ANYVAL(name,string2timecode)
// target->name = index of the style named by token; needs track in scope
#define STYLEVAL(name) \
	} else if (strcasecmp(tname, #name) == 0) { \
		target->name = lookup_style(track, token); \
		mp_msg(MSGT_ASS, MSGL_DBG2, "%s = %s\n", #name, token);

// Make tname point to the literal #name when it equals #alias, so that the
// following *VAL chain treats the alias like the real field.
#define ALIAS(alias,name) \
	if (strcasecmp(tname, #alias) == 0) {tname = #name;}
232
/**
 * \brief Cut the next comma-separated token out of a string
 * \param str in: position to start from; out: position just after the
 * token's ',' separator, or the terminating '\0' if the string ended
 * \return pointer to the token with surrounding spaces and tabs removed, or
 * 0 if only blanks were left
 * The string is modified in place: the separator and the first trailing blank
 * of the token are overwritten with '\0'.
 */
static char* next_token(char** str) {
	char* p = *str;
	char* start;
	skip_spaces(&p);
	if (*p == '\0') {
		*str = p;
		return 0;
	}
	start = p; // start of the token
	for (; (*p != '\0') && (*p != ','); ++p) {}
	if (*p == '\0') {
		*str = p; // eos found, str will point to '\0' at exit
	} else {
		*p = '\0';
		*str = p + 1; // ',' found, str will point to the next char (beginning of the next token)
	}
	if (p > start) {
		// non-empty token: *start is not blank, so the backward scan stops
		// at or after start
		--p; // end of current token
		rskip_spaces(&p, start);
		++p; // the first trailing space character, or '\0'
	}
	// else: empty token; stepping back would form a pointer before start,
	// which may lie outside the buffer
	*p = '\0';
	return start;
}
258 /**
259  * \brief Parse the tail of Dialogue line
260  * \param track track
261  * \param event parsed data goes here
262  * \param str string to parse, zero-terminated
263  * \param n_ignored number of format options to skip at the beginning
264 */ 
265 static int process_event_tail(ass_track_t* track, ass_event_t* event, char* str, int n_ignored)
266 {
267         char* token;
268         char* tname;
269         char* p = str;
270         int i;
271         ass_event_t* target = event;
272
273         char* format = strdup(track->event_format);
274         char* q = format; // format scanning pointer
275
276         if (track->n_styles == 0) {
277                 // add "Default" style to the end
278                 // will be used if track does not contain a default style (or even does not contain styles at all)
279                 int sid = ass_alloc_style(track);
280                 track->styles[sid].Name = strdup("Default");
281                 track->styles[sid].FontName = strdup("Arial");
282         }
283
284         for (i = 0; i < n_ignored; ++i) {
285                 NEXT(q, tname);
286         }
287
288         while (1) {
289                 NEXT(q, tname);
290                 if (strcasecmp(tname, "Text") == 0) {
291                         char* last;
292                         event->Text = strdup(p);
293                         if (*event->Text != 0) {
294                                 last = event->Text + strlen(event->Text) - 1;
295                                 if (last >= event->Text && *last == '\r')
296                                         *last = 0;
297                         }
298                         mp_msg(MSGT_ASS, MSGL_DBG2, "Text = %s\n", event->Text);
299                         event->Duration -= event->Start;
300                         free(format);
301                         return 0; // "Text" is always the last
302                 }
303                 NEXT(p, token);
304
305                 ALIAS(End,Duration) // temporarily store end timecode in event->Duration
306                 if (0) { // cool ;)
307                         INTVAL(Layer)
308                         STYLEVAL(Style)
309                         STRVAL(Name)
310                         STRVAL(Effect)
311                         INTVAL(MarginL)
312                         INTVAL(MarginR)
313                         INTVAL(MarginV)
314                         TIMEVAL(Start)
315                         TIMEVAL(Duration)
316                 }
317         }
318         free(format);
319         return 1;
320 }
321
/**
 * \brief Parse command line style overrides (--ass-force-style option)
 * \param track track to apply overrides to
 * The format for overrides is [StyleName.]Field=Value
 * The overrides are read from track->library->style_overrides, a
 * NULL-terminated list. Entries without '=' are skipped. Without a
 * "StyleName." prefix the override is applied to every style of the track.
 * PlayResX, PlayResY, Timer and WrapStyle are set on the track itself.
 */
void process_force_style(ass_track_t* track) {
	char **fs, *eq, *dt, *style, *tname, *token;
	ass_style_t* target;
	int sid;
	char** list = track->library->style_overrides;
	
	if (!list) return;
	
	for (fs = list; *fs; ++fs) {
		// split "Key=Value" in place at the last '='; undone at the end of the iteration
		eq = strrchr(*fs, '=');
		if (!eq)
			continue;
		*eq = '\0';
		token = eq + 1;

		// track-level settings
		if(!strcasecmp(*fs, "PlayResX"))
			track->PlayResX = atoi(token);
		else if(!strcasecmp(*fs, "PlayResY"))
			track->PlayResY = atoi(token);
		else if(!strcasecmp(*fs, "Timer"))
			track->Timer = atof(token);
		else if(!strcasecmp(*fs, "WrapStyle"))
			track->WrapStyle = atoi(token);

		// split "StyleName.Field" in place at the last '.'; also undone below
		dt = strrchr(*fs, '.');
		if (dt) {
			*dt = '\0';
			style = *fs;
			tname = dt + 1;
		} else {
			style = NULL;
			tname = *fs;
		}
		for (sid = 0; sid < track->n_styles; ++sid) {
			// style == NULL means "apply to all styles"
			if (style == NULL || strcasecmp(track->styles[sid].Name, style) == 0) {
				target = track->styles + sid;
				// each *VAL macro expands to one "} else if (...) {" branch
				if (0) {
					STRVAL(FontName)
					COLORVAL(PrimaryColour)
					COLORVAL(SecondaryColour)
					COLORVAL(OutlineColour)
					COLORVAL(BackColour)
					FPVAL(FontSize)
					INTVAL(Bold)
					INTVAL(Italic)
					INTVAL(Underline)
					INTVAL(StrikeOut)
					FPVAL(Spacing)
					INTVAL(Angle)
					INTVAL(BorderStyle)
					INTVAL(Alignment)
					INTVAL(MarginL)
					INTVAL(MarginR)
					INTVAL(MarginV)
					INTVAL(Encoding)
					FPVAL(ScaleX)
					FPVAL(ScaleY)
					FPVAL(Outline)
					FPVAL(Shadow)
				}
			}
		}
		// restore the override string
		*eq = '=';
		if (dt) *dt = '.';
	}
}
393
/**
 * \brief Parse the Style line
 * \param track track
 * \param str string to parse, zero-terminated
 * \return always 0
 * Allocates a new style struct.
 * Field names come from track->style_format; if it is not set, a built-in
 * format chosen by track->track_type is installed first. Fields are read
 * until either the format or the line runs out of tokens.
*/ 
static int process_style(ass_track_t* track, char *str)
{

	char* token;
	char* tname;
	char* p = str;
	char* format;
	char* q; // format scanning pointer
	int sid;
	ass_style_t* style;
	ass_style_t* target;

	if (!track->style_format) {
		// no style format header
		// probably an ancient script version
		if (track->track_type == TRACK_TYPE_SSA)
			track->style_format = strdup("Name, Fontname, Fontsize, PrimaryColour, SecondaryColour,"
					"TertiaryColour, BackColour, Bold, Italic, BorderStyle, Outline,"
					"Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding");
		else
			track->style_format = strdup("Name, Fontname, Fontsize, PrimaryColour, SecondaryColour,"
					"OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut,"
					"ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow,"
					"Alignment, MarginL, MarginR, MarginV, Encoding");
	}

	// work on a copy: next_token() cuts the string up in place
	q = format = strdup(track->style_format);
	
	mp_msg(MSGT_ASS, MSGL_V, "[%p] Style: %s\n", track, str);
	
	sid = ass_alloc_style(track);

	style = track->styles + sid;
	target = style;
// fill style with some default values
	style->ScaleX = 100.;
	style->ScaleY = 100.;
	
	while (1) {
		// NEXT leaves the loop when either string has no more tokens
		NEXT(q, tname);
		NEXT(p, token);
		
//		ALIAS(TertiaryColour,OutlineColour) // ignore TertiaryColour; it appears only in SSA, and is overridden by BackColour
			
		// each *VAL macro expands to one "} else if (...) {" branch; the plain
		// statements following a macro run inside that macro's branch
		if (0) { // cool ;)
			STRVAL(Name)
				if ((strcmp(target->Name, "Default")==0) || (strcmp(target->Name, "*Default")==0))
					track->default_style = sid;
			STRVAL(FontName)
			COLORVAL(PrimaryColour)
			COLORVAL(SecondaryColour)
			COLORVAL(OutlineColour) // TertiaryColor
			COLORVAL(BackColour)
				// SSA uses BackColour for both outline and shadow
				// this will destroy SSA's TertiaryColour, but i'm not going to use it anyway
				if (track->track_type == TRACK_TYPE_SSA)
					target->OutlineColour = target->BackColour;
			FPVAL(FontSize)
			INTVAL(Bold)
			INTVAL(Italic)
			INTVAL(Underline)
			INTVAL(StrikeOut)
			FPVAL(Spacing)
			INTVAL(Angle)
			INTVAL(BorderStyle)
			INTVAL(Alignment)
				if (track->track_type == TRACK_TYPE_ASS)
					target->Alignment = numpad2align(target->Alignment);
			INTVAL(MarginL)
			INTVAL(MarginR)
			INTVAL(MarginV)
			INTVAL(Encoding)
			FPVAL(ScaleX)
			FPVAL(ScaleY)
			FPVAL(Outline)
			FPVAL(Shadow)
		}
	}
	// the scales are parsed as percentages and stored as factors
	style->ScaleX /= 100.;
	style->ScaleY /= 100.;
	// reduce the flags to 0 / 1
	style->Bold = !!style->Bold;
	style->Italic = !!style->Italic;
	style->Underline = !!style->Underline;
	// fallbacks for fields the line did not provide
	if (!style->Name)
		style->Name = strdup("Default");
	if (!style->FontName)
		style->FontName = strdup("Arial");
	// skip '@' at the start of the font name
	if (*style->FontName == '@') {
		p = style->FontName;
		style->FontName = strdup(p + 1);
		free(p);
	}
	free(format);
	return 0;
	
}
497
498 static int process_styles_line(ass_track_t* track, char *str)
499 {
500         if (!strncmp(str,"Format:", 7)) {
501                 char* p = str + 7;
502                 skip_spaces(&p);
503                 track->style_format = strdup(p);
504                 mp_msg(MSGT_ASS, MSGL_DBG2, "Style format: %s\n", track->style_format);
505         } else if (!strncmp(str,"Style:", 6)) {
506                 char* p = str + 6;
507                 skip_spaces(&p);
508                 process_style(track, p);
509         }
510         return 0;
511 }
512
513 static int process_info_line(ass_track_t* track, char *str)
514 {
515         if (!strncmp(str, "PlayResX:", 9)) {
516                 track->PlayResX = atoi(str + 9);
517         } else if (!strncmp(str,"PlayResY:", 9)) {
518                 track->PlayResY = atoi(str + 9);
519         } else if (!strncmp(str,"Timer:", 6)) {
520                 track->Timer = atof(str + 6);
521         } else if (!strncmp(str,"WrapStyle:", 10)) {
522                 track->WrapStyle = atoi(str + 10);
523         }
524         return 0;
525 }
526
527 static int process_events_line(ass_track_t* track, char *str)
528 {
529         if (!strncmp(str, "Format:", 7)) {
530                 char* p = str + 7;
531                 skip_spaces(&p);
532                 track->event_format = strdup(p);
533                 mp_msg(MSGT_ASS, MSGL_DBG2, "Event format: %s\n", track->event_format);
534         } else if (!strncmp(str, "Dialogue:", 9)) {
535                 // This should never be reached for embedded subtitles.
536                 // They have slightly different format and are parsed in ass_process_chunk,
537                 // called directly from demuxer
538                 int eid;
539                 ass_event_t* event;
540                 
541                 str += 9;
542                 skip_spaces(&str);
543
544                 eid = ass_alloc_event(track);
545                 event = track->events + eid;
546
547                 process_event_tail(track, event, str, 0);
548         } else {
549                 mp_msg(MSGT_ASS, MSGL_V, "Not understood: %s  \n", str);
550         }
551         return 0;
552 }
553
// Copied from mkvtoolnix
/**
 * \brief Decode one group of four encoded characters into up to three bytes
 * \param c1 first encoded character; each character carries 6 bits, offset by 33
 * \param c2 second encoded character
 * \param c3 third encoded character
 * \param c4 fourth encoded character
 * \param dst output position
 * \param cnt number of decoded bytes to store; must not exceed 3
 * \return dst advanced by cnt
 */
static unsigned char* decode_chars(unsigned char c1, unsigned char c2,
		unsigned char c3, unsigned char c4, unsigned char* dst, int cnt)
{
	uint32_t value;
	unsigned char bytes[3];
	int i;

	// Unsigned arithmetic: a character below 33 wraps around instead of
	// left-shifting a negative int, which is undefined.
	value = (((uint32_t)c1 - 33u) << 18) + (((uint32_t)c2 - 33u) << 12)
		+ (((uint32_t)c3 - 33u) << 6) + ((uint32_t)c4 - 33u);
	bytes[0] = (value >> 16) & 0xff;
	bytes[1] = (value >> 8) & 0xff;
	bytes[2] = value & 0xff;

	for (i = 0; i < cnt; ++i)
		*dst++ = bytes[i];
	return dst;
}
571
572 static int decode_font(ass_track_t* track)
573 {
574         unsigned char* p;
575         unsigned char* q;
576         int i;
577         int size; // original size
578         int dsize; // decoded size
579         unsigned char* buf = 0;
580
581         mp_msg(MSGT_ASS, MSGL_V, "font: %d bytes encoded data \n", track->parser_priv->fontdata_used);
582         size = track->parser_priv->fontdata_used;
583         if (size % 4 == 1) {
584                 mp_msg(MSGT_ASS, MSGL_ERR, MSGTR_LIBASS_BadEncodedDataSize);
585                 goto error_decode_font;
586         }
587         buf = malloc(size / 4 * 3 + 2);
588         q = buf;
589         for (i = 0, p = (unsigned char*)track->parser_priv->fontdata; i < size / 4; i++, p+=4) {
590                 q = decode_chars(p[0], p[1], p[2], p[3], q, 3);
591         }
592         if (size % 4 == 2) {
593                 q = decode_chars(p[0], p[1], 0, 0, q, 1);
594         } else if (size % 4 == 3) {
595                 q = decode_chars(p[0], p[1], p[2], 0, q, 2);
596         }
597         dsize = q - buf;
598         assert(dsize <= size / 4 * 3 + 2);
599         
600         if (track->library->extract_fonts) {
601                 ass_add_font(track->library, track->parser_priv->fontname, (char*)buf, dsize);
602                 buf = 0;
603         }
604
605 error_decode_font:
606         if (buf) free(buf);
607         free(track->parser_priv->fontname);
608         free(track->parser_priv->fontdata);
609         track->parser_priv->fontname = 0;
610         track->parser_priv->fontdata = 0;
611         track->parser_priv->fontdata_size = 0;
612         track->parser_priv->fontdata_used = 0;
613         return 0;
614 }
615
616 static int process_fonts_line(ass_track_t* track, char *str)
617 {
618         int len;
619
620         if (!strncmp(str, "fontname:", 9)) {
621                 char* p = str + 9;
622                 skip_spaces(&p);
623                 if (track->parser_priv->fontname) {
624                         decode_font(track);
625                 }
626                 track->parser_priv->fontname = strdup(p);
627                 mp_msg(MSGT_ASS, MSGL_V, "fontname: %s\n", track->parser_priv->fontname);
628                 return 0;
629         }
630         
631         if (!track->parser_priv->fontname) {
632                 mp_msg(MSGT_ASS, MSGL_V, "Not understood: %s  \n", str);
633                 return 0;
634         }
635
636         len = strlen(str);
637         if (len > 80) {
638                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_FontLineTooLong, len, str);
639                 return 0;
640         }
641         if (track->parser_priv->fontdata_used + len > track->parser_priv->fontdata_size) {
642                 track->parser_priv->fontdata_size += 100 * 1024;
643                 track->parser_priv->fontdata = realloc(track->parser_priv->fontdata, track->parser_priv->fontdata_size);
644         }
645         memcpy(track->parser_priv->fontdata + track->parser_priv->fontdata_used, str, len);
646         track->parser_priv->fontdata_used += len;
647         
648         return 0;
649 }
650
651 /**
652  * \brief Parse a header line
653  * \param track track
654  * \param str string to parse, zero-terminated
655 */ 
656 static int process_line(ass_track_t* track, char *str)
657 {
658         if (!strncasecmp(str, "[Script Info]", 13)) {
659                 track->parser_priv->state = PST_INFO;
660         } else if (!strncasecmp(str, "[V4 Styles]", 11)) {
661                 track->parser_priv->state = PST_STYLES;
662                 track->track_type = TRACK_TYPE_SSA;
663         } else if (!strncasecmp(str, "[V4+ Styles]", 12)) {
664                 track->parser_priv->state = PST_STYLES;
665                 track->track_type = TRACK_TYPE_ASS;
666         } else if (!strncasecmp(str, "[Events]", 8)) {
667                 track->parser_priv->state = PST_EVENTS;
668         } else if (!strncasecmp(str, "[Fonts]", 7)) {
669                 track->parser_priv->state = PST_FONTS;
670         } else {
671                 switch (track->parser_priv->state) {
672                 case PST_INFO:
673                         process_info_line(track, str);
674                         break;
675                 case PST_STYLES:
676                         process_styles_line(track, str);
677                         break;
678                 case PST_EVENTS:
679                         process_events_line(track, str);
680                         break;
681                 case PST_FONTS:
682                         process_fonts_line(track, str);
683                         break;
684                 default:
685                         break;
686                 }
687         }
688
689         // there is no explicit end-of-font marker in ssa/ass
690         if ((track->parser_priv->state != PST_FONTS) && (track->parser_priv->fontname))
691                 decode_font(track);
692
693         return 0;
694 }
695
696 static int process_text(ass_track_t* track, char* str)
697 {
698         char* p = str;
699         while(1) {
700                 char* q;
701                 while (1) {
702                         if ((*p=='\r')||(*p=='\n')) ++p;
703                         else if (p[0]=='\xef' && p[1]=='\xbb' && p[2]=='\xbf') p+=3; // U+FFFE (BOM)
704                         else break;
705                 }
706                 for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {};
707                 if (q==p)
708                         break;
709                 if (*q != '\0')
710                         *(q++) = '\0';
711                 process_line(track, p);
712                 if (*q == '\0')
713                         break;
714                 p = q;
715         }
716         return 0;
717 }
718
719 /**
720  * \brief Process CodecPrivate section of subtitle stream
721  * \param track track
722  * \param data string to parse
723  * \param size length of data
724  CodecPrivate section contains [Stream Info] and [V4+ Styles] ([V4 Styles] for SSA) sections
725 */ 
726 void ass_process_codec_private(ass_track_t* track, char *data, int size)
727 {
728         char* str = malloc(size + 1);
729
730         memcpy(str, data, size);
731         str[size] = '\0';
732
733         process_text(track, str);
734         free(str);
735
736         if (!track->event_format) {
737                 // probably an mkv produced by ancient mkvtoolnix
738                 // such files don't have [Events] and Format: headers
739                 track->parser_priv->state = PST_EVENTS;
740                 if (track->track_type == TRACK_TYPE_SSA)
741                         track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text");
742                 else
743                         track->event_format = strdup("Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text");
744         }
745
746         process_force_style(track);
747 }
748
749 static int check_duplicate_event(ass_track_t* track, int ReadOrder)
750 {
751         int i;
752         for (i = 0; i<track->n_events - 1; ++i) // ignoring last event, it is the one we are comparing with
753                 if (track->events[i].ReadOrder == ReadOrder)
754                         return 1;
755         return 0;
756 }
757
758 /**
759  * \brief Process a chunk of subtitle stream data. In Matroska, this contains exactly 1 event (or a commentary).
760  * \param track track
761  * \param data string to parse
762  * \param size length of data
763  * \param timecode starting time of the event (milliseconds)
764  * \param duration duration of the event (milliseconds)
765 */ 
766 void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration)
767 {
768         char* str;
769         int eid;
770         char* p;
771         char* token;
772         ass_event_t* event;
773
774         if (!track->event_format) {
775                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_EventFormatHeaderMissing);
776                 return;
777         }
778         
779         str = malloc(size + 1);
780         memcpy(str, data, size);
781         str[size] = '\0';
782         mp_msg(MSGT_ASS, MSGL_V, "event at %" PRId64 ", +%" PRId64 ": %s  \n", (int64_t)timecode, (int64_t)duration, str);
783
784         eid = ass_alloc_event(track);
785         event = track->events + eid;
786
787         p = str;
788         
789         do { 
790                 NEXT(p, token);
791                 event->ReadOrder = atoi(token);
792                 if (check_duplicate_event(track, event->ReadOrder))
793                         break;
794
795                 NEXT(p, token);
796                 event->Layer = atoi(token);
797
798                 process_event_tail(track, event, p, 3);
799
800                 event->Start = timecode;
801                 event->Duration = duration;
802                 
803                 free(str);
804                 return;
805 //              dump_events(tid);
806         } while (0);
807         // some error
808         ass_free_event(track, eid);
809         track->n_events--;
810         free(str);
811 }
812
813 #ifdef HAVE_ICONV
814 /** \brief recode buffer to utf-8
815  * constraint: codepage != 0
816  * \param data pointer to text buffer
817  * \param size buffer size
818  * \return a pointer to recoded buffer, caller is responsible for freeing it
819 **/
820 static char* sub_recode(char* data, size_t size, char* codepage)
821 {
822         static iconv_t icdsc = (iconv_t)(-1);
823         char* tocp = "UTF-8";
824         char* outbuf;
825         assert(codepage);
826
827         {
828                 const char* cp_tmp = codepage;
829 #ifdef HAVE_ENCA
830                 char enca_lang[3], enca_fallback[100];
831                 if (sscanf(codepage, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
832                                 || sscanf(codepage, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2) {
833                         cp_tmp = guess_buffer_cp((unsigned char*)data, size, enca_lang, enca_fallback);
834                 }
835 #endif
836                 if ((icdsc = iconv_open (tocp, cp_tmp)) != (iconv_t)(-1)){
837                         mp_msg(MSGT_ASS,MSGL_V,"LIBSUB: opened iconv descriptor.\n");
838                 } else
839                         mp_msg(MSGT_ASS,MSGL_ERR,MSGTR_LIBASS_ErrorOpeningIconvDescriptor);
840         }
841
842         {
843                 size_t osize = size;
844                 size_t ileft = size;
845                 size_t oleft = size - 1;
846                 char* ip;
847                 char* op;
848                 size_t rc;
849                 int clear = 0;
850                 
851                 outbuf = malloc(osize);
852                 ip = data;
853                 op = outbuf;
854                 
855                 while (1) {
856                         if (ileft)
857                                 rc = iconv(icdsc, &ip, &ileft, &op, &oleft);
858                         else {// clear the conversion state and leave
859                                 clear = 1;
860                                 rc = iconv(icdsc, NULL, NULL, &op, &oleft);
861                         }
862                         if (rc == (size_t)(-1)) {
863                                 if (errno == E2BIG) {
864                                         size_t offset = op - outbuf;
865                                         outbuf = (char*)realloc(outbuf, osize + size);
866                                         op = outbuf + offset;
867                                         osize += size;
868                                         oleft += size;
869                                 } else {
870                                         mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_ErrorRecodingFile);
871                                         return NULL;
872                                 }
873                         } else
874                                 if (clear)
875                                         break;
876                 }
877                 outbuf[osize - oleft - 1] = 0;
878         }
879
880         if (icdsc != (iconv_t)(-1)) {
881                 (void)iconv_close(icdsc);
882                 icdsc = (iconv_t)(-1);
883                 mp_msg(MSGT_ASS,MSGL_V,"LIBSUB: closed iconv descriptor.\n");
884         }
885         
886         return outbuf;
887 }
888 #endif // ICONV
889
890 /**
891  * \brief read file contents into newly allocated buffer
892  * \param fname file name
893  * \param bufsize out: file size
894  * \return pointer to file contents. Caller is responsible for its deallocation.
895  */
896 static char* read_file(char* fname, size_t *bufsize)
897 {
898         int res;
899         long sz;
900         long bytes_read;
901         char* buf;
902
903         FILE* fp = fopen(fname, "rb");
904         if (!fp) {
905                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_FopenFailed, fname);
906                 return 0;
907         }
908         res = fseek(fp, 0, SEEK_END);
909         if (res == -1) {
910                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_FseekFailed, fname);
911                 fclose(fp);
912                 return 0;
913         }
914         
915         sz = ftell(fp);
916         rewind(fp);
917
918         if (sz > 10*1024*1024) {
919                 mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_RefusingToLoadSubtitlesLargerThan10M, fname);
920                 fclose(fp);
921                 return 0;
922         }
923         
924         mp_msg(MSGT_ASS, MSGL_V, "file size: %ld\n", sz);
925         
926         buf = malloc(sz + 1);
927         assert(buf);
928         bytes_read = 0;
929         do {
930                 res = fread(buf + bytes_read, 1, sz - bytes_read, fp);
931                 if (res <= 0) {
932                         mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_ReadFailed, errno, strerror(errno));
933                         fclose(fp);
934                         free(buf);
935                         return 0;
936                 }
937                 bytes_read += res;
938         } while (sz - bytes_read > 0);
939         buf[sz] = '\0';
940         fclose(fp);
941         
942         if (bufsize)
943                 *bufsize = sz;
944         return buf;
945 }
946
947 /*
948  * \param buf pointer to subtitle text in utf-8
949  */
950 static ass_track_t* parse_memory(ass_library_t* library, char* buf)
951 {
952         ass_track_t* track;
953         int i;
954         
955         track = ass_new_track(library);
956         
957         // process header
958         process_text(track, buf);
959
960         // external SSA/ASS subs does not have ReadOrder field
961         for (i = 0; i < track->n_events; ++i)
962                 track->events[i].ReadOrder = i;
963
964         // there is no explicit end-of-font marker in ssa/ass
965         if (track->parser_priv->fontname)
966                 decode_font(track);
967
968         if (track->track_type == TRACK_TYPE_UNKNOWN) {
969                 ass_free_track(track);
970                 return 0;
971         }
972
973         process_force_style(track);
974
975         return track;
976 }
977
978 /**
979  * \brief Read subtitles from memory.
980  * \param library libass library object
981  * \param buf pointer to subtitles text
982  * \param bufsize size of buffer
983  * \param codepage recode buffer contents from given codepage
984  * \return newly allocated track
985 */ 
986 ass_track_t* ass_read_memory(ass_library_t* library, char* buf, size_t bufsize, char* codepage)
987 {
988         ass_track_t* track;
989         int need_free = 0;
990         
991         if (!buf)
992                 return 0;
993         
994 #ifdef HAVE_ICONV
995         if (codepage)
996                 buf = sub_recode(buf, bufsize, codepage);
997         if (!buf)
998                 return 0;
999         else
1000                 need_free = 1;
1001 #endif
1002         track = parse_memory(library, buf);
1003         if (need_free)
1004                 free(buf);
1005         if (!track)
1006                 return 0;
1007
1008         mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_AddedSubtitleFileMemory, track->n_styles, track->n_events);
1009         return track;
1010 }
1011
/**
 * \brief Read a file and, if requested, recode its contents to utf-8.
 * \param fname file name
 * \param codepage recode file contents from given codepage; 0 means no recoding
 * \param size out: length of the returned buffer in bytes (may be NULL)
 * \return newly allocated, NUL-terminated buffer, or 0 on failure.
 * Caller is responsible for its deallocation.
 */
char* read_file_recode(char* fname, char* codepage, int* size)
{
        char* buf;
        size_t bufsize;

        buf = read_file(fname, &bufsize);
        if (!buf)
                return 0;
#ifdef HAVE_ICONV
        if (codepage) {
                char* tmpbuf = sub_recode(buf, bufsize, codepage);
                free(buf);
                buf = tmpbuf;
                if (!buf)
                        return 0;
                // recoding changes the length, so the file size no longer
                // describes the buffer; sub_recode() NUL-terminates its output
                bufsize = strlen(buf);
        }
#endif
        if (size)
                *size = bufsize;
        return buf;
}
1032
1033 /**
1034  * \brief Read subtitles from file.
1035  * \param library libass library object
1036  * \param fname file name
1037  * \param codepage recode buffer contents from given codepage
1038  * \return newly allocated track
1039 */ 
1040 ass_track_t* ass_read_file(ass_library_t* library, char* fname, char* codepage)
1041 {
1042         char* buf;
1043         ass_track_t* track;
1044         size_t bufsize;
1045
1046         buf = read_file_recode(fname, codepage, &bufsize);
1047         if (!buf)
1048                 return 0;
1049         track = parse_memory(library, buf);
1050         free(buf);
1051         if (!track)
1052                 return 0;
1053         
1054         track->name = strdup(fname);
1055
1056         mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_AddedSubtitleFileFname, fname, track->n_styles, track->n_events);
1057         
1058 //      dump_events(forced_tid);
1059         return track;
1060 }
1061
1062 /**
1063  * \brief read styles from file into already initialized track
1064  */
1065 int ass_read_styles(ass_track_t* track, char* fname, char* codepage)
1066 {
1067         char* buf;
1068         parser_state_t old_state;
1069         size_t sz;
1070
1071         buf = read_file(fname, &sz);
1072         if (!buf)
1073                 return 1;
1074 #ifdef HAVE_ICONV
1075         if (codepage) {
1076                 char* tmpbuf;
1077                 tmpbuf = sub_recode(buf, sz, codepage);
1078                 free(buf);
1079                 buf = tmpbuf;
1080         }
1081         if (!buf)
1082                 return 0;
1083 #endif
1084
1085         old_state = track->parser_priv->state;
1086         track->parser_priv->state = PST_STYLES;
1087         process_text(track, buf);
1088         track->parser_priv->state = old_state;
1089
1090         return 0;
1091 }
1092
1093 long long ass_step_sub(ass_track_t* track, long long now, int movement) {
1094         int i;
1095
1096         if (movement == 0) return 0;
1097         if (track->n_events == 0) return 0;
1098         
1099         if (movement < 0)
1100                 for (i = 0; (i < track->n_events) && ((long long)(track->events[i].Start + track->events[i].Duration) <= now); ++i) {}
1101         else
1102                 for (i = track->n_events - 1; (i >= 0) && ((long long)(track->events[i].Start) > now); --i) {}
1103         
1104         // -1 and n_events are ok
1105         assert(i >= -1); assert(i <= track->n_events);
1106         i += movement;
1107         if (i < 0) i = 0;
1108         if (i >= track->n_events) i = track->n_events - 1;
1109         return ((long long)track->events[i].Start) - now;
1110 }
1111
1112 ass_track_t* ass_new_track(ass_library_t* library) {
1113         ass_track_t* track = calloc(1, sizeof(ass_track_t));
1114         track->library = library;
1115         track->parser_priv = calloc(1, sizeof(parser_priv_t));
1116         return track;
1117 }
1118