]> granicus.if.org Git - libass/blob - libass/ass.c
Skip BOM at the beginning of text in ASS parser.
[libass] / libass / ass.c
1 // -*- c-basic-offset: 8; indent-tabs-mode: t -*-
2 // vim:ts=8:sw=8:noet:ai:
3 /*
4   Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov@gmail.com>
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "config.h"
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <assert.h>
27 #include <errno.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <unistd.h>
31 #include <inttypes.h>
32
33 #ifdef USE_ICONV
34 #include <iconv.h>
35 #endif
36
37 #include "ass.h"
38 #include "ass_utils.h"
39 #include "ass_library.h"
40 #include "mputils.h"
41
// Section of the script the parser is currently inside.
// process_line() switches the state when it sees a section header and
// dispatches every other line according to it; lines read while the state
// is PST_UNKNOWN (0) are ignored.
typedef enum {PST_UNKNOWN = 0, PST_INFO, PST_STYLES, PST_EVENTS, PST_FONTS} parser_state_t;
43
// Private parser state attached to a track (track->parser_priv).
struct parser_priv_s {
        parser_state_t state;   // section currently being parsed
        char* fontname;         // name of the embedded font being collected, or 0 if none
        char* fontdata;         // still-encoded font data gathered from [Fonts] lines
        int fontdata_size;      // bytes allocated for fontdata
        int fontdata_used;      // bytes of fontdata filled so far
};
51
// Number of slots added to track->styles / track->events each time the
// corresponding array runs full (see ass_alloc_style / ass_alloc_event).
#define ASS_STYLES_ALLOC 20
#define ASS_EVENTS_ALLOC 200
54
55 void ass_free_track(ass_track_t* track) {
56         int i;
57         
58         if (track->parser_priv) {
59                 if (track->parser_priv->fontname)
60                         free(track->parser_priv->fontname);
61                 if (track->parser_priv->fontdata)
62                         free(track->parser_priv->fontdata);
63                 free(track->parser_priv);
64         }
65         if (track->style_format)
66                 free(track->style_format);
67         if (track->event_format)
68                 free(track->event_format);
69         if (track->styles) {
70                 for (i = 0; i < track->n_styles; ++i)
71                         ass_free_style(track, i);
72                 free(track->styles);
73         }
74         if (track->events) {
75                 for (i = 0; i < track->n_events; ++i)
76                         ass_free_event(track, i);
77                 free(track->events);
78         }
79 }
80
81 /// \brief Allocate a new style struct
82 /// \param track track
83 /// \return style id
84 int ass_alloc_style(ass_track_t* track) {
85         int sid;
86         
87         assert(track->n_styles <= track->max_styles);
88
89         if (track->n_styles == track->max_styles) {
90                 track->max_styles += ASS_STYLES_ALLOC;
91                 track->styles = (ass_style_t*)realloc(track->styles, sizeof(ass_style_t)*track->max_styles);
92         }
93         
94         sid = track->n_styles++;
95         memset(track->styles + sid, 0, sizeof(ass_style_t));
96         return sid;
97 }
98
99 /// \brief Allocate a new event struct
100 /// \param track track
101 /// \return event id
102 int ass_alloc_event(ass_track_t* track) {
103         int eid;
104         
105         assert(track->n_events <= track->max_events);
106
107         if (track->n_events == track->max_events) {
108                 track->max_events += ASS_EVENTS_ALLOC;
109                 track->events = (ass_event_t*)realloc(track->events, sizeof(ass_event_t)*track->max_events);
110         }
111         
112         eid = track->n_events++;
113         memset(track->events + eid, 0, sizeof(ass_event_t));
114         return eid;
115 }
116
117 void ass_free_event(ass_track_t* track, int eid) {
118         ass_event_t* event = track->events + eid;
119         if (event->Name)
120                 free(event->Name);
121         if (event->Effect)
122                 free(event->Effect);
123         if (event->Text)
124                 free(event->Text);
125         if (event->render_priv)
126                 free(event->render_priv);
127 }
128
129 void ass_free_style(ass_track_t* track, int sid) {
130         ass_style_t* style = track->styles + sid;
131         if (style->Name)
132                 free(style->Name);
133         if (style->FontName)
134                 free(style->FontName);
135 }
136
137 // ==============================================================================================
138
/// \brief Advance *str past leading spaces and tabs
/// \param str in/out pointer into a zero-terminated string
static void skip_spaces(char** str) {
        char* cur;

        for (cur = *str; *cur == ' ' || *cur == '\t'; cur++)
                ;
        *str = cur;
}
145
/// \brief Move *str backwards over spaces and tabs
/// \param str in/out pointer; on return it points at the last non-blank
///            character, or at limit - 1 if everything down to limit is blank
/// \param limit lowest address that may be examined
static void rskip_spaces(char** str, char* limit) {
        char* cur = *str;

        for (;;) {
                if (cur < limit)
                        break;
                if (*cur != ' ' && *cur != '\t')
                        break;
                cur--;
        }
        *str = cur;
}
152
153 /**
154  * \brief find style by name
155  * \param track track
156  * \param name style name
157  * \return index in track->styles
158  * Returnes 0 if no styles found => expects at least 1 style.
159  * Parsing code always adds "Default" style in the end.
160  */
161 static int lookup_style(ass_track_t* track, char* name) {
162         int i;
163         if (*name == '*') ++name; // FIXME: what does '*' really mean ?
164         for (i=0; i<track->n_styles; ++i) {
165                 // FIXME: mb strcasecmp ?
166                 if (strcmp(track->styles[i].Name, name) == 0)
167                         return i;
168         }
169         i = track->default_style;
170         mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_NoStyleNamedXFoundUsingY, track, name, track->styles[i].Name);
171         return i; // use the first style
172 }
173
/// \brief Parse a colour value from a string
/// \param p zero-terminated string to parse
/// \return the value strtocolor() stored; its status code is ignored
static uint32_t string2color(char* p) {
        uint32_t color;
        char* cursor = p;

        (void)strtocolor(&cursor, &color);
        return color;
}
179
180 static long long string2timecode(char* p) {
181         unsigned h, m, s, ms;
182         long long tm;
183         int res = sscanf(p, "%1d:%2d:%2d.%2d", &h, &m, &s, &ms);
184         if (res < 4) {
185                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_BadTimestamp);
186                 return 0;
187         }
188         tm = ((h * 60 + m) * 60 + s) * 1000 + ms * 10;
189         return tm;
190 }
191
/**
 * \brief converts numpad-style align to align.
 * \param val numpad-style value (1..9)
 * \return horizontal position (1..3) plus 4 for the top row (7..9)
 *         or 8 for the middle row (4..6)
 */
static int numpad2align(int val) {
        int row = (val - 1) / 3;         // 0, 1 or 2: numpad row counted from the bottom
        int column = (val - 1) % 3 + 1;  // horizontal alignment
        int vertical = (row == 0) ? 0 : 3 - row;

        return column + 4 * vertical;
}
203
// Fetch the next comma-separated token of str into token.
// Expands to two statements and leaves the enclosing loop (break) once the
// string is exhausted, so it must be used inside a braced loop body.
#define NEXT(str,token) \
        token = next_token(&str); \
        if (!token) break;

// The *VAL macros below each expand to one "} else if (...) {" branch and are
// meant to be chained inside an "if (0) { ... }" block, which supplies the
// opening "if" and the final closing brace. They use the surrounding locals
// tname (field name), token (field value) and target (struct being filled).
// Statements written right after a macro invocation land inside its branch.

// Field converted with func(token) and stored in target->name.
#define ANYVAL(name,func) \
        } else if (strcasecmp(tname, #name) == 0) { \
                target->name = func(token); \
                mp_msg(MSGT_ASS, MSGL_DBG2, "%s = %s\n", #name, token);

// String field: frees the previous value and stores a copy of token.
#define STRVAL(name) \
        } else if (strcasecmp(tname, #name) == 0) { \
                if (target->name != NULL) free(target->name); \
                target->name = strdup(token); \
                mp_msg(MSGT_ASS, MSGL_DBG2, "%s = %s\n", #name, token);

// Typed shorthands for ANYVAL.
#define COLORVAL(name) ANYVAL(name,string2color)
#define INTVAL(name) ANYVAL(name,atoi)
#define FPVAL(name) ANYVAL(name,atof)
#define TIMEVAL(name) ANYVAL(name,string2timecode)
// Style reference: token is a style name, stored as its index in track->styles
// (requires a local named track).
#define STYLEVAL(name) \
        } else if (strcasecmp(tname, #name) == 0) { \
                target->name = lookup_style(track, token); \
                mp_msg(MSGT_ASS, MSGL_DBG2, "%s = %s\n", #name, token);

// Treat field name "alias" as if it were "name" (rewrites tname).
#define ALIAS(alias,name) \
        if (strcasecmp(tname, #alias) == 0) {tname = #name;}
230
/**
 * \brief Cut the next comma-separated token out of a string
 * \param str in/out scanning pointer; advanced past the token and its comma
 * \return the token with surrounding spaces/tabs removed, or 0 when only
 *         blanks remain
 * The input buffer is modified: the separating comma and the first trailing
 * blank of the token are overwritten with '\0'.
 */
static char* next_token(char** str) {
        char* p = *str;
        char* start;

        skip_spaces(&p);
        if (*p == '\0') {
                *str = p;
                return 0;
        }

        start = p; // start of the token
        while (*p != '\0' && *p != ',')
                ++p;

        if (*p == '\0') {
                *str = p; // eos found, str will point to '\0' at exit
        } else {
                *p = '\0';
                *str = p + 1; // ',' found, str will point to the beginning of the next token
        }

        // Empty token (the comma came first): it is already terminated at
        // start. Returning here avoids forming a pointer before start, which
        // may lie before the beginning of the buffer.
        if (p == start)
                return start;

        --p; // last character of the token
        // *start is never blank, so this stops at start or later
        rskip_spaces(&p, start);
        ++p; // the first trailing blank, or the terminator
        *p = '\0';
        return start;
}
256 /**
257  * \brief Parse the tail of Dialogue line
258  * \param track track
259  * \param event parsed data goes here
260  * \param str string to parse, zero-terminated
261  * \param n_ignored number of format options to skip at the beginning
262 */ 
263 static int process_event_tail(ass_track_t* track, ass_event_t* event, char* str, int n_ignored)
264 {
265         char* token;
266         char* tname;
267         char* p = str;
268         int i;
269         ass_event_t* target = event;
270
271         char* format = strdup(track->event_format);
272         char* q = format; // format scanning pointer
273
274         if (track->n_styles == 0) {
275                 // add "Default" style to the end
276                 // will be used if track does not contain a default style (or even does not contain styles at all)
277                 int sid = ass_alloc_style(track);
278                 track->styles[sid].Name = strdup("Default");
279                 track->styles[sid].FontName = strdup("Arial");
280         }
281
282         for (i = 0; i < n_ignored; ++i) {
283                 NEXT(q, tname);
284         }
285
286         while (1) {
287                 NEXT(q, tname);
288                 if (strcasecmp(tname, "Text") == 0) {
289                         char* last;
290                         event->Text = strdup(p);
291                         if (*event->Text != 0) {
292                                 last = event->Text + strlen(event->Text) - 1;
293                                 if (last >= event->Text && *last == '\r')
294                                         *last = 0;
295                         }
296                         mp_msg(MSGT_ASS, MSGL_DBG2, "Text = %s\n", event->Text);
297                         event->Duration -= event->Start;
298                         free(format);
299                         return 0; // "Text" is always the last
300                 }
301                 NEXT(p, token);
302
303                 ALIAS(End,Duration) // temporarily store end timecode in event->Duration
304                 if (0) { // cool ;)
305                         INTVAL(Layer)
306                         STYLEVAL(Style)
307                         STRVAL(Name)
308                         STRVAL(Effect)
309                         INTVAL(MarginL)
310                         INTVAL(MarginR)
311                         INTVAL(MarginV)
312                         TIMEVAL(Start)
313                         TIMEVAL(Duration)
314                 }
315         }
316         free(format);
317         return 1;
318 }
319
/**
 * \brief Parse command line style overrides (--ass-force-style option)
 * \param track track to apply overrides to
 * The format for overrides is [StyleName.]Field=Value
 * The overrides are read from track->library->style_overrides, a
 * NULL-terminated list of strings. Each string is split in place at its last
 * '=' and last '.' while it is processed and restored afterwards.
 * Without a StyleName prefix the override is applied to every style.
 */
void process_force_style(ass_track_t* track) {
        char **fs, *eq, *dt, *style, *tname, *token;
        ass_style_t* target;
        int sid;
        char** list = track->library->style_overrides;
        
        if (!list) return;
        
        for (fs = list; *fs; ++fs) {
                // split "key=value" at the last '='; entries without '=' are skipped
                eq = strrchr(*fs, '=');
                if (!eq)
                        continue;
                *eq = '\0';
                token = eq + 1;

                // track-wide settings (these still fall through to the style
                // matching below)
                if(!strcasecmp(*fs, "PlayResX"))
                        track->PlayResX = atoi(token);
                else if(!strcasecmp(*fs, "PlayResY"))
                        track->PlayResY = atoi(token);
                else if(!strcasecmp(*fs, "Timer"))
                        track->Timer = atof(token);
                else if(!strcasecmp(*fs, "WrapStyle"))
                        track->WrapStyle = atoi(token);

                // split the key into optional style name and field name at the last '.'
                dt = strrchr(*fs, '.');
                if (dt) {
                        *dt = '\0';
                        style = *fs;
                        tname = dt + 1;
                } else {
                        style = NULL;
                        tname = *fs;
                }
                for (sid = 0; sid < track->n_styles; ++sid) {
                        // style == NULL means "apply to all styles"
                        if (style == NULL || strcasecmp(track->styles[sid].Name, style) == 0) {
                                target = track->styles + sid;
                                // macro chain: assigns token to the field of target named by tname
                                if (0) {
                                        STRVAL(FontName)
                                        COLORVAL(PrimaryColour)
                                        COLORVAL(SecondaryColour)
                                        COLORVAL(OutlineColour)
                                        COLORVAL(BackColour)
                                        FPVAL(FontSize)
                                        INTVAL(Bold)
                                        INTVAL(Italic)
                                        INTVAL(Underline)
                                        INTVAL(StrikeOut)
                                        FPVAL(Spacing)
                                        INTVAL(Angle)
                                        INTVAL(BorderStyle)
                                        INTVAL(Alignment)
                                        INTVAL(MarginL)
                                        INTVAL(MarginR)
                                        INTVAL(MarginV)
                                        INTVAL(Encoding)
                                        FPVAL(ScaleX)
                                        FPVAL(ScaleY)
                                        FPVAL(Outline)
                                        FPVAL(Shadow)
                                }
                        }
                }
                // undo the in-place splitting so the override list stays intact
                *eq = '=';
                if (dt) *dt = '.';
        }
}
391
/**
 * \brief Parse the Style line
 * \param track track
 * \param str string to parse, zero-terminated
 * \return always 0
 * Allocates a new style struct.
 * Field names come from track->style_format; if no "Format:" line has been
 * seen, a built-in format matching the track type (SSA or ASS) is installed
 * first. Parsing stops when either the format or the line runs out of tokens.
*/ 
static int process_style(ass_track_t* track, char *str)
{

        char* token;
        char* tname;
        char* p = str;
        char* format;
        char* q; // format scanning pointer
        int sid;
        ass_style_t* style;
        ass_style_t* target;

        if (!track->style_format) {
                // no style format header
                // probably an ancient script version
                if (track->track_type == TRACK_TYPE_SSA)
                        track->style_format = strdup("Name, Fontname, Fontsize, PrimaryColour, SecondaryColour,"
                                        "TertiaryColour, BackColour, Bold, Italic, BorderStyle, Outline,"
                                        "Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding");
                else
                        track->style_format = strdup("Name, Fontname, Fontsize, PrimaryColour, SecondaryColour,"
                                        "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut,"
                                        "ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow,"
                                        "Alignment, MarginL, MarginR, MarginV, Encoding");
        }

        // next_token() modifies its input, so tokenize a private copy of the format
        q = format = strdup(track->style_format);
        
        mp_msg(MSGT_ASS, MSGL_V, "[%p] Style: %s\n", track, str);
        
        sid = ass_alloc_style(track);

        style = track->styles + sid;
        target = style;
// fill style with some default values
        style->ScaleX = 100.;
        style->ScaleY = 100.;
        
        // pair each format field name (tname) with the matching value (token);
        // NEXT leaves the loop when either string is exhausted
        while (1) {
                NEXT(q, tname);
                NEXT(p, token);
                
//              ALIAS(TertiaryColour,OutlineColour) // ignore TertiaryColour; it appears only in SSA, and is overridden by BackColour
                        
                // Macro chain: every *VAL opens an "else if" branch, so the
                // indented statements after a macro run inside that branch.
                if (0) { // cool ;)
                        STRVAL(Name)
                                if ((strcmp(target->Name, "Default")==0) || (strcmp(target->Name, "*Default")==0))
                                        track->default_style = sid;
                        STRVAL(FontName)
                        COLORVAL(PrimaryColour)
                        COLORVAL(SecondaryColour)
                        COLORVAL(OutlineColour) // TertiaryColor
                        COLORVAL(BackColour)
                                // SSA uses BackColour for both outline and shadow
                                // this will destroy SSA's TertiaryColour, but i'm not going to use it anyway
                                if (track->track_type == TRACK_TYPE_SSA)
                                        target->OutlineColour = target->BackColour;
                        FPVAL(FontSize)
                        INTVAL(Bold)
                        INTVAL(Italic)
                        INTVAL(Underline)
                        INTVAL(StrikeOut)
                        FPVAL(Spacing)
                        INTVAL(Angle)
                        INTVAL(BorderStyle)
                        INTVAL(Alignment)
                                // ASS alignment values are numpad-style and get converted
                                if (track->track_type == TRACK_TYPE_ASS)
                                        target->Alignment = numpad2align(target->Alignment);
                        INTVAL(MarginL)
                        INTVAL(MarginR)
                        INTVAL(MarginV)
                        INTVAL(Encoding)
                        FPVAL(ScaleX)
                        FPVAL(ScaleY)
                        FPVAL(Outline)
                        FPVAL(Shadow)
                }
        }
        // scales are given in percent; store them as factors
        style->ScaleX /= 100.;
        style->ScaleY /= 100.;
        // normalize flags to 0/1
        style->Bold = !!style->Bold;
        style->Italic = !!style->Italic;
        style->Underline = !!style->Underline;
        // make sure the style always has a name and a font name
        if (!style->Name)
                style->Name = strdup("Default");
        if (!style->FontName)
                style->FontName = strdup("Arial");
        free(format);
        return 0;
        
}
489
490 static int process_styles_line(ass_track_t* track, char *str)
491 {
492         if (!strncmp(str,"Format:", 7)) {
493                 char* p = str + 7;
494                 skip_spaces(&p);
495                 track->style_format = strdup(p);
496                 mp_msg(MSGT_ASS, MSGL_DBG2, "Style format: %s\n", track->style_format);
497         } else if (!strncmp(str,"Style:", 6)) {
498                 char* p = str + 6;
499                 skip_spaces(&p);
500                 process_style(track, p);
501         }
502         return 0;
503 }
504
505 static int process_info_line(ass_track_t* track, char *str)
506 {
507         if (!strncmp(str, "PlayResX:", 9)) {
508                 track->PlayResX = atoi(str + 9);
509         } else if (!strncmp(str,"PlayResY:", 9)) {
510                 track->PlayResY = atoi(str + 9);
511         } else if (!strncmp(str,"Timer:", 6)) {
512                 track->Timer = atof(str + 6);
513         } else if (!strncmp(str,"WrapStyle:", 10)) {
514                 track->WrapStyle = atoi(str + 10);
515         }
516         return 0;
517 }
518
519 static int process_events_line(ass_track_t* track, char *str)
520 {
521         if (!strncmp(str, "Format:", 7)) {
522                 char* p = str + 7;
523                 skip_spaces(&p);
524                 track->event_format = strdup(p);
525                 mp_msg(MSGT_ASS, MSGL_DBG2, "Event format: %s\n", track->event_format);
526         } else if (!strncmp(str, "Dialogue:", 9)) {
527                 // This should never be reached for embedded subtitles.
528                 // They have slightly different format and are parsed in ass_process_chunk,
529                 // called directly from demuxer
530                 int eid;
531                 ass_event_t* event;
532                 
533                 str += 9;
534                 skip_spaces(&str);
535
536                 eid = ass_alloc_event(track);
537                 event = track->events + eid;
538
539                 process_event_tail(track, event, str, 0);
540         } else {
541                 mp_msg(MSGT_ASS, MSGL_V, "Not understood: %s  \n", str);
542         }
543         return 0;
544 }
545
// Copied from mkvtoolnix
/**
 * \brief Decode one group of up to four encoded font characters
 * \param c1,c2,c3,c4 encoded characters; each carries 6 bits as (value + 33)
 * \param dst output position
 * \param cnt number of decoded bytes to write (1..3)
 * \return dst advanced by cnt
 * Each character is reduced to its 6-bit value in unsigned arithmetic and
 * masked, so a placeholder passed for a missing trailing character (the
 * caller uses 0) can no longer borrow from, or carry into, the bits of the
 * bytes that are actually written.
 */
static unsigned char* decode_chars(unsigned char c1, unsigned char c2,
                unsigned char c3, unsigned char c4, unsigned char* dst, int cnt)
{
        uint32_t value;
        unsigned char bytes[3];
        int i;

        value = ((((uint32_t)c1 - 33u) & 63u) << 18)
              | ((((uint32_t)c2 - 33u) & 63u) << 12)
              | ((((uint32_t)c3 - 33u) & 63u) << 6)
              |  (((uint32_t)c4 - 33u) & 63u);
        bytes[0] = (value >> 16) & 0xff;
        bytes[1] = (value >> 8) & 0xff;
        bytes[2] = value & 0xff;

        for (i = 0; i < cnt; ++i)
                *dst++ = bytes[i];
        return dst;
}
563
564 static int decode_font(ass_track_t* track)
565 {
566         unsigned char* p;
567         unsigned char* q;
568         int i;
569         int size; // original size
570         int dsize; // decoded size
571         unsigned char* buf = 0;
572
573         mp_msg(MSGT_ASS, MSGL_V, "font: %d bytes encoded data \n", track->parser_priv->fontdata_used);
574         size = track->parser_priv->fontdata_used;
575         if (size % 4 == 1) {
576                 mp_msg(MSGT_ASS, MSGL_ERR, MSGTR_LIBASS_BadEncodedDataSize);
577                 goto error_decode_font;
578         }
579         buf = malloc(size / 4 * 3 + 2);
580         q = buf;
581         for (i = 0, p = (unsigned char*)track->parser_priv->fontdata; i < size / 4; i++, p+=4) {
582                 q = decode_chars(p[0], p[1], p[2], p[3], q, 3);
583         }
584         if (size % 4 == 2) {
585                 q = decode_chars(p[0], p[1], 0, 0, q, 1);
586         } else if (size % 4 == 3) {
587                 q = decode_chars(p[0], p[1], p[2], 0, q, 2);
588         }
589         dsize = q - buf;
590         assert(dsize <= size / 4 * 3 + 2);
591         
592         if (track->library->extract_fonts) {
593                 ass_add_font(track->library, track->parser_priv->fontname, (char*)buf, dsize);
594                 buf = 0;
595         }
596
597 error_decode_font:
598         if (buf) free(buf);
599         free(track->parser_priv->fontname);
600         free(track->parser_priv->fontdata);
601         track->parser_priv->fontname = 0;
602         track->parser_priv->fontdata = 0;
603         track->parser_priv->fontdata_size = 0;
604         track->parser_priv->fontdata_used = 0;
605         return 0;
606 }
607
608 static int process_fonts_line(ass_track_t* track, char *str)
609 {
610         int len;
611
612         if (!strncmp(str, "fontname:", 9)) {
613                 char* p = str + 9;
614                 skip_spaces(&p);
615                 if (track->parser_priv->fontname) {
616                         decode_font(track);
617                 }
618                 track->parser_priv->fontname = strdup(p);
619                 mp_msg(MSGT_ASS, MSGL_V, "fontname: %s\n", track->parser_priv->fontname);
620                 return 0;
621         }
622         
623         if (!track->parser_priv->fontname) {
624                 mp_msg(MSGT_ASS, MSGL_V, "Not understood: %s  \n", str);
625                 return 0;
626         }
627
628         len = strlen(str);
629         if (len > 80) {
630                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_FontLineTooLong, len, str);
631                 return 0;
632         }
633         if (track->parser_priv->fontdata_used + len > track->parser_priv->fontdata_size) {
634                 track->parser_priv->fontdata_size += 100 * 1024;
635                 track->parser_priv->fontdata = realloc(track->parser_priv->fontdata, track->parser_priv->fontdata_size);
636         }
637         memcpy(track->parser_priv->fontdata + track->parser_priv->fontdata_used, str, len);
638         track->parser_priv->fontdata_used += len;
639         
640         return 0;
641 }
642
643 /**
644  * \brief Parse a header line
645  * \param track track
646  * \param str string to parse, zero-terminated
647 */ 
648 static int process_line(ass_track_t* track, char *str)
649 {
650         if (!strncmp(str, "[Script Info]", 13)) {
651                 track->parser_priv->state = PST_INFO;
652         } else if (!strncmp(str, "[V4 Styles]", 11)) {
653                 track->parser_priv->state = PST_STYLES;
654                 track->track_type = TRACK_TYPE_SSA;
655         } else if (!strncmp(str, "[V4+ Styles]", 12)) {
656                 track->parser_priv->state = PST_STYLES;
657                 track->track_type = TRACK_TYPE_ASS;
658         } else if (!strncmp(str, "[Events]", 8)) {
659                 track->parser_priv->state = PST_EVENTS;
660         } else if (!strncmp(str, "[Fonts]", 7)) {
661                 track->parser_priv->state = PST_FONTS;
662         } else {
663                 switch (track->parser_priv->state) {
664                 case PST_INFO:
665                         process_info_line(track, str);
666                         break;
667                 case PST_STYLES:
668                         process_styles_line(track, str);
669                         break;
670                 case PST_EVENTS:
671                         process_events_line(track, str);
672                         break;
673                 case PST_FONTS:
674                         process_fonts_line(track, str);
675                         break;
676                 default:
677                         break;
678                 }
679         }
680
681         // there is no explicit end-of-font marker in ssa/ass
682         if ((track->parser_priv->state != PST_FONTS) && (track->parser_priv->fontname))
683                 decode_font(track);
684
685         return 0;
686 }
687
688 static int process_text(ass_track_t* track, char* str)
689 {
690         char* p = str;
691         while(1) {
692                 char* q;
693                 while (1) {
694                         if ((*p=='\r')||(*p=='\n')) ++p;
695                         else if (p[0]=='\xef' && p[1]=='\xbb' && p[2]=='\xbf') p+=3; // U+FFFE (BOM)
696                         else break;
697                 }
698                 for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {};
699                 if (q==p)
700                         break;
701                 if (*q != '\0')
702                         *(q++) = '\0';
703                 process_line(track, p);
704                 if (*q == '\0')
705                         break;
706                 p = q;
707         }
708         return 0;
709 }
710
711 /**
712  * \brief Process CodecPrivate section of subtitle stream
713  * \param track track
714  * \param data string to parse
715  * \param size length of data
716  CodecPrivate section contains [Stream Info] and [V4+ Styles] ([V4 Styles] for SSA) sections
717 */ 
718 void ass_process_codec_private(ass_track_t* track, char *data, int size)
719 {
720         char* str = malloc(size + 1);
721
722         memcpy(str, data, size);
723         str[size] = '\0';
724
725         process_text(track, str);
726         free(str);
727
728         if (!track->event_format) {
729                 // probably an mkv produced by ancient mkvtoolnix
730                 // such files don't have [Events] and Format: headers
731                 track->parser_priv->state = PST_EVENTS;
732                 if (track->track_type == TRACK_TYPE_SSA)
733                         track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text");
734                 else
735                         track->event_format = strdup("Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text");
736         }
737
738         process_force_style(track);
739 }
740
741 static int check_duplicate_event(ass_track_t* track, int ReadOrder)
742 {
743         int i;
744         for (i = 0; i<track->n_events - 1; ++i) // ignoring last event, it is the one we are comparing with
745                 if (track->events[i].ReadOrder == ReadOrder)
746                         return 1;
747         return 0;
748 }
749
750 /**
751  * \brief Process a chunk of subtitle stream data. In Matroska, this contains exactly 1 event (or a commentary).
752  * \param track track
753  * \param data string to parse
754  * \param size length of data
755  * \param timecode starting time of the event (milliseconds)
756  * \param duration duration of the event (milliseconds)
757 */ 
758 void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration)
759 {
760         char* str;
761         int eid;
762         char* p;
763         char* token;
764         ass_event_t* event;
765
766         if (!track->event_format) {
767                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_EventFormatHeaderMissing);
768                 return;
769         }
770         
771         str = malloc(size + 1);
772         memcpy(str, data, size);
773         str[size] = '\0';
774         mp_msg(MSGT_ASS, MSGL_V, "event at %" PRId64 ", +%" PRId64 ": %s  \n", (int64_t)timecode, (int64_t)duration, str);
775
776         eid = ass_alloc_event(track);
777         event = track->events + eid;
778
779         p = str;
780         
781         do { 
782                 NEXT(p, token);
783                 event->ReadOrder = atoi(token);
784                 if (check_duplicate_event(track, event->ReadOrder))
785                         break;
786
787                 NEXT(p, token);
788                 event->Layer = atoi(token);
789
790                 process_event_tail(track, event, p, 3);
791
792                 event->Start = timecode;
793                 event->Duration = duration;
794                 
795                 free(str);
796                 return;
797 //              dump_events(tid);
798         } while (0);
799         // some error
800         ass_free_event(track, eid);
801         track->n_events--;
802         free(str);
803 }
804
805 #ifdef USE_ICONV
806 /** \brief recode buffer to utf-8
807  * constraint: codepage != 0
808  * \param data pointer to text buffer
809  * \param size buffer size
810  * \return a pointer to recoded buffer, caller is responsible for freeing it
811 **/
812 static char* sub_recode(char* data, size_t size, char* codepage)
813 {
814         static iconv_t icdsc = (iconv_t)(-1);
815         char* tocp = "UTF-8";
816         char* outbuf;
817         assert(codepage);
818
819         {
820                 const char* cp_tmp = codepage;
821 #ifdef HAVE_ENCA
822                 char enca_lang[3], enca_fallback[100];
823                 if (sscanf(codepage, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
824                                 || sscanf(codepage, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2) {
825                         cp_tmp = guess_buffer_cp((unsigned char*)data, size, enca_lang, enca_fallback);
826                 }
827 #endif
828                 if ((icdsc = iconv_open (tocp, cp_tmp)) != (iconv_t)(-1)){
829                         mp_msg(MSGT_ASS,MSGL_V,"LIBSUB: opened iconv descriptor.\n");
830                 } else
831                         mp_msg(MSGT_ASS,MSGL_ERR,MSGTR_LIBASS_ErrorOpeningIconvDescriptor);
832         }
833
834         {
835                 size_t osize = size;
836                 size_t ileft = size;
837                 size_t oleft = size - 1;
838                 char* ip;
839                 char* op;
840                 size_t rc;
841                 
842                 outbuf = malloc(size);
843                 ip = data;
844                 op = outbuf;
845                 
846                 while (ileft) {
847                         rc = iconv(icdsc, &ip, &ileft, &op, &oleft);
848                         if (rc == (size_t)(-1)) {
849                                 if (errno == E2BIG) {
850                                         int offset = op - outbuf;
851                                         outbuf = (char*)realloc(outbuf, osize + size);
852                                         op = outbuf + offset;
853                                         osize += size;
854                                         oleft += size;
855                                 } else {
856                                         mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_ErrorRecodingFile);
857                                         return NULL;
858                                 }
859                         }
860                 }
861                 outbuf[osize - oleft - 1] = 0;
862         }
863
864         if (icdsc != (iconv_t)(-1)) {
865                 (void)iconv_close(icdsc);
866                 icdsc = (iconv_t)(-1);
867                 mp_msg(MSGT_ASS,MSGL_V,"LIBSUB: closed iconv descriptor.\n");
868         }
869         
870         return outbuf;
871 }
872 #endif // ICONV
873
874 /**
875  * \brief read file contents into newly allocated buffer
876  * \param fname file name
877  * \param bufsize out: file size
878  * \return pointer to file contents. Caller is responsible for its deallocation.
879  */
880 static char* read_file(char* fname, size_t *bufsize)
881 {
882         int res;
883         long sz;
884         long bytes_read;
885         char* buf;
886
887         FILE* fp = fopen(fname, "rb");
888         if (!fp) {
889                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_FopenFailed, fname);
890                 return 0;
891         }
892         res = fseek(fp, 0, SEEK_END);
893         if (res == -1) {
894                 mp_msg(MSGT_ASS, MSGL_WARN, MSGTR_LIBASS_FseekFailed, fname);
895                 fclose(fp);
896                 return 0;
897         }
898         
899         sz = ftell(fp);
900         rewind(fp);
901
902         if (sz > 10*1024*1024) {
903                 mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_RefusingToLoadSubtitlesLargerThan10M, fname);
904                 fclose(fp);
905                 return 0;
906         }
907         
908         mp_msg(MSGT_ASS, MSGL_V, "file size: %ld\n", sz);
909         
910         buf = malloc(sz + 1);
911         assert(buf);
912         bytes_read = 0;
913         do {
914                 res = fread(buf + bytes_read, 1, sz - bytes_read, fp);
915                 if (res <= 0) {
916                         mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_ReadFailed, errno, strerror(errno));
917                         fclose(fp);
918                         free(buf);
919                         return 0;
920                 }
921                 bytes_read += res;
922         } while (sz - bytes_read > 0);
923         buf[sz] = '\0';
924         fclose(fp);
925         
926         if (bufsize)
927                 *bufsize = sz;
928         return buf;
929 }
930
931 /*
932  * \param buf pointer to subtitle text in utf-8
933  */
934 static ass_track_t* parse_memory(ass_library_t* library, char* buf)
935 {
936         ass_track_t* track;
937         int i;
938         
939         track = ass_new_track(library);
940         
941         // process header
942         process_text(track, buf);
943
944         // external SSA/ASS subs does not have ReadOrder field
945         for (i = 0; i < track->n_events; ++i)
946                 track->events[i].ReadOrder = i;
947
948         // there is no explicit end-of-font marker in ssa/ass
949         if (track->parser_priv->fontname)
950                 decode_font(track);
951
952         if (track->track_type == TRACK_TYPE_UNKNOWN) {
953                 ass_free_track(track);
954                 return 0;
955         }
956
957         process_force_style(track);
958
959         return track;
960 }
961
962 /**
963  * \brief Read subtitles from memory.
964  * \param library libass library object
965  * \param buf pointer to subtitles text
966  * \param bufsize size of buffer
967  * \param codepage recode buffer contents from given codepage
968  * \return newly allocated track
969 */ 
970 ass_track_t* ass_read_memory(ass_library_t* library, char* buf, size_t bufsize, char* codepage)
971 {
972         ass_track_t* track;
973         int need_free = 0;
974         
975         if (!buf)
976                 return 0;
977         
978 #ifdef USE_ICONV
979         if (codepage)
980                 buf = sub_recode(buf, bufsize, codepage);
981         if (!buf)
982                 return 0;
983         else
984                 need_free = 1;
985 #endif
986         track = parse_memory(library, buf);
987         if (need_free)
988                 free(buf);
989         if (!track)
990                 return 0;
991
992         mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_AddedSubtitleFileMemory, track->n_styles, track->n_events);
993         return track;
994 }
995
/**
 * \brief read a file and, if requested, recode its contents
 * \param fname file name
 * \param codepage recode file contents from given codepage
 *        (only honoured when built with USE_ICONV; NULL means no recoding)
 * \param size out: length in bytes of the returned buffer, not counting
 *        the terminating NUL; may be NULL
 * \return newly allocated buffer the caller must free, or 0 on failure
 */
char* read_file_recode(char* fname, char* codepage, int* size)
{
	char* buf;
	size_t bufsize;

	buf = read_file(fname, &bufsize);
	if (!buf)
		return 0;
#ifdef USE_ICONV
	if (codepage) {
		char* tmpbuf = sub_recode(buf, bufsize, codepage);
		free(buf);
		buf = tmpbuf;
		if (!buf)
			return 0;
		// recoding changes the byte count, so report the length of
		// the buffer actually returned instead of the on-disk size
		bufsize = strlen(buf);
	}
#endif
	if (size)
		*size = (int)bufsize;
	return buf;
}
1016
1017 /**
1018  * \brief Read subtitles from file.
1019  * \param library libass library object
1020  * \param fname file name
1021  * \param codepage recode buffer contents from given codepage
1022  * \return newly allocated track
1023 */ 
1024 ass_track_t* ass_read_file(ass_library_t* library, char* fname, char* codepage)
1025 {
1026         char* buf;
1027         ass_track_t* track;
1028         size_t bufsize;
1029
1030         buf = read_file_recode(fname, codepage, &bufsize);
1031         if (!buf)
1032                 return 0;
1033         track = parse_memory(library, buf);
1034         free(buf);
1035         if (!track)
1036                 return 0;
1037         
1038         track->name = strdup(fname);
1039
1040         mp_msg(MSGT_ASS, MSGL_INFO, MSGTR_LIBASS_AddedSubtitleFileFname, fname, track->n_styles, track->n_events);
1041         
1042 //      dump_events(forced_tid);
1043         return track;
1044 }
1045
1046 /**
1047  * \brief read styles from file into already initialized track
1048  */
1049 int ass_read_styles(ass_track_t* track, char* fname, char* codepage)
1050 {
1051         char* buf;
1052         parser_state_t old_state;
1053         size_t sz;
1054
1055         buf = read_file(fname, &sz);
1056         if (!buf)
1057                 return 1;
1058 #ifdef USE_ICONV
1059         if (codepage) {
1060                 char* tmpbuf;
1061                 tmpbuf = sub_recode(buf, sz, codepage);
1062                 free(buf);
1063                 buf = tmpbuf;
1064         }
1065         if (!buf)
1066                 return 0;
1067 #endif
1068
1069         old_state = track->parser_priv->state;
1070         track->parser_priv->state = PST_STYLES;
1071         process_text(track, buf);
1072         track->parser_priv->state = old_state;
1073
1074         return 0;
1075 }
1076
1077 long long ass_step_sub(ass_track_t* track, long long now, int movement) {
1078         int i;
1079
1080         if (movement == 0) return 0;
1081         if (track->n_events == 0) return 0;
1082         
1083         if (movement < 0)
1084                 for (i = 0; (i < track->n_events) && ((long long)(track->events[i].Start + track->events[i].Duration) <= now); ++i) {}
1085         else
1086                 for (i = track->n_events - 1; (i >= 0) && ((long long)(track->events[i].Start) > now); --i) {}
1087         
1088         // -1 and n_events are ok
1089         assert(i >= -1); assert(i <= track->n_events);
1090         i += movement;
1091         if (i < 0) i = 0;
1092         if (i >= track->n_events) i = track->n_events - 1;
1093         return ((long long)track->events[i].Start) - now;
1094 }
1095
1096 ass_track_t* ass_new_track(ass_library_t* library) {
1097         ass_track_t* track = calloc(1, sizeof(ass_track_t));
1098         track->library = library;
1099         track->parser_priv = calloc(1, sizeof(parser_priv_t));
1100         return track;
1101 }
1102