From b397532f5290e7c0d67ba298bd4a9851bafa62af Mon Sep 17 00:00:00 2001 From: eugeni Date: Tue, 22 Aug 2006 22:11:01 +0000 Subject: [PATCH] SSA/ASS parser reworked, with 2 main results: support for script embedded fonts (fonts, uuencoded directly into script) added; matroska interface functions have got more sensible names. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@19498 b3059339-0415-0410-9bf9-f77b7e298cf2 --- libass/ass.c | 342 ++++++++++++++++++++++++++++++--------------- libass/ass.h | 4 +- libass/ass_types.h | 6 +- 3 files changed, 237 insertions(+), 115 deletions(-) diff --git a/libass/ass.c b/libass/ass.c index e5f9f5b..f0a9bb1 100644 --- a/libass/ass.c +++ b/libass/ass.c @@ -18,6 +18,7 @@ #include extern char *sub_cp; #endif +extern int extract_embedded_fonts; #include "mp_msg.h" #include "ass.h" @@ -26,12 +27,27 @@ extern char *sub_cp; char *get_path(char *); +struct parser_priv_s { + enum {PST_UNKNOWN = 0, PST_INFO, PST_STYLES, PST_EVENTS, PST_FONTS} state; + char* fontname; + char* fontdata; + int fontdata_size; + int fontdata_used; +}; + #define ASS_STYLES_ALLOC 20 #define ASS_EVENTS_ALLOC 200 void ass_free_track(ass_track_t* track) { int i; + if (track->parser_priv) { + if (track->parser_priv->fontname) + free(track->parser_priv->fontname); + if (track->parser_priv->fontdata) + free(track->parser_priv->fontdata); + free(track->parser_priv); + } if (track->style_format) free(track->style_format); if (track->event_format) @@ -379,67 +395,207 @@ static int process_style(ass_track_t* track, char *str) } -/** - * \brief Parse a header line - * \param track track - * \param str string to parse, zero-terminated -*/ -static int process_header_line(ass_track_t* track, char *str) +static int process_styles_line(ass_track_t* track, char *str) { - static int events_section_started = 0; - - mp_msg(MSGT_GLOBAL, MSGL_DBG2, "=== Header: %s\n", str); - if (strncmp(str, "PlayResX:", 9)==0) { - track->PlayResX = atoi(str + 9); - } else if (strncmp(str,"PlayResY:", 9)==0) { - track->PlayResY = atoi(str + 9); - } else if (strncmp(str,"Timer:", 6)==0) { - track->Timer = atof(str + 6); - } else if (strstr(str,"Styles]")) { - events_section_started = 0; - if (strchr(str, '+')) - track->track_type = TRACK_TYPE_ASS; - else - track->track_type = TRACK_TYPE_SSA; - } else if (strncmp(str,"[Events]", 8)==0) { - events_section_started = 1; - } else if (strncmp(str,"Format:", 7)==0) { + if (!strncmp(str,"Format:", 7)) { char* p = str + 7; skip_spaces(&p); - if (events_section_started) { - track->event_format = strdup(p); - mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format); - } else { - track->style_format = strdup(p); - mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format); - } - } else if (strncmp(str,"Style:", 6)==0) { + track->style_format = strdup(p); + mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format); + } else if (!strncmp(str,"Style:", 6)) { char* p = str + 6; skip_spaces(&p); process_style(track, p); - } else if (strncmp(str,"WrapStyle:", 10)==0) { + } + return 0; +} + +static int process_info_line(ass_track_t* track, char *str) +{ + if (!strncmp(str, "PlayResX:", 9)) { + track->PlayResX = atoi(str + 9); + } else if (!strncmp(str,"PlayResY:", 9)) { + track->PlayResY = atoi(str + 9); + } else if (!strncmp(str,"Timer:", 6)) { + track->Timer = atof(str + 6); + } else if (!strncmp(str,"WrapStyle:", 10)) { track->WrapStyle = atoi(str + 10); } return 0; } +static int process_events_line(ass_track_t* track, char *str) +{ + if (!strncmp(str, "Format:", 7)) { + char* p = str + 7; + skip_spaces(&p); + track->event_format = strdup(p); + mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format); + } else if (!strncmp(str, "Dialogue:", 9)) { + // This should never be reached for embedded subtitles. + // They have slightly different format and are parsed in ass_process_chunk, + // called directly from demuxer + int eid; + ass_event_t* event; + + str += 9; + skip_spaces(&str); + + eid = ass_alloc_event(track); + event = track->events + eid; + + process_event_tail(track, event, str, 0); + } else { + mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str); + } + return 0; +} + +// Copied from mkvtoolnix +static unsigned char* decode_chars(unsigned char c1, unsigned char c2, + unsigned char c3, unsigned char c4, unsigned char* dst, int cnt) +{ + uint32_t value; + unsigned char bytes[3]; + int i; + + value = ((c1 - 33) << 18) + ((c2 - 33) << 12) + ((c3 - 33) << 6) + (c4 - 33); + bytes[2] = value & 0xff; + bytes[1] = (value & 0xff00) >> 8; + bytes[0] = (value & 0xff0000) >> 16; + + for (i = 0; i < cnt; ++i) + *dst++ = bytes[i]; + return dst; +} + +static int decode_font(ass_track_t* track) +{ + unsigned char* p; + unsigned char* q; + int i; + int size; // original size + int dsize; // decoded size + unsigned char* buf = 0; + + mp_msg(MSGT_GLOBAL, MSGL_V, "font: %d bytes encoded data \n", track->parser_priv->fontdata_used); + size = track->parser_priv->fontdata_used; + if (size % 4 == 1) { + mp_msg(MSGT_GLOBAL, MSGL_ERR, "bad encoded data size\n"); + goto error_decode_font; + } + buf = malloc(size / 4 * 3 + 2); + q = buf; + for (i = 0, p = (unsigned char*)track->parser_priv->fontdata; i < size / 4; i++, p+=4) { + q = decode_chars(p[0], p[1], p[2], p[3], q, 3); + } + if (size % 4 == 2) { + q = decode_chars(p[0], p[1], 0, 0, q, 1); + } else if (size % 4 == 3) { + q = decode_chars(p[0], p[1], p[2], 0, q, 2); + } + dsize = q - buf; + assert(dsize <= size / 4 * 3 + 2); + + if (extract_embedded_fonts) + ass_process_font(track->parser_priv->fontname, (char*)buf, dsize); + +error_decode_font: + if (buf) free(buf); + free(track->parser_priv->fontname); + free(track->parser_priv->fontdata); + track->parser_priv->fontname = 0; + track->parser_priv->fontdata = 0; + track->parser_priv->fontdata_size = 0; + track->parser_priv->fontdata_used = 0; + return 0; +} + +static char* validate_fname(char* name); + +static int process_fonts_line(ass_track_t* track, char *str) +{ + int len; + + if (!strncmp(str, "fontname:", 9)) { + char* p = str + 9; + skip_spaces(&p); + if (track->parser_priv->fontname) { + decode_font(track); + } + track->parser_priv->fontname = validate_fname(p); + mp_msg(MSGT_GLOBAL, MSGL_V, "fontname: %s\n", track->parser_priv->fontname); + return 0; + } + + if (!track->parser_priv->fontname) { + mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str); + return 0; + } + + len = strlen(str); + if (len > 80) { + mp_msg(MSGT_GLOBAL, MSGL_WARN, "Font line too long: %d, %s\n", len, str); + return 0; + } + if (track->parser_priv->fontdata_used + len > track->parser_priv->fontdata_size) { + track->parser_priv->fontdata_size += 100 * 1024; + track->parser_priv->fontdata = realloc(track->parser_priv->fontdata, track->parser_priv->fontdata_size); + } + memcpy(track->parser_priv->fontdata + track->parser_priv->fontdata_used, str, len); + track->parser_priv->fontdata_used += len; + + return 0; +} + /** - * \brief Process CodecPrivate section of subtitle stream + * \brief Parse a header line * \param track track - * \param data string to parse - * \param size length of data - CodecPrivate section contains [Stream Info] and [V4+ Styles] sections + * \param str string to parse, zero-terminated */ -void ass_process_chunk(ass_track_t* track, char *data, int size) +static int process_line(ass_track_t* track, char *str) { - char* str = malloc(size + 1); - char* p; - int sid; + if (strstr(str, "[Script Info]")) { // FIXME: strstr to skip possible BOM at the beginning of the script + track->parser_priv->state = PST_INFO; + } else if (!strncmp(str, "[V4 Styles]", 11)) { + track->parser_priv->state = PST_STYLES; + track->track_type = TRACK_TYPE_SSA; + } else if (!strncmp(str, "[V4+ Styles]", 12)) { + track->parser_priv->state = PST_STYLES; + track->track_type = TRACK_TYPE_ASS; + } else if (!strncmp(str, "[Events]", 8)) { + track->parser_priv->state = PST_EVENTS; + } else if (!strncmp(str, "[Fonts]", 7)) { + track->parser_priv->state = PST_FONTS; + } else { + switch (track->parser_priv->state) { + case PST_INFO: + process_info_line(track, str); + break; + case PST_STYLES: + process_styles_line(track, str); + break; + case PST_EVENTS: + process_events_line(track, str); + break; + case PST_FONTS: + process_fonts_line(track, str); + break; + default: + break; + } + } - memcpy(str, data, size); - str[size] = '\0'; + // there is no explicit end-of-font marker in ssa/ass + if ((track->parser_priv->state != PST_FONTS) && (track->parser_priv->fontname)) + decode_font(track); - p = str; + return 0; +} + +static int process_text(ass_track_t* track, char* str) +{ + char* p = str; while(1) { char* q; for (;((*p=='\r')||(*p=='\n'));++p) {} @@ -448,11 +604,30 @@ void ass_process_chunk(ass_track_t* track, char *data, int size) break; if (*q != '\0') *(q++) = '\0'; - process_header_line(track, p); + process_line(track, p); if (*q == '\0') break; p = q; } + return 0; +} + +/** + * \brief Process CodecPrivate section of subtitle stream + * \param track track + * \param data string to parse + * \param size length of data + CodecPrivate section contains [Stream Info] and [V4+ Styles] ([V4 Styles] for SSA) sections +*/ +void ass_process_codec_private(ass_track_t* track, char *data, int size) +{ + char* str = malloc(size + 1); + int sid; + + memcpy(str, data, size); + str[size] = '\0'; + + process_text(track, str); free(str); // add "Default" style to the end @@ -464,6 +639,7 @@ void ass_process_chunk(ass_track_t* track, char *data, int size) if (!track->event_format) { // probably an mkv produced by ancient mkvtoolnix // such files don't have [Events] and Format: headers + track->parser_priv->state = PST_EVENTS; if (track->track_type == TRACK_TYPE_SSA) track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"); else @@ -488,7 +664,7 @@ static int check_duplicate_event(ass_track_t* track, int ReadOrder) * \param timecode starting time of the event (milliseconds) * \param duration duration of the event (milliseconds) */ -void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration) +void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration) { char* str; int eid; @@ -535,29 +711,6 @@ void ass_process_line(ass_track_t* track, char *data, int size, long long timeco free(str); } -/** - * \brief Process a line from external file. - * \param track track - * \param str string to parse - * \param size length of data -*/ -static void ass_process_external_line(ass_track_t* track, char *str, int size) -{ - int eid; - ass_event_t* event; - - eid = ass_alloc_event(track); - event = track->events + eid; - - if (strncmp("Dialogue:", str, 9) != 0) - return; - - str += 9; - while (*str == ' ') {++str;} - - process_event_tail(track, event, str, 0); -} - #ifdef USE_ICONV /** \brief recode buffer to utf-8 * constraint: sub_cp != 0 @@ -641,8 +794,6 @@ ass_track_t* ass_read_file(char* fname) long sz; long bytes_read; char* buf; - char* p; - int events_reached; ass_track_t* track; FILE* fp = fopen(fname, "rb"); @@ -698,49 +849,15 @@ ass_track_t* ass_read_file(char* fname) track->name = strdup(fname); // process header - events_reached = 0; - p = buf; - while (p && (*p)) { - while (*p == '\n') {++p;} - if (strncmp(p, "[Events]", 8) == 0) { - events_reached = 1; - } else if ((strncmp(p, "Format:", 7) == 0) && (events_reached)) { - p = strchr(p, '\n'); - if (p == 0) { - mp_msg(MSGT_GLOBAL, MSGL_WARN, "Incomplete subtitles\n"); - free(buf); - return 0; - } - ass_process_chunk(track, buf, p - buf + 1); - ++p; - break; - } - p = strchr(p, '\n'); - } - // process events - while (p && (*p)) { - char* next; - int len; - while (*p == '\n') {++p;} - next = strchr(p, '\n'); - len = 0; - if (next) { - len = next - p; - *next = 0; - } else { - len = strlen(p); - } - ass_process_external_line(track, p, len); - if (next) { - p = next + 1; - continue; - } else - break; - } - + process_text(track, buf); + + // there is no explicit end-of-font marker in ssa/ass + if (track->parser_priv->fontname) + decode_font(track); + free(buf); - if (!events_reached) { + if (track->track_type == TRACK_TYPE_UNKNOWN) { ass_free_track(track); return 0; } @@ -853,6 +970,7 @@ long long ass_step_sub(ass_track_t* track, long long now, int movement) { ass_track_t* ass_new_track(void) { ass_track_t* track = calloc(1, sizeof(ass_track_t)); + track->parser_priv = calloc(1, sizeof(parser_priv_t)); return track; } diff --git a/libass/ass.h b/libass/ass.h index 54fef6a..493ba2f 100644 --- a/libass/ass.h +++ b/libass/ass.h @@ -131,7 +131,7 @@ void ass_free_event(ass_track_t* track, int eid); * \param data string to parse * \param size length of data */ -void ass_process_chunk(ass_track_t* track, char *data, int size); +void ass_process_codec_private(ass_track_t* track, char *data, int size); /** * \brief Process a chunk of subtitle stream data. In matroska, this containes exactly 1 event (or a commentary) @@ -141,7 +141,7 @@ void ass_process_chunk(ass_track_t* track, char *data, int size); * \param timecode starting time of the event (milliseconds) * \param duration duration of the event (milliseconds) */ -void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration); +void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration); /** * \brief Read subtitles from file. diff --git a/libass/ass_types.h b/libass/ass_types.h index d742803..1743bfe 100644 --- a/libass/ass_types.h +++ b/libass/ass_types.h @@ -53,6 +53,8 @@ typedef struct ass_event_s { char* Text; } ass_event_t; +typedef struct parser_priv_s parser_priv_t; + /// ass track represent either an external script or a matroska subtitle stream (no real difference between them) /// it can be used in rendering after the headers are parsed (i.e. events format line read) typedef struct ass_track_s { @@ -66,7 +68,7 @@ typedef struct ass_track_s { char* style_format; // style format line (everything after "Format: ") char* event_format; // event format line - enum {TRACK_TYPE_ASS, TRACK_TYPE_SSA} track_type; + enum {TRACK_TYPE_UNKNOWN = 0, TRACK_TYPE_ASS, TRACK_TYPE_SSA} track_type; // script header fields int PlayResX; @@ -77,6 +79,8 @@ typedef struct ass_track_s { int default_style; // index of default style char* name; // file name in case of external subs, 0 for streams + + parser_priv_t* parser_priv; } ass_track_t; #endif -- 2.40.0