From 820b5a85f9bc56ac3361ff546331f0b318570005 Mon Sep 17 00:00:00 2001 From: "Dr.Smile" Date: Mon, 9 Feb 2015 05:01:55 +0300 Subject: [PATCH] Skip memset() when using internal rasterizer --- libass/ass_bitmap.c | 13 ++++--------- libass/ass_rasterizer.c | 17 +++++++++-------- libass/ass_rasterizer.h | 6 +++--- libass/ass_rasterizer_c.c | 8 ++++---- libass/x86/rasterizer.asm | 15 ++++++++++++--- libass/x86/rasterizer.h | 8 ++++---- 6 files changed, 36 insertions(+), 31 deletions(-) diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c index 6f77a17..b14336f 100644 --- a/libass/ass_bitmap.c +++ b/libass/ass_bitmap.c @@ -319,20 +319,15 @@ Bitmap *outline_to_bitmap(ASS_Renderer *render_priv, int tile_w = (w + 2 * bord + mask) & ~mask; int tile_h = (h + 2 * bord + mask) & ~mask; - Bitmap *bm = alloc_bitmap(tile_w, tile_h); + Bitmap *bm = alloc_bitmap_raw(tile_w, tile_h); if (!bm) return NULL; bm->left = x_min - bord; bm->top = y_min - bord; - int offs = bord & ~mask; - if (!rasterizer_fill(rst, - bm->buffer + offs * (bm->stride + 1), - x_min - bord + offs, - y_min - bord + offs, - ((w + bord + mask) & ~mask) - offs, - ((h + bord + mask) & ~mask) - offs, - bm->stride)) { + if (!rasterizer_fill(rst, bm->buffer, + x_min - bord, y_min - bord, + bm->stride, tile_h, bm->stride)) { ass_msg(render_priv->library, MSGL_WARN, "Failed to rasterize glyph!\n"); ass_free_bitmap(bm); return NULL; diff --git a/libass/ass_rasterizer.c b/libass/ass_rasterizer.c index ee19ad4..bf217a7 100644 --- a/libass/ass_rasterizer.c +++ b/libass/ass_rasterizer.c @@ -592,7 +592,8 @@ static int polyline_split_vert(const struct segment *src, size_t n_src, static inline void rasterizer_fill_solid(ASS_Rasterizer *rst, - uint8_t *buf, int width, int height, ptrdiff_t stride) + uint8_t *buf, int width, int height, ptrdiff_t stride, + int set) { assert(!(width & ((1 << rst->tile_order) - 1))); assert(!(height & ((1 << rst->tile_order) - 1))); @@ -604,7 +605,7 @@ static inline void 
rasterizer_fill_solid(ASS_Rasterizer *rst, height >>= rst->tile_order; for (j = 0; j < height; ++j) { for (i = 0; i < width; ++i) - rst->fill_solid(buf + i * step, stride); + rst->fill_solid(buf + i * step, stride, set); buf += tile_stride; } } @@ -637,8 +638,9 @@ static inline void rasterizer_fill_halfplane(ASS_Rasterizer *rst, int64_t abs_c = offs_c < 0 ? -offs_c : offs_c; if (abs_c < size) rst->fill_halfplane(buf + i * step, stride, a, b, cc, scale); - else if (((uint32_t)(offs_c >> 32) ^ scale) & 0x80000000) - rst->fill_solid(buf + i * step, stride); + else + rst->fill_solid(buf + i * step, stride, + ((uint32_t)(offs_c >> 32) ^ scale) & 0x80000000); } buf += tile_stride; } @@ -664,8 +666,7 @@ static int rasterizer_fill_level(ASS_Rasterizer *rst, size_t n = rst->size[index] - offs; struct segment *line = rst->linebuf[index] + offs; if (!n) { - if (winding) - rasterizer_fill_solid(rst, buf, width, height, stride); + rasterizer_fill_solid(rst, buf, width, height, stride, winding); return 1; } if (n == 1) { @@ -682,8 +683,8 @@ static int rasterizer_fill_level(ASS_Rasterizer *rst, rasterizer_fill_halfplane(rst, buf, width, height, stride, line->a, line->b, line->c, flag & 2 ? 
-line->scale : line->scale); - else if (flag & 2) - rasterizer_fill_solid(rst, buf, width, height, stride); + else + rasterizer_fill_solid(rst, buf, width, height, stride, flag & 2); rst->size[index] = offs; return 1; } diff --git a/libass/ass_rasterizer.h b/libass/ass_rasterizer.h index 6630317..d20feb3 100644 --- a/libass/ass_rasterizer.h +++ b/libass/ass_rasterizer.h @@ -43,15 +43,15 @@ struct segment { }; -typedef void (*FillSolidTileFunc)(uint8_t *buf, ptrdiff_t stride); +typedef void (*FillSolidTileFunc)(uint8_t *buf, ptrdiff_t stride, int set); typedef void (*FillHalfplaneTileFunc)(uint8_t *buf, ptrdiff_t stride, int32_t a, int32_t b, int64_t c, int32_t scale); typedef void (*FillGenericTileFunc)(uint8_t *buf, ptrdiff_t stride, const struct segment *line, size_t n_lines, int winding); -void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride); -void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride); +void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride, int set); +void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride, int set); void ass_fill_halfplane_tile16_c(uint8_t *buf, ptrdiff_t stride, int32_t a, int32_t b, int64_t c, int32_t scale); void ass_fill_halfplane_tile32_c(uint8_t *buf, ptrdiff_t stride, diff --git a/libass/ass_rasterizer_c.c b/libass/ass_rasterizer_c.c index f15f91b..38d4050 100644 --- a/libass/ass_rasterizer_c.c +++ b/libass/ass_rasterizer_c.c @@ -22,10 +22,10 @@ -void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride) +void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride, int set) { int i, j; - int8_t value = 255; + uint8_t value = set ? 255 : 0; for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) buf[i] = value; @@ -33,10 +33,10 @@ void ass_fill_solid_tile16_c(uint8_t *buf, ptrdiff_t stride) } } -void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride) +void ass_fill_solid_tile32_c(uint8_t *buf, ptrdiff_t stride, int set) { int i, j; - int8_t value = 255; + uint8_t value = set ? 
255 : 0; for (j = 0; j < 32; ++j) { for (i = 0; i < 32; ++i) buf[i] = value; diff --git a/libass/x86/rasterizer.asm b/libass/x86/rasterizer.asm index d4941c6..bbb2921 100644 --- a/libass/x86/rasterizer.asm +++ b/libass/x86/rasterizer.asm @@ -116,12 +116,21 @@ SECTION .text ;------------------------------------------------------------------------------ ; FILL_SOLID_TILE tile_order, suffix -; void fill_solid_tile%2(uint8_t *buf, ptrdiff_t stride); +; void fill_solid_tile%2(uint8_t *buf, ptrdiff_t stride, int set); ;------------------------------------------------------------------------------ %macro FILL_SOLID_TILE 2 -cglobal fill_solid_tile%2, 2,2,1 - pcmpeqd m0, m0 +cglobal fill_solid_tile%2, 3,4,1 + mov r3d, -1 + test r2d, r2d + cmovnz r2d, r3d + movd xm0, r2d +%if mmsize == 32 + vpbroadcastd m0, xm0 +%else + pshufd m0, m0, q0000 +%endif + %rep (1 << %1) - 1 FILL_LINE r0, 0, 1 << %1 add r0, r1 diff --git a/libass/x86/rasterizer.h b/libass/x86/rasterizer.h index 11ea3d4..47d72d4 100644 --- a/libass/x86/rasterizer.h +++ b/libass/x86/rasterizer.h @@ -25,8 +25,8 @@ struct segment; -void ass_fill_solid_tile16_sse2(uint8_t *buf, ptrdiff_t stride); -void ass_fill_solid_tile32_sse2(uint8_t *buf, ptrdiff_t stride); +void ass_fill_solid_tile16_sse2(uint8_t *buf, ptrdiff_t stride, int set); +void ass_fill_solid_tile32_sse2(uint8_t *buf, ptrdiff_t stride, int set); void ass_fill_halfplane_tile16_sse2(uint8_t *buf, ptrdiff_t stride, int32_t a, int32_t b, int64_t c, int32_t scale); void ass_fill_halfplane_tile32_sse2(uint8_t *buf, ptrdiff_t stride, @@ -38,8 +38,8 @@ void ass_fill_generic_tile32_sse2(uint8_t *buf, ptrdiff_t stride, const struct segment *line, size_t n_lines, int winding); -void ass_fill_solid_tile16_avx2(uint8_t *buf, ptrdiff_t stride); -void ass_fill_solid_tile32_avx2(uint8_t *buf, ptrdiff_t stride); +void ass_fill_solid_tile16_avx2(uint8_t *buf, ptrdiff_t stride, int set); +void ass_fill_solid_tile32_avx2(uint8_t *buf, ptrdiff_t stride, int set); void 
ass_fill_halfplane_tile16_avx2(uint8_t *buf, ptrdiff_t stride, int32_t a, int32_t b, int64_t c, int32_t scale); void ass_fill_halfplane_tile32_avx2(uint8_t *buf, ptrdiff_t stride, -- 2.40.0