From 793edc38e41700e9b4cda4b7f9c79aba95b8d989 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 5 Jul 2017 23:48:28 +0200 Subject: [PATCH] Decrease memory consumption for whole image single tile decoding. We can use the same buffer for the tile decoding and the final image, and save the intermediate buffer to transfer between those. Effect on the decoding of MAPA (9944 x 13498 x 3 components of size byte) Peak memory from 4.5 GB to 2.7 GB Now: n5: 2699708767 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E77E07: opj_aligned_malloc (opj_malloc.c:61) <-- final image n1: 1610689344 0x4E7195B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E722D2: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 1610689344 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 1610689344 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 1610689344 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40369E: main (opj_decompress.c:1459) n1: 815554560 0x4E72231: opj_tcd_init_decode_tile (tcd.c:1217) <-- working memory for code blocks: 9944*13498/64/64*8192*3 n1: 815554560 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 815554560 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 815554560 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 815554560 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 815554560 0x40369E: main (opj_decompress.c:1459) n1: 219758391 0x4E4C0BF: opj_j2k_read_tile_header (j2k.c:4661) <-- ingestion of code stream n1: 219758391 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 219758391 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 219758391 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 219758391 0x40369E: main (opj_decompress.c:1459) n1: 39822000 0x4E7224F: opj_tcd_init_decode_tile (tcd.c:1224) <-- OPJ_J2K_DEFAULT_NB_SEGS*sizeof(opj_tcd_seg_t) per codeblock n1: 39822000 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 39822000 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 39822000 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 39822000 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 39822000 0x40369E: main (opj_decompress.c:1459) n0: 13884472 in 49 places, all below massif's threshold (1.00%) Before: n5: 4493329848 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n2: 1610709160 0x4E77C87: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E717DB: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E72152: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 1610689344 0x4E4C64A: opj_j2k_decode_tiles (j2k.c:10318) n1: 1610689344 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 1610689344 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40369E: main (opj_decompress.c:1459) n0: 19816 in 2 places, all below massif's threshold (1.00%) n1: 1610689344 0x4E43F36: opj_j2k_update_image_data.isra.7 (j2k.c:8743) n1: 1610689344 0x4E4C5C1: opj_j2k_decode_tiles (j2k.c:10358) n1: 1610689344 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 1610689344 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40369E: main (opj_decompress.c:1459) n1: 815554560 0x4E720B1: opj_tcd_init_decode_tile (tcd.c:1217) n1: 815554560 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 815554560 0x4E4C64A: opj_j2k_decode_tiles (j2k.c:10318) n1: 815554560 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 815554560 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 815554560 0x40369E: main (opj_decompress.c:1459) n1: 402672336 0x4E4C545: opj_j2k_decode_tiles (j2k.c:10336) n1: 402672336 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 402672336 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 402672336 0x40369E: main (opj_decompress.c:1459) n0: 53704448 in 58 places, all below massif's threshold (1.00%) --- src/lib/openjp2/j2k.c | 62 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 76efb018..b665924d 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -8716,15 +8716,20 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k, return OPJ_FALSE; } - if (! opj_tcd_update_tile_data(p_j2k->m_tcd, p_data, p_data_size)) { - return OPJ_FALSE; - } + /* p_data can be set to NULL when the call will take care of using */ + /* itself the TCD data. This is typically the case for whole single */ + /* tile decoding optimization. */ + if (p_data != NULL) { + if (! opj_tcd_update_tile_data(p_j2k->m_tcd, p_data, p_data_size)) { + return OPJ_FALSE; + } - /* To avoid to destroy the tcp which can be useful when we try to decode a tile decoded before (cf j2k_random_tile_access) - * we destroy just the data which will be re-read in read_tile_header*/ - /*opj_j2k_tcp_destroy(l_tcp); - p_j2k->m_tcd->tcp = 0;*/ - opj_j2k_tcp_data_destroy(l_tcp); + /* To avoid to destroy the tcp which can be useful when we try to decode a tile decoded before (cf j2k_random_tile_access) + * we destroy just the data which will be re-read in read_tile_header*/ + /*opj_j2k_tcp_destroy(l_tcp); + p_j2k->m_tcd->tcp = 0;*/ + opj_j2k_tcp_data_destroy(l_tcp); + } p_j2k->m_specific_param.m_decoder.m_can_decode = 0; p_j2k->m_specific_param.m_decoder.m_state &= (~(OPJ_UINT32)J2K_STATE_DATA); @@ -10381,6 +10386,47 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, OPJ_BYTE * l_current_data; OPJ_UINT32 nr_tiles = 0; + /* Particular case for whole single tile decoding */ + /* We can avoid allocating intermediate tile buffers */ + if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 && + p_j2k->m_cp.tx0 == 0 && p_j2k->m_cp.ty0 == 0 && + p_j2k->m_output_image->x0 == 0 && + p_j2k->m_output_image->y0 == 0 && + p_j2k->m_output_image->x1 == p_j2k->m_cp.tdx && + p_j2k->m_output_image->y1 == p_j2k->m_cp.tdy && + p_j2k->m_output_image->comps[0].factor == 0) { + OPJ_UINT32 i; + if (! opj_j2k_read_tile_header(p_j2k, + &l_current_tile_no, + &l_data_size, + &l_tile_x0, &l_tile_y0, + &l_tile_x1, &l_tile_y1, + &l_nb_comps, + &l_go_on, + p_stream, + p_manager)) { + return OPJ_FALSE; + } + + if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0, + p_stream, p_manager)) { + opj_event_msg(p_manager, EVT_ERROR, "Failed to decode tile 1/1\n"); + return OPJ_FALSE; + } + + /* Transfer TCD data to output image data */ + for (i = 0; i < p_j2k->m_output_image->numcomps; i++) { + opj_free(p_j2k->m_output_image->comps[i].data); + p_j2k->m_output_image->comps[i].data = + p_j2k->m_tcd->tcd_image->tiles->comps[i].data; + p_j2k->m_output_image->comps[i].resno_decoded = + p_j2k->m_tcd->image->comps[i].resno_decoded; + p_j2k->m_tcd->tcd_image->tiles->comps[i].data = NULL; + } + + return OPJ_TRUE; + } + l_current_data = (OPJ_BYTE*)opj_malloc(1000); if (! l_current_data) { opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tiles\n"); -- 2.40.0