#include "common.h"
+static int align_stride( int x, int align, int disalign ) /* Pass x through ALIGN( x, align ); if the result is a multiple of disalign, add one more align. Returns the adjusted value. */
+{
+ x = ALIGN( x, align ); /* ALIGN is defined outside this chunk; presumably rounds x up to a multiple of align - TODO confirm */
+ if( !(x&(disalign-1)) ) /* true when all bits of x below disalign are zero; the mask test assumes disalign is a power of two (callers in this chunk pass 1<<9, 1<<10 or those shifted left by one) */
+ x += align; /* move x off the disalign multiple by one align step */
+ return x;
+}
+
+static int align_plane_size( int x, int disalign ) /* Return x unchanged, or x + 128 when x is a multiple of disalign. Callers in this chunk pass stride * line-count products. */
+{
+ if( !(x&(disalign-1)) ) /* true when all bits of x below disalign are zero; the mask test assumes disalign is a power of two */
+ x += 128; /* fixed bump of 128; NOTE(review): presumably keeps consecutive planes from starting at the same disalign-relative offset - confirm */
+ return x;
+}
+
x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
{
x264_frame_t *frame;
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines;
int i_padv = PADV << h->param.b_interlaced;
- int luma_plane_size;
- int chroma_plane_size;
+ int luma_plane_size, chroma_plane_size;
int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
+ int disalign = h->param.cpu&X264_CPU_ALTIVEC ? 1<<9 : 1<<10;
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
/* allocate frame data (+64 for extra data for me) */
i_width = h->mb.i_mb_width*16;
- i_stride = ALIGN( i_width + 2*PADH, align );
i_lines = h->mb.i_mb_height*16;
+ i_stride = align_stride( i_width + 2*PADH, align, disalign );
frame->i_plane = 2;
for( int i = 0; i < 2; i++ )
{
- frame->i_stride[i] = ALIGN( i_stride, align );
frame->i_width[i] = i_width >> i;
frame->i_lines[i] = i_lines >> i;
+ frame->i_stride[i] = i_stride;
}
+ frame->i_width_lowres = frame->i_width[0]/2;
+ frame->i_lines_lowres = frame->i_lines[0]/2;
+ frame->i_stride_lowres = align_stride( frame->i_width_lowres + 2*PADH, align, disalign<<1 );
+
for( int i = 0; i < h->param.i_bframe + 2; i++ )
for( int j = 0; j < h->param.i_bframe + 2; j++ )
CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
frame->orig = frame;
- luma_plane_size = (frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv));
+ luma_plane_size = align_plane_size( frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv), disalign );
chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + i_padv));
CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
{
if( h->frames.b_have_lowres )
{
- frame->i_width_lowres = frame->i_width[0]/2;
- frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
- frame->i_lines_lowres = frame->i_lines[0]/2;
-
- luma_plane_size = frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV);
+ luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
for( int i = 0; i < 4; i++ )
if( h->param.analyse.i_weighted_pred )
{
int i_padv = PADV << h->param.b_interlaced;
- int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
- int i_stride, luma_plane_size = 0;
+ int luma_plane_size = 0;
int numweightbuf;
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE )
if( !h->param.i_sync_lookahead || h == h->thread[h->param.i_threads] )
{
// Fake analysis only works on lowres
- i_stride = ALIGN( h->mb.i_mb_width*8 + 2*PADH, align );
- luma_plane_size = i_stride * (h->mb.i_mb_height*8+2*i_padv);
+ luma_plane_size = h->fdec->i_stride_lowres * (h->mb.i_mb_height*8+2*i_padv);
// Only need 1 buffer for analysis
numweightbuf = 1;
}
}
else
{
- i_stride = ALIGN( h->mb.i_mb_width*16 + 2*PADH, align );
- luma_plane_size = i_stride * (h->mb.i_mb_height*16+2*i_padv);
+ luma_plane_size = h->fdec->i_stride[0] * (h->mb.i_mb_height*16+2*i_padv);
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
//SMART can weight one ref and one offset -1
for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
- if( x264_lookahead_init( h, i_slicetype_length ) )
- goto fail;
-
for( int i = 0; i < h->param.i_threads; i++ )
{
int init_nal_count = h->param.i_slice_count + 3;
goto fail;
}
+ if( x264_lookahead_init( h, i_slicetype_length ) )
+ goto fail;
+
for( int i = 0; i < h->param.i_threads; i++ )
if( x264_macroblock_thread_allocate( h->thread[i], 0 ) < 0 )
goto fail;