/*
 * libvpx — vp8/decoder/threading.c
 * (change imported with commit subject: "safety check to avoid divide by 0s")
 */
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11
12 #ifndef WIN32
13 # include <unistd.h>
14 #endif
15 #ifdef __APPLE__
16 #include <mach/mach_init.h>
17 #endif
18 #include "onyxd_int.h"
19 #include "vpx_mem/vpx_mem.h"
20 #include "threading.h"
21
22 #include "loopfilter.h"
23 #include "extend.h"
24 #include "vpx_ports/vpx_timer.h"
25 #include "detokenize.h"
26 #include "reconinter.h"
27 #include "reconintra_mt.h"
28
29 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
30 extern void clamp_mvs(MACROBLOCKD *xd);
31 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
32
33 #if CONFIG_RUNTIME_CPU_DETECT
34 #define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
35 #else
36 #define RTCD_VTABLE(x) NULL
37 #endif
38
/*
 * Replicate the main thread's macroblock-level decode state (xd) into each
 * of the `count` worker contexts in mbrd, so every row-decoding thread uses
 * the same prediction functions, dequant tables, segmentation and loop
 * filter settings.  Also resets the per-row progress counters used to
 * synchronize decoding between neighbouring macroblock rows.
 */
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = & pbi->common;
    int i, j;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
#if CONFIG_RUNTIME_CPU_DETECT
        mbd->rtcd = xd->rtcd;
#endif
        mbd->subpixel_predict        = xd->subpixel_predict;
        mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;

        /* Worker i begins at macroblock row i+1; row 0 is decoded by the
         * main thread. */
        mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride  = pc->mode_info_stride;

        mbd->frame_type = pc->frame_type;
        mbd->frames_since_golden      = pc->frames_since_golden;
        mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;

        /* Default reference is the last frame; the per-MB reference frame
         * is selected later inside the row-decoding loop. */
        mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
        mbd->dst = pc->yv12_fb[pc->new_fb_idx];

        vp8_setup_block_dptrs(mbd);
        vp8_build_block_doffsets(mbd);
        mbd->segmentation_enabled    = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        /* Loop filter delta state:
         *   signed char ref_lf_deltas[MAX_REF_LF_DELTAS]
         *   signed char mode_lf_deltas[MAX_MODE_LF_DELTAS] */
        vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
        vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
        mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
        mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;

        mbd->current_bc = &pbi->bc2;

        /* All 25 blocks (16 Y, 8 UV, 1 Y2) share the main context's
         * dequantization tables. */
        for (j = 0; j < 25; j++)
        {
            mbd->block[j].dequant = xd->block[j].dequant;
        }
    }

    /* -1 == "no macroblock in this row has been decoded yet". */
    for (i=0; i< pc->mb_rows; i++)
        pbi->mt_current_mb_col[i]=-1;
#else
    (void) pbi;
    (void) xd;
    (void) mbrd;
    (void) count;
#endif
}
98
99
100 void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
101 {
102 #if CONFIG_MULTITHREAD
103     int eobtotal = 0;
104     int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
105     VP8_COMMON *pc = &pbi->common;
106
107     if (xd->mode_info_context->mbmi.mb_skip_coeff)
108     {
109         vp8_reset_mb_tokens_context(xd);
110     }
111     else
112     {
113         eobtotal = vp8_decode_mb_tokens(pbi, xd);
114     }
115
116     // Perform temporary clamping of the MV to be used for prediction
117     if (do_clamp)
118     {
119         clamp_mvs(xd);
120     }
121
122     xd->mode_info_context->mbmi.dc_diff = 1;
123
124     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
125     {
126         xd->mode_info_context->mbmi.dc_diff = 0;
127
128         //mt_skip_recon_mb(pbi, xd, mb_row, mb_col);
129         if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
130         {
131             vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
132             vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
133         }
134         else
135         {
136             vp8_build_inter_predictors_mb_s(xd);
137         }
138         return;
139     }
140
141     if (xd->segmentation_enabled)
142         mb_init_dequantizer(pbi, xd);
143
144     // do prediction
145     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
146     {
147         vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
148
149         if (xd->mode_info_context->mbmi.mode != B_PRED)
150         {
151             vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
152         } else {
153             vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
154         }
155     }
156     else
157     {
158         vp8_build_inter_predictors_mb(xd);
159     }
160
161     // dequantization and idct
162     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
163     {
164         BLOCKD *b = &xd->block[24];
165         DEQUANT_INVOKE(&pbi->dequant, block)(b);
166
167         // do 2nd order transform on the dc block
168         if (xd->eobs[24] > 1)
169         {
170             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
171             ((int *)b->qcoeff)[0] = 0;
172             ((int *)b->qcoeff)[1] = 0;
173             ((int *)b->qcoeff)[2] = 0;
174             ((int *)b->qcoeff)[3] = 0;
175             ((int *)b->qcoeff)[4] = 0;
176             ((int *)b->qcoeff)[5] = 0;
177             ((int *)b->qcoeff)[6] = 0;
178             ((int *)b->qcoeff)[7] = 0;
179         }
180         else
181         {
182             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
183             ((int *)b->qcoeff)[0] = 0;
184         }
185
186         DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
187                         (xd->qcoeff, &xd->block[0].dequant[0][0],
188                          xd->predictor, xd->dst.y_buffer,
189                          xd->dst.y_stride, xd->eobs, xd->block[24].diff);
190     }
191     else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
192     {
193         for (i = 0; i < 16; i++)
194         {
195             BLOCKD *b = &xd->block[i];
196             vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
197
198             if (xd->eobs[i] > 1)
199             {
200                 DEQUANT_INVOKE(&pbi->dequant, idct_add)
201                     (b->qcoeff, &b->dequant[0][0],  b->predictor,
202                     *(b->base_dst) + b->dst, 16, b->dst_stride);
203             }
204             else
205             {
206                 IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
207                     (b->qcoeff[0] * b->dequant[0][0], b->predictor,
208                     *(b->base_dst) + b->dst, 16, b->dst_stride);
209                 ((int *)b->qcoeff)[0] = 0;
210             }
211         }
212     }
213     else
214     {
215         DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
216                         (xd->qcoeff, &xd->block[0].dequant[0][0],
217                          xd->predictor, xd->dst.y_buffer,
218                          xd->dst.y_stride, xd->eobs);
219     }
220
221     DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
222                     (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
223                      xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
224                      xd->dst.uv_stride, xd->eobs+16);
225 #else
226     (void) pbi;
227     (void) xd;
228     (void) mb_row;
229     (void) mb_col;
230 #endif
231 }
232
233
/*
 * Worker thread entry point for row-parallel decoding.
 *
 * Each worker sleeps on its own start semaphore; once woken it decodes
 * every (decoding_thread_count + 1)-th macroblock row, starting at row
 * ithread+1 (row 0 belongs to the main thread).  Row ordering is enforced
 * by spinning on the row above's progress counter
 * (pbi->mt_current_mb_col[]), which each row publishes as it advances.
 * The worker that finishes the frame's last row posts h_event_end_decoding.
 *
 * Returns 0 when pbi->b_multithreaded_rd is cleared (thread shutdown).
 */
THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
{
#if CONFIG_MULTITHREAD
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT_PLANES mb_row_left_context;

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        //if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)
        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
        {
            /* Re-check after waking: shutdown posts the semaphore too. */
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                VP8_COMMON *pc = &pbi->common;
                MACROBLOCKD *xd = &mbrd->mbd;

                int mb_row;
                int num_part = 1 << pbi->common.multi_token_partition;
                /* volatile: written concurrently by the thread decoding the
                 * row above; must be re-read on every spin iteration. */
                volatile int *last_row_current_mb_col;
                int nsync = pbi->sync_range;

                for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
                {
                    int i;
                    int recon_yoffset, recon_uvoffset;
                    int mb_col;
                    int ref_fb_idx = pc->lst_fb_idx;
                    int dst_fb_idx = pc->new_fb_idx;
                    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
                    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

                    /* NOTE(review): filter_level is only assigned/used when
                     * pbi->common.filter_level is non-zero — both sites are
                     * guarded by the same condition. */
                    int filter_level;
                    loop_filter_info *lfi = pc->lf_info;
                    int alt_flt_enabled = xd->segmentation_enabled;
                    int Segment;

                    pbi->mb_row_di[ithread].mb_row = mb_row;
                    /* Token partitions are assigned to rows round-robin. */
                    pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];

                    last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];

                    recon_yoffset = mb_row * recon_y_stride * 16;
                    recon_uvoffset = mb_row * recon_uv_stride * 8;
                    // reset above block coeffs

                    xd->above_context = pc->above_context;
                    /* Each row keeps a private left context, reset per row. */
                    xd->left_context = &mb_row_left_context;
                    vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
                    xd->up_available = (mb_row != 0);

                    /* Edge distances in 1/8th-pel units (16 pel * 8). */
                    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
                    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

                    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                    {
                        /* Every nsync MBs, wait until the row above is at
                         * least nsync MBs ahead (or already finished). */
                        if ((mb_col & (nsync-1)) == 0)
                        {
                            while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                            {
                                x86_pause_hint();
                                thread_sleep(0);
                            }
                        }

                        /* Sub-block modes need the per-block mode info copied
                         * into the BLOCKD array before reconstruction. */
                        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                        {
                            for (i = 0; i < 16; i++)
                            {
                                BLOCKD *d = &xd->block[i];
                                vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                            }
                        }

                        if(pbi->common.filter_level)
                        {
                            //update loopfilter info
                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                            filter_level = pbi->mt_baseline_filter_level[Segment];
                            // Distance of Mb to the various image edges.
                            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
                            // Apply any context driven MB level adjustment
                            vp8_adjust_mb_lf_value(xd, &filter_level);
                        }

                        // Distance of Mb to the various image edges.
                        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
                        xd->mb_to_left_edge = -((mb_col * 16) << 3);
                        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                        xd->left_available = (mb_col != 0);

                        // Select the appropriate reference frame for this MB
                        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                            ref_fb_idx = pc->lst_fb_idx;
                        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                            ref_fb_idx = pc->gld_fb_idx;
                        else
                            ref_fb_idx = pc->alt_fb_idx;

                        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                        vp8_build_uvmvs(xd, pc->full_pixel);
                        vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);

                        if (pbi->common.filter_level)
                        {
                            /* Cache this MB's bottom/right pixel lines so the
                             * next row/column can intra-predict from the
                             * *unfiltered* reconstruction. */
                            if( mb_row != pc->mb_rows-1 )
                            {
                                //Save decoded MB last row data for next-row decoding
                                vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                                vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                                vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                            }

                            //save left_col for next MB decoding
                            if(mb_col != pc->mb_cols-1)
                            {
                                MODE_INFO *next = xd->mode_info_context +1;

                                /* Only needed when the next MB is intra coded. */
                                if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
                                {
                                    for (i = 0; i < 16; i++)
                                        pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
                                    for (i = 0; i < 8; i++)
                                    {
                                        pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
                                        pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
                                    }
                                }
                            }

                          // loopfilter on this macroblock.
                            if (filter_level)
                            {
                                if (mb_col > 0)
                                    pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                if (xd->mode_info_context->mbmi.dc_diff > 0)
                                    pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                // don't apply across umv border
                                if (mb_row > 0)
                                    pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                if (xd->mode_info_context->mbmi.dc_diff > 0)
                                    pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
                            }
                        }

                        recon_yoffset += 16;
                        recon_uvoffset += 8;

                        ++xd->mode_info_context;  /* next mb */

                        xd->above_context++;

                        //pbi->mb_row_di[ithread].current_mb_col = mb_col;
                        /* Publish progress so the row below may proceed. */
                        pbi->mt_current_mb_col[mb_row] = mb_col;
                    }

                    // adjust to the next row of mbs
                    if (pbi->common.filter_level)
                    {
                        /* Replicate the rightmost cached pixel into the
                         * above-row border for the next row's predictors. */
                        if(mb_row != pc->mb_rows-1)
                        {
                            int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                            int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);

                            for (i = 0; i < 4; i++)
                            {
                                pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
                                pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
                                pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
                            }
                        }
                    } else
                        vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

                    ++xd->mode_info_context;      /* skip prediction column */

                    // since we have multithread
                    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
                }
            }
        }
        //  add this to each frame
        /* Signal frame completion if this worker owned the final row (or
         * the second-to-last row when the row count leaves the last row to
         * the main thread). */
        if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
        {
            //SetEvent(pbi->h_event_end_decoding);
            sem_post(&pbi->h_event_end_decoding);
        }
    }
#else
    (void) p_data;
#endif

    return 0 ;
}
445
446
447 void vp8_decoder_create_threads(VP8D_COMP *pbi)
448 {
449 #if CONFIG_MULTITHREAD
450     int core_count = 0;
451     int ithread;
452     int i;
453
454     pbi->b_multithreaded_rd = 0;
455     pbi->allocated_decoding_thread_count = 0;
456     core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
457
458     if (core_count > 1)
459     {
460         pbi->b_multithreaded_rd = 1;
461         pbi->decoding_thread_count = core_count -1;
462
463         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
464         CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
465         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
466         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
467         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
468
469         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
470         {
471             sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
472
473             pbi->de_thread_data[ithread].ithread  = ithread;
474             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
475             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
476
477             pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
478         }
479
480         sem_init(&pbi->h_event_end_decoding, 0, 0);
481
482         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
483     }
484
485 #else
486     (void) pbi;
487 #endif
488 }
489
490
491 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
492 {
493 #if CONFIG_MULTITHREAD
494     VP8_COMMON *const pc = & pbi->common;
495     int i;
496
497     if (pbi->b_multithreaded_rd)
498     {
499         if (pbi->mt_current_mb_col)
500         {
501             vpx_free(pbi->mt_current_mb_col);
502             pbi->mt_current_mb_col = NULL ;
503         }
504
505         // Free above_row buffers.
506         if (pbi->mt_yabove_row)
507         {
508             for (i=0; i< mb_rows; i++)
509             {
510                 if (pbi->mt_yabove_row[i])
511                 {
512                     vpx_free(pbi->mt_yabove_row[i]);
513                     pbi->mt_yabove_row[i] = NULL ;
514                 }
515             }
516             vpx_free(pbi->mt_yabove_row);
517             pbi->mt_yabove_row = NULL ;
518         }
519
520         if (pbi->mt_uabove_row)
521         {
522             for (i=0; i< mb_rows; i++)
523             {
524                 if (pbi->mt_uabove_row[i])
525                 {
526                     vpx_free(pbi->mt_uabove_row[i]);
527                     pbi->mt_uabove_row[i] = NULL ;
528                 }
529             }
530             vpx_free(pbi->mt_uabove_row);
531             pbi->mt_uabove_row = NULL ;
532         }
533
534         if (pbi->mt_vabove_row)
535         {
536             for (i=0; i< mb_rows; i++)
537             {
538                 if (pbi->mt_vabove_row[i])
539                 {
540                     vpx_free(pbi->mt_vabove_row[i]);
541                     pbi->mt_vabove_row[i] = NULL ;
542                 }
543             }
544             vpx_free(pbi->mt_vabove_row);
545             pbi->mt_vabove_row = NULL ;
546         }
547
548         // Free left_col buffers.
549         if (pbi->mt_yleft_col)
550         {
551             for (i=0; i< mb_rows; i++)
552             {
553                 if (pbi->mt_yleft_col[i])
554                 {
555                     vpx_free(pbi->mt_yleft_col[i]);
556                     pbi->mt_yleft_col[i] = NULL ;
557                 }
558             }
559             vpx_free(pbi->mt_yleft_col);
560             pbi->mt_yleft_col = NULL ;
561         }
562
563         if (pbi->mt_uleft_col)
564         {
565             for (i=0; i< mb_rows; i++)
566             {
567                 if (pbi->mt_uleft_col[i])
568                 {
569                     vpx_free(pbi->mt_uleft_col[i]);
570                     pbi->mt_uleft_col[i] = NULL ;
571                 }
572             }
573             vpx_free(pbi->mt_uleft_col);
574             pbi->mt_uleft_col = NULL ;
575         }
576
577         if (pbi->mt_vleft_col)
578         {
579             for (i=0; i< mb_rows; i++)
580             {
581                 if (pbi->mt_vleft_col[i])
582                 {
583                     vpx_free(pbi->mt_vleft_col[i]);
584                     pbi->mt_vleft_col[i] = NULL ;
585                 }
586             }
587             vpx_free(pbi->mt_vleft_col);
588             pbi->mt_vleft_col = NULL ;
589         }
590     }
591 #else
592     (void) pbi;
593 #endif
594 }
595
596
597 int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
598 {
599 #if CONFIG_MULTITHREAD
600     VP8_COMMON *const pc = & pbi->common;
601     int i;
602     int uv_width;
603
604     if (pbi->b_multithreaded_rd)
605     {
606         vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
607
608         // our internal buffers are always multiples of 16
609         if ((width & 0xf) != 0)
610             width += 16 - (width & 0xf);
611
612         if (width < 640) pbi->sync_range = 1;
613         else if (width <= 1280) pbi->sync_range = 8;
614         else if (width <= 2560) pbi->sync_range =16;
615         else pbi->sync_range = 32;
616
617         uv_width = width >>1;
618
619         // Allocate an int for each mb row.
620         CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
621
622         // Allocate memory for above_row buffers.
623         CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
624         for (i=0; i< pc->mb_rows; i++)
625             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
626
627         CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
628         for (i=0; i< pc->mb_rows; i++)
629             CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
630
631         CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
632         for (i=0; i< pc->mb_rows; i++)
633             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
634
635         // Allocate memory for left_col buffers.
636         CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
637         for (i=0; i< pc->mb_rows; i++)
638             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
639
640         CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
641         for (i=0; i< pc->mb_rows; i++)
642             CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
643
644         CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
645         for (i=0; i< pc->mb_rows; i++)
646             CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
647     }
648     return 0;
649 #else
650     (void) pbi;
651     (void) width;
652 #endif
653 }
654
655
656 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
657 {
658 #if CONFIG_MULTITHREAD
659
660     //shutdown MB Decoding thread;
661     if (pbi->b_multithreaded_rd)
662     {
663         int i;
664
665         pbi->b_multithreaded_rd = 0;
666
667         // allow all threads to exit
668         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
669         {
670             sem_post(&pbi->h_event_start_decoding[i]);
671             pthread_join(pbi->h_decoding_thread[i], NULL);
672         }
673
674         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
675         {
676             sem_destroy(&pbi->h_event_start_decoding[i]);
677         }
678
679         sem_destroy(&pbi->h_event_end_decoding);
680
681         if (pbi->h_decoding_thread)
682         {
683             vpx_free(pbi->h_decoding_thread);
684             pbi->h_decoding_thread = NULL;
685         }
686
687         if (pbi->h_event_start_decoding)
688         {
689             vpx_free(pbi->h_event_start_decoding);
690             pbi->h_event_start_decoding = NULL;
691         }
692
693         if (pbi->mb_row_di)
694         {
695             vpx_free(pbi->mb_row_di);
696             pbi->mb_row_di = NULL ;
697         }
698
699         if (pbi->de_thread_data)
700         {
701             vpx_free(pbi->de_thread_data);
702             pbi->de_thread_data = NULL;
703         }
704     }
705 #else
706     (void) pbi;
707 #endif
708 }
709
710
711 void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
712 {
713 #if CONFIG_MULTITHREAD
714     VP8_COMMON *cm  = &pbi->common;
715     MACROBLOCKD *mbd = &pbi->mb;
716     //YV12_BUFFER_CONFIG *post = &cm->new_frame;  //frame_to_show;
717     loop_filter_info *lfi = cm->lf_info;
718     int frame_type = cm->frame_type;
719
720     //int mb_row;
721     //int mb_col;
722     //int baseline_filter_level[MAX_MB_SEGMENTS];
723     int filter_level;
724     int alt_flt_enabled = mbd->segmentation_enabled;
725
726     int i;
727     //unsigned char *y_ptr, *u_ptr, *v_ptr;
728
729     // Note the baseline filter values for each segment
730     if (alt_flt_enabled)
731     {
732         for (i = 0; i < MAX_MB_SEGMENTS; i++)
733         {
734             // Abs value
735             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
736                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
737             // Delta Value
738             else
739             {
740                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
741                 pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
742             }
743         }
744     }
745     else
746     {
747         for (i = 0; i < MAX_MB_SEGMENTS; i++)
748             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
749     }
750
751     // Initialize the loop filter for this frame.
752     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
753         vp8_init_loop_filter(cm);
754     else if (frame_type != cm->last_frame_type)
755         vp8_frame_init_loop_filter(lfi, frame_type);
756 #else
757     (void) pbi;
758     (void) default_filt_lvl;
759 #endif
760 }
761
762
763 void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
764 {
765 #if CONFIG_MULTITHREAD
766     int mb_row;
767     VP8_COMMON *pc = &pbi->common;
768
769     int ibc = 0;
770     int num_part = 1 << pbi->common.multi_token_partition;
771     int i, j;
772     volatile int *last_row_current_mb_col = NULL;
773     int nsync = pbi->sync_range;
774
775     int filter_level;
776     loop_filter_info *lfi = pc->lf_info;
777     int alt_flt_enabled = xd->segmentation_enabled;
778     int Segment;
779
780     if(pbi->common.filter_level)
781     {
782         //Set above_row buffer to 127 for decoding first MB row
783         vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
784         vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
785         vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
786
787         for (i=1; i<pc->mb_rows; i++)
788         {
789             vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
790             vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
791             vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
792         }
793
794         //Set left_col to 129 initially
795         for (i=0; i<pc->mb_rows; i++)
796         {
797             vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
798             vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
799             vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
800         }
801         vp8mt_lpf_init(pbi, pc->filter_level);
802     }
803
804     vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
805
806     for (i = 0; i < pbi->decoding_thread_count; i++)
807         sem_post(&pbi->h_event_start_decoding[i]);
808
809     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
810     {
811         int i;
812
813         xd->current_bc = &pbi->mbc[mb_row%num_part];
814
815         //vp8_decode_mb_row(pbi, pc, mb_row, xd);
816         {
817             int i;
818             int recon_yoffset, recon_uvoffset;
819             int mb_col;
820             int ref_fb_idx = pc->lst_fb_idx;
821             int dst_fb_idx = pc->new_fb_idx;
822             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
823             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
824
825            // volatile int *last_row_current_mb_col = NULL;
826             if (mb_row > 0)
827                 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
828
829             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
830             recon_yoffset = mb_row * recon_y_stride * 16;
831             recon_uvoffset = mb_row * recon_uv_stride * 8;
832             // reset above block coeffs
833
834             xd->above_context = pc->above_context;
835             xd->up_available = (mb_row != 0);
836
837             xd->mb_to_top_edge = -((mb_row * 16)) << 3;
838             xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
839
840             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
841             {
842                 if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
843                     while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
844                     {
845                         x86_pause_hint();
846                         thread_sleep(0);
847                     }
848                 }
849
850                 if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
851                 {
852                     for (i = 0; i < 16; i++)
853                     {
854                         BLOCKD *d = &xd->block[i];
855                         vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
856                     }
857                 }
858
859                 if(pbi->common.filter_level)
860                 {
861                     //update loopfilter info
862                     Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
863                     filter_level = pbi->mt_baseline_filter_level[Segment];
864                     // Distance of Mb to the various image edges.
865                     // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
866                     // Apply any context driven MB level adjustment
867                     vp8_adjust_mb_lf_value(xd, &filter_level);
868                 }
869
870                 // Distance of Mb to the various image edges.
871                 // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
872                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
873                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
874
875                 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
876                 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
877                 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
878
879                 xd->left_available = (mb_col != 0);
880
881                 // Select the appropriate reference frame for this MB
882                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
883                     ref_fb_idx = pc->lst_fb_idx;
884                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
885                     ref_fb_idx = pc->gld_fb_idx;
886                 else
887                     ref_fb_idx = pc->alt_fb_idx;
888
889                 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
890                 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
891                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
892
893                 vp8_build_uvmvs(xd, pc->full_pixel);
894                 vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
895
896                 if (pbi->common.filter_level)
897                 {
898                     //Save decoded MB last row data for next-row decoding
899                     if(mb_row != pc->mb_rows-1)
900                     {
901                         vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
902                         vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
903                         vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
904                     }
905
906                     //save left_col for next MB decoding
907                     if(mb_col != pc->mb_cols-1)
908                     {
909                         MODE_INFO *next = xd->mode_info_context +1;
910
911                         if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
912                         {
913                             for (i = 0; i < 16; i++)
914                                 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
915                             for (i = 0; i < 8; i++)
916                             {
917                                 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
918                                 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
919                             }
920                         }
921                     }
922
923                     // loopfilter on this macroblock.
924                     if (filter_level)
925                     {
926                         if (mb_col > 0)
927                             pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
928
929                         if (xd->mode_info_context->mbmi.dc_diff > 0)
930                             pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
931
932                         // don't apply across umv border
933                         if (mb_row > 0)
934                             pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
935
936                         if (xd->mode_info_context->mbmi.dc_diff > 0)
937                             pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
938                     }
939                 }
940
941                 recon_yoffset += 16;
942                 recon_uvoffset += 8;
943
944                 ++xd->mode_info_context;  /* next mb */
945
946                 xd->above_context++;
947
948                 pbi->mt_current_mb_col[mb_row] = mb_col;
949             }
950
951             // adjust to the next row of mbs
952             if (pbi->common.filter_level)
953             {
954                 if(mb_row != pc->mb_rows-1)
955                 {
956                     int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
957                     int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
958
959                     for (i = 0; i < 4; i++)
960                     {
961                         pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
962                         pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
963                         pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
964                     }
965                 }
966             }else
967                 vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
968
969             ++xd->mode_info_context;      /* skip prediction column */
970         }
971         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
972     }
973
974     sem_wait(&pbi->h_event_end_decoding);   // add back for each frame
975 #else
976     (void) pbi;
977     (void) xd;
978 #endif
979 }