From df075e152dd5435e2090eaa73fec5c4c72bbc3b8 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Mon, 17 May 2021 15:28:01 +0300
Subject: [PATCH] Fix undefined behavior: left shift of negative value

Compilers are good at optimizing multiplication by shift.
---
 common/cabac.c       |  2 +-
 common/deblock.c     | 12 ++++++------
 common/macroblock.c  |  2 +-
 common/macroblock.h  |  4 ++--
 common/mc.c          |  2 +-
 common/mvpred.c      |  6 +++---
 common/quant.c       |  2 +-
 common/x86/mc-c.c    |  6 +++---
 encoder/encoder.c    |  4 ++--
 encoder/macroblock.c |  2 +-
 encoder/me.c         | 32 ++++++++++++++++----------------
 11 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/common/cabac.c b/common/cabac.c
index 65d30a9a..9c699953 100644
--- a/common/cabac.c
+++ b/common/cabac.c
@@ -143,7 +143,7 @@ void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val )
 {
     uint32_t v = val + (1<<exp_bits);
     int k = 31 - x264_clz( v );
-    uint32_t x = (bypass_lut[k-exp_bits]<<exp_bits) + v;
+    uint32_t x = ((uint32_t)bypass_lut[k-exp_bits]<<exp_bits) + v;
     k = 2*k+1-exp_bits;
     int i = ((k-1)&7)+1;
     do {
diff --git a/common/deblock.c b/common/deblock.c
index e986fda1..5b52d9d9 100644
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -102,7 +102,7 @@ static ALWAYS_INLINE void deblock_edge_luma_c( pixel *pix, intptr_t xstride, int
             tc++;
         }
 
-        delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+        delta = x264_clip3( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc );
         pix[-1*xstride] = x264_clip_pixel( p0 + delta );    /* p0' */
         pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
     }
@@ -143,7 +143,7 @@ static ALWAYS_INLINE void deblock_edge_chroma_c( pixel *pix, intptr_t xstride, i
 
     if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
     {
-        int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+        int delta = x264_clip3( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc );
         pix[-1*xstride] = x264_clip_pixel( p0 + delta );    /* p0' */
         pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
     }
@@ -315,10 +315,10 @@ static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, intptr_t i_stride
     if( !M32(bS) || !alpha || !beta )
         return;
 
-    tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
-    tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
-    tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
-    tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
+    tc[0] = (tc0_table(index_a)[bS[0]] * (1 << (BIT_DEPTH-8))) + b_chroma;
+    tc[1] = (tc0_table(index_a)[bS[1]] * (1 << (BIT_DEPTH-8))) + b_chroma;
+    tc[2] = (tc0_table(index_a)[bS[2]] * (1 << (BIT_DEPTH-8))) + b_chroma;
+    tc[3] = (tc0_table(index_a)[bS[3]] * (1 << (BIT_DEPTH-8))) + b_chroma;
 
     pf_inter( pix, i_stride, alpha, beta, tc );
 }
diff --git a/common/macroblock.c b/common/macroblock.c
index ba07217d..9bc03e96 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -1249,7 +1249,7 @@ static ALWAYS_INLINE void macroblock_cache_load( x264_t *h, int mb_x, int mb_y,
                 if( h->mb.cache.varref[l][index] >= 0 )\
                 {\
                     h->mb.cache.varref[l][index] >>= 1;\
-                    h->mb.cache.varmv[l][index][1] <<= 1;\
+                    h->mb.cache.varmv[l][index][1] *= 2;\
                     h->mb.cache.mvd[l][index][1] <<= 1;\
                 }
                 MAP_MVS
diff --git a/common/macroblock.h b/common/macroblock.h
index 8e2a8118..42e55181 100644
--- a/common/macroblock.h
+++ b/common/macroblock.h
@@ -395,9 +395,9 @@ static ALWAYS_INLINE uint32_t pack8to32( uint32_t a, uint32_t b, uint32_t c, uin
 static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
 {
 #if WORDS_BIGENDIAN
-   return (b&0xFFFF) + (a<<16);
+   return (b&0xFFFF) + ((uint32_t)a<<16);
 #else
-   return (a&0xFFFF) + (b<<16);
+   return (a&0xFFFF) + ((uint32_t)b<<16);
 #endif
 }
 static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
diff --git a/common/mc.c b/common/mc.c
index 32f2793f..a2087af3 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -116,7 +116,7 @@ static void weight_cache( x264_t *h, x264_weight_t *w )
 static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                        const x264_weight_t *weight, int i_width, int i_height )
 {
-    int offset = weight->i_offset << (BIT_DEPTH-8);
+    int offset = weight->i_offset * (1 << (BIT_DEPTH-8));
     int scale = weight->i_scale;
     int denom = weight->i_denom;
     if( denom >= 1 )
diff --git a/common/mvpred.c b/common/mvpred.c
index 0d9e9cb9..b8f913e9 100644
--- a/common/mvpred.c
+++ b/common/mvpred.c
@@ -257,13 +257,13 @@ static int mb_predict_mv_direct16x16_temporal( x264_t *h )
 
         int i_part_8x8 = i_mb_8x8 + x8 + (ypart>>1) * h->mb.i_b8_stride;
         int i_ref1_ref = h->fref[1][0]->ref[0][i_part_8x8];
-        int i_ref = (map_col_to_list0(i_ref1_ref>>preshift) << postshift) + (offset&i_ref1_ref&MB_INTERLACED);
+        int i_ref = (map_col_to_list0(i_ref1_ref>>preshift) * (1 << postshift)) + (offset&i_ref1_ref&MB_INTERLACED);
 
         if( i_ref >= 0 )
         {
             int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
             int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + ypart * h->mb.i_b4_stride];
-            int16_t mv_y = (mv_col[1]<<yshift)/2;
+            int16_t mv_y = (mv_col[1] * (1 << yshift)) / 2;
             int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
             int l0y = ( dist_scale_factor * mv_y + 128 ) >> 8;
             if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_y > h->mb.mv_max_spel[1]) )
@@ -533,7 +533,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
         int shift = 1 + MB_INTERLACED - h->mb.field[xy]; \
         int16_t *mvp = h->mb.mvr[i_list][i_ref<<1>>shift][xy]; \
         mvc[i][0] = mvp[0]; \
-        mvc[i][1] = mvp[1]<<1>>shift; \
+        mvc[i][1] = mvp[1]*2>>shift; \
         i++; \
     }
 
diff --git a/common/quant.c b/common/quant.c
index c79f45f2..f5972fc9 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -101,7 +101,7 @@ static int quant_2x2_dc( dctcoef dct[4], int mf, int bias )
 }
 
 #define DEQUANT_SHL( x ) \
-    dct[x] = ( dct[x] * dequant_mf[i_mf][x] ) << i_qbits
+    dct[x] = ( dct[x] * dequant_mf[i_mf][x] ) * (1 << i_qbits)
 
 #define DEQUANT_SHR( x ) \
     dct[x] = ( dct[x] * dequant_mf[i_mf][x] + f ) >> (-i_qbits)
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index ea627bf6..da9f6163 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -499,13 +499,13 @@ static void weight_cache_mmx2( x264_t *h, x264_weight_t *w )
         else
             w->weightfn = h->mc.offsetadd;
         for( int i = 0; i < 8; i++ )
-            w->cachea[i] = abs(w->i_offset<<(BIT_DEPTH-8));
+            w->cachea[i] = abs(w->i_offset * (1 << (BIT_DEPTH-8)));
         return;
     }
     w->weightfn = h->mc.weight;
     int den1 = 1<<w->i_denom;
     int den2 = w->i_scale<<1;
-    int den3 = 1+(w->i_offset<<(BIT_DEPTH-8+1));
+    int den3 = 1+(w->i_offset * (1 << (BIT_DEPTH-8+1)));
     for( int i = 0; i < 8; i++ )
     {
         w->cachea[i] = den1;
@@ -537,7 +537,7 @@ static void weight_cache_mmx2( x264_t *h, x264_weight_t *w )
         return;
     }
     w->weightfn = h->mc.weight;
-    den1 = w->i_offset << w->i_denom | (w->i_denom ? 1 << (w->i_denom - 1) : 0);
+    den1 = (w->i_offset * (1<<w->i_denom)) | (w->i_denom ? 1 << (w->i_denom - 1) : 0);
     for( i = 0; i < 8; i++ )
     {
         w->cachea[i] = w->i_scale;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index b946cddf..d8190b30 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -206,8 +206,8 @@ static void slice_header_init( x264_t *h, x264_slice_header_t *sh,
         sh->i_disable_deblocking_filter_idc = param->b_sliced_threads ? 2 : 0;
     else
         sh->i_disable_deblocking_filter_idc = 1;
-    sh->i_alpha_c0_offset = param->i_deblocking_filter_alphac0 << 1;
-    sh->i_beta_offset = param->i_deblocking_filter_beta << 1;
+    sh->i_alpha_c0_offset = param->i_deblocking_filter_alphac0 * 2;
+    sh->i_beta_offset = param->i_deblocking_filter_beta * 2;
 }
 
 static void slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal_ref_idc )
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 55a0beb3..9d02130c 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -1044,7 +1044,7 @@ static ALWAYS_INLINE int macroblock_probe_skip_internal( x264_t *h, int b_bidir,
             if( M32( mvp ) )
                 h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
                                  h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
-                                 mvp[0], mvp[1]<<chroma422, 8, chroma422?16:8 );
+                                 mvp[0], mvp[1] * (1<<chroma422), 8, chroma422?16:8 );
             else
                 h->mc.load_deinterleave_chroma_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4],
                                                      h->mb.pic.i_stride[1], chroma422?16:8 );
diff --git a/encoder/me.c b/encoder/me.c
index ecf88a12..3ab708e6 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -58,7 +58,7 @@ static const int8_t square1[9][2] = {{0,0}, {0,-1}, {0,1}, {-1,0}, {1,0}, {-1,-1
 static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel );
 
 #define BITS_MVD( mx, my )\
-    (p_cost_mvx[(mx)<<2] + p_cost_mvy[(my)<<2])
+    (p_cost_mvx[(mx)*4] + p_cost_mvy[(my)*4])
 
 #define COST_MV( mx, my )\
 do\
@@ -132,9 +132,9 @@ do\
         p_fref_w + (m1x) + (m1y)*stride,\
         p_fref_w + (m2x) + (m2y)*stride,\
         stride, costs );\
-    costs[0] += p_cost_mvx[(m0x)<<2]; /* no cost_mvy */\
-    costs[1] += p_cost_mvx[(m1x)<<2];\
-    costs[2] += p_cost_mvx[(m2x)<<2];\
+    costs[0] += p_cost_mvx[(m0x)*4]; /* no cost_mvy */\
+    costs[1] += p_cost_mvx[(m1x)*4];\
+    costs[2] += p_cost_mvx[(m2x)*4];\
     COPY3_IF_LT( bcost, costs[0], bmx, m0x, bmy, m0y );\
     COPY3_IF_LT( bcost, costs[1], bmx, m1x, bmy, m1y );\
     COPY3_IF_LT( bcost, costs[2], bmx, m2x, bmy, m2y );\
@@ -176,7 +176,7 @@ do\
 }
 
 #define FPEL(mv) (((mv)+2)>>2) /* Convert subpel MV to fullpel with rounding... */
-#define SPEL(mv) ((mv)<<2)     /* ... and the reverse. */
+#define SPEL(mv) ((mv)*4)      /* ... and the reverse. */
 #define SPELx2(mv) (SPEL(mv)&0xFFFCFFFC) /* for two packed MVs */
 
 void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
@@ -201,7 +201,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
     int mv_x_max = h->mb.mv_limit_fpel[1][0];
     int mv_y_max = h->mb.mv_limit_fpel[1][1];
 /* Special version of pack to allow shortcuts in CHECK_MVRANGE */
-#define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
+#define pack16to32_mask2(mx,my) (((uint32_t)(mx)<<16)|((uint32_t)(my)&0x7FFF))
     uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
     uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
     uint32_t pmv, bpred_mv = 0;
@@ -333,8 +333,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                 COPY1_IF_LT( bcost, (costs[3]<<4)+12 );
                 if( !(bcost&15) )
                     break;
-                bmx -= (bcost<<28)>>30;
-                bmy -= (bcost<<30)>>30;
+                bmx -= (int32_t)((uint32_t)bcost<<28)>>30;
+                bmy -= (int32_t)((uint32_t)bcost<<30)>>30;
                 bcost &= ~15;
             } while( --i && CHECK_MVRANGE(bmx, bmy) );
             bcost >>= 4;
@@ -606,7 +606,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                     if( dir )
                     {
                         bmx = omx + i*(dir>>4);
-                        bmy = omy + i*((dir<<28)>>28);
+                        bmy = omy + i*((int32_t)((uint32_t)dir<<28)>>28);
                     }
                 }
             } while( ++i <= i_me_range>>2 );
@@ -661,7 +661,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                 for( int my = min_y; my <= max_y; my++ )
                 {
                     int i;
-                    int ycost = p_cost_mvy[my<<2];
+                    int ycost = p_cost_mvy[my*4];
                     if( bsad <= ycost )
                         continue;
                     bsad -= ycost;
@@ -753,7 +753,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                 for( int my = min_y; my <= max_y; my++ )
                 {
                     int i;
-                    int ycost = p_cost_mvy[my<<2];
+                    int ycost = p_cost_mvy[my*4];
                     if( bcost <= ycost )
                         continue;
                     bcost -= ycost;
@@ -776,7 +776,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
     uint32_t bmv_spel = SPELx2(bmv);
     if( h->mb.i_subpel_refine < 3 )
     {
-        m->cost_mv = p_cost_mvx[bmx<<2] + p_cost_mvy[bmy<<2];
+        m->cost_mv = p_cost_mvx[bmx*4] + p_cost_mvy[bmy*4];
         m->cost = bcost;
         /* compute the real cost */
         if( bmv == pmv ) m->cost += m->cost_mv;
@@ -915,8 +915,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
             COPY1_IF_LT( bcost, (costs[3]<<6)+48 );
             if( !(bcost&63) )
                 break;
-            bmx -= (bcost<<26)>>29;
-            bmy -= (bcost<<29)>>29;
+            bmx -= (int32_t)((uint32_t)bcost<<26)>>29;
+            bmy -= (int32_t)((uint32_t)bcost<<29)>>29;
             bcost &= ~63;
         }
         bcost >>= 6;
@@ -980,8 +980,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
         COPY1_IF_LT( bcost, (costs[1]<<4)+3 );
         COPY1_IF_LT( bcost, (costs[2]<<4)+4 );
         COPY1_IF_LT( bcost, (costs[3]<<4)+12 );
-        bmx -= (bcost<<28)>>30;
-        bmy -= (bcost<<30)>>30;
+        bmx -= (int32_t)((uint32_t)bcost<<28)>>30;
+        bmy -= (int32_t)((uint32_t)bcost<<30)>>30;
         bcost >>= 4;
     }
 
-- 
2.40.0