Add atomics to vp8 synchronization primitives.

author Peter Boström <pbos@google.com>

Fri, 25 Aug 2017 22:48:11 +0000 (15:48 -0700)

committer Peter Boström <pbos@google.com>

Fri, 1 Sep 2017 00:55:57 +0000 (17:55 -0700)
author Peter Boström <pbos@google.com>
Fri, 25 Aug 2017 22:48:11 +0000 (15:48 -0700)
committer Peter Boström <pbos@google.com>
Fri, 1 Sep 2017 00:55:57 +0000 (17:55 -0700)
diff --git a/vp8/common/threading.h b/vp8/common/threading.h

index ece64f3fb438a13f539477b9374dd9eab429d8d6..b082bf109e413da4f2e88c8de970a479c6eae728 100644 (file)
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -191,47 +191,18 @@ static inline int sem_destroy(sem_t *sem) {
  #define x86_pause_hint()
  #endif
  
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#define USE_MUTEX_LOCK 1
-#endif
-#endif
-
  #include "vpx_util/vpx_thread.h"
+#include "vpx_util/vpx_atomics.h"
  
-static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) {
-  (void)mutex;
-#if defined(USE_MUTEX_LOCK)
-  int ret;
-  pthread_mutex_lock(mutex);
-  ret = *p;
-  pthread_mutex_unlock(mutex);
-  return ret;
-#endif
-  return *p;
-}
-
-static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col,
-                             const int *last_row_current_mb_col,
-                             const int nsync) {
-  while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
+static INLINE void vp8_atomic_spin_wait(
+    int mb_col, const vpx_atomic_int *last_row_current_mb_col,
+    const int nsync) {
+  while (mb_col > (vpx_atomic_load_acquire(last_row_current_mb_col) - nsync)) {
      x86_pause_hint();
      thread_sleep(0);
    }
  }
  
-static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) {
-  (void)mutex;
-#if defined(USE_MUTEX_LOCK)
-  pthread_mutex_lock(mutex);
-  *p = v;
-  pthread_mutex_unlock(mutex);
-  return;
-#endif
-  *p = v;
-}
-
-#undef USE_MUTEX_LOCK
  #endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
  
  #ifdef __cplusplus
diff --git a/vp8/decoder/decodeframe.c b/vp8/decoder/decodeframe.c

index d900b670d2fdb8bf99ca7aebd7b05997bdfce41e..077bd3da268dbe8df57459b0390c56a2d88835b3 100644 (file)
--- a/vp8/decoder/decodeframe.c
+++ b/vp8/decoder/decodeframe.c
@@ -1205,7 +1205,8 @@ int vp8_decode_frame(VP8D_COMP *pbi) {
    pbi->frame_corrupt_residual = 0;
  
  #if CONFIG_MULTITHREAD
-  if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) &&
+      pc->multi_token_partition != ONE_PARTITION) {
      unsigned int thread;
      vp8mt_decode_mb_rows(pbi, xd);
      vp8_yv12_extend_frame_borders(yv12_fb_new);
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h

index d05368544e55b66d930394f18e15b2c5e90667c2..5ecacdbb9723934e437857e844fe4ea29d7cbc10 100644 (file)
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -68,7 +68,7 @@ typedef struct VP8D_COMP {
  #if CONFIG_MULTITHREAD
    /* variable for threading */
  
-  int b_multithreaded_rd;
+  vpx_atomic_int b_multithreaded_rd;
    int max_threads;
    int current_mb_col_main;
    unsigned int decoding_thread_count;
@@ -76,9 +76,8 @@ typedef struct VP8D_COMP {
  
    int mt_baseline_filter_level[MAX_MB_SEGMENTS];
    int sync_range;
-  int *mt_current_mb_col; /* Each row remembers its already decoded column. */
-  pthread_mutex_t *pmutex;
-  pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */
+  /* Each row remembers its already decoded column. */
+  vpx_atomic_int *mt_current_mb_col;
  
    unsigned char **mt_yabove_row; /* mb_rows x width */
    unsigned char **mt_uabove_row;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c

index f5bdae493f4326964de303433406b5645beee953..aadc8dc712f89391b47a74e2b01530db5a3cb32e 100644 (file)
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -79,7 +79,8 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
      if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8;
    }
  
-  for (i = 0; i < pc->mb_rows; ++i) pbi->mt_current_mb_col[i] = -1;
+  for (i = 0; i < pc->mb_rows; ++i)
+    vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1);
  }
  
  static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
@@ -247,12 +248,13 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
  
  static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                int start_mb_row) {
-  const int *last_row_current_mb_col;
-  int *current_mb_col;
+  const vpx_atomic_int *last_row_current_mb_col;
+  vpx_atomic_int *current_mb_col;
    int mb_row;
    VP8_COMMON *pc = &pbi->common;
    const int nsync = pbi->sync_range;
-  const int first_row_no_sync_above = pc->mb_cols + nsync;
+  const vpx_atomic_int first_row_no_sync_above =
+      VPX_ATOMIC_INIT(pc->mb_cols + nsync);
    int num_part = 1 << pbi->common.multi_token_partition;
    int last_mb_row = start_mb_row;
  
@@ -356,13 +358,11 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
  
      for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {
        if (((mb_col - 1) % nsync) == 0) {
-        pthread_mutex_t *mutex = &pbi->pmutex[mb_row];
-        protected_write(mutex, current_mb_col, mb_col - 1);
+        vpx_atomic_store_release(current_mb_col, mb_col - 1);
        }
  
        if (mb_row && !(mb_col & (nsync - 1))) {
-        pthread_mutex_t *mutex = &pbi->pmutex[mb_row - 1];
-        sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
+        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
        }
  
        /* Distance of MB to the various image edges.
@@ -548,7 +548,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
      }
  
      /* last MB of row is ready just after extension is done */
-    protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync);
+    vpx_atomic_store_release(current_mb_col, mb_col + nsync);
  
      ++xd->mode_info_context; /* skip prediction column */
      xd->up_available = 1;
@@ -568,10 +568,10 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) {
    ENTROPY_CONTEXT_PLANES mb_row_left_context;
  
    while (1) {
-    if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break;
+    if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break;
  
      if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {
-      if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) {
+      if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) {
          break;
        } else {
          MACROBLOCKD *xd = &mbrd->mbd;
@@ -589,9 +589,8 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
    int core_count = 0;
    unsigned int ithread;
  
-  pbi->b_multithreaded_rd = 0;
+  vpx_atomic_init(&pbi->b_multithreaded_rd, 0);
    pbi->allocated_decoding_thread_count = 0;
-  pthread_mutex_init(&pbi->mt_mutex, NULL);
  
    /* limit decoding threads to the max number of token partitions */
    core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
@@ -602,7 +601,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
    }
  
    if (core_count > 1) {
-    pbi->b_multithreaded_rd = 1;
+    vpx_atomic_init(&pbi->b_multithreaded_rd, 1);
      pbi->decoding_thread_count = core_count - 1;
  
      CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
@@ -648,16 +647,6 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
  void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {
    int i;
  
-  /* De-allocate mutex */
-  if (pbi->pmutex != NULL) {
-    for (i = 0; i < mb_rows; ++i) {
-      pthread_mutex_destroy(&pbi->pmutex[i]);
-    }
-
-    vpx_free(pbi->pmutex);
-    pbi->pmutex = NULL;
-  }
-
    vpx_free(pbi->mt_current_mb_col);
    pbi->mt_current_mb_col = NULL;
  
@@ -723,7 +712,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
    int i;
    int uv_width;
  
-  if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
      vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
  
      /* our internal buffers are always multiples of 16 */
@@ -741,17 +730,11 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
  
      uv_width = width >> 1;
  
-    /* Allocate mutex */
-    CHECK_MEM_ERROR(pbi->pmutex,
-                    vpx_malloc(sizeof(*pbi->pmutex) * pc->mb_rows));
-    if (pbi->pmutex) {
-      for (i = 0; i < pc->mb_rows; ++i) {
-        pthread_mutex_init(&pbi->pmutex[i], NULL);
-      }
-    }
-
-    /* Allocate an int for each mb row. */
-    CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
+    /* Allocate a vpx_atomic_int for each mb row. */
+    CHECK_MEM_ERROR(pbi->mt_current_mb_col,
+                    vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows));
+    for (i = 0; i < pc->mb_rows; ++i)
+      vpx_atomic_init(&pbi->mt_current_mb_col[i], 0);
  
      /* Allocate memory for above_row buffers. */
      CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
@@ -792,9 +775,9 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
  
  void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
    /* shutdown MB Decoding thread; */
-  if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
      int i;
-    protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0);
+    vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0);
  
      /* allow all threads to exit */
      for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
@@ -824,7 +807,6 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
  
      vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
    }
-  pthread_mutex_destroy(&pbi->mt_mutex);
  }
  
  void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c

index 7086faae98bbcf9927305a56423016ce7c3eedba..d7a17b749625f673478bb3feaf7349ce57d01094 100644 (file)
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1416,7 +1416,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
      vp8_start_encode(&cpi->bc[1], cx_data, cx_data_end);
  
  #if CONFIG_MULTITHREAD
-    if (cpi->b_multi_threaded) {
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
        pack_mb_row_tokens(cpi, &cpi->bc[1]);
      } else {
        vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c

index b867f6cb19a95278f370275174e5741f3352c8ac..9bb0df72d521e57740a2325be6e05b27e35bbbf1 100644 (file)
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -341,11 +341,11 @@ static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row,
  
  #if CONFIG_MULTITHREAD
    const int nsync = cpi->mt_sync_range;
-  const int rightmost_col = cm->mb_cols + nsync;
-  const int *last_row_current_mb_col;
-  int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+  vpx_atomic_int rightmost_col = VPX_ATOMIC_INIT(cm->mb_cols + nsync);
+  const vpx_atomic_int *last_row_current_mb_col;
+  vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
  
-  if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0 && mb_row != 0) {
      last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
    } else {
      last_row_current_mb_col = &rightmost_col;
@@ -415,15 +415,13 @@ static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row,
      vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
  
  #if CONFIG_MULTITHREAD
-    if (cpi->b_multi_threaded != 0) {
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {
        if (((mb_col - 1) % nsync) == 0) {
-        pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
-        protected_write(mutex, current_mb_col, mb_col - 1);
+        vpx_atomic_store_release(current_mb_col, mb_col - 1);
        }
  
        if (mb_row && !(mb_col & (nsync - 1))) {
-        pthread_mutex_t *mutex = &cpi->pmutex[mb_row - 1];
-        sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
+        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
        }
      }
  #endif
@@ -563,8 +561,9 @@ static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row,
                      xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
  
  #if CONFIG_MULTITHREAD
-  if (cpi->b_multi_threaded != 0) {
-    protected_write(&cpi->pmutex[mb_row], current_mb_col, rightmost_col);
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {
+    vpx_atomic_store_release(current_mb_col,
+                             vpx_atomic_load_acquire(&rightmost_col));
    }
  #endif
  
@@ -749,13 +748,14 @@ void vp8_encode_frame(VP8_COMP *cpi) {
      vpx_usec_timer_start(&emr_timer);
  
  #if CONFIG_MULTITHREAD
-    if (cpi->b_multi_threaded) {
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
        int i;
  
        vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei,
                                  cpi->encoding_thread_count);
  
-      for (i = 0; i < cm->mb_rows; ++i) cpi->mt_current_mb_col[i] = -1;
+      for (i = 0; i < cm->mb_rows; ++i)
+        vpx_atomic_store_release(&cpi->mt_current_mb_col[i], -1);
  
        for (i = 0; i < cpi->encoding_thread_count; ++i) {
          sem_post(&cpi->h_event_start_encoding[i]);
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c

index 3e5b709e0d3f3a615fe9f47408427b8a73be2d1e..55a1528b14a34872099c412b623f24854b355cb4 100644 (file)
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -26,11 +26,11 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data) {
    VP8_COMMON *cm = &cpi->common;
  
    while (1) {
-    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;
  
      if (sem_wait(&cpi->h_event_start_lpf) == 0) {
        /* we're shutting down */
-      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+      if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;
  
        vp8_loopfilter_frame(cpi, cm);
  
@@ -48,7 +48,7 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
    ENTROPY_CONTEXT_PLANES mb_row_left_context;
  
    while (1) {
-    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;
  
      if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) {
        const int nsync = cpi->mt_sync_range;
@@ -66,7 +66,7 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
        int *totalrate = &mbri->totalrate;
  
        /* we're shutting down */
-      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;
+      if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;
  
        xd->mode_info_context = cm->mi + cm->mode_info_stride * (ithread + 1);
        xd->mode_info_stride = cm->mode_info_stride;
@@ -80,8 +80,8 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
          int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
          int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
          int map_index = (mb_row * cm->mb_cols);
-        const int *last_row_current_mb_col;
-        int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+        const vpx_atomic_int *last_row_current_mb_col;
+        vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
  
  #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
          vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
@@ -108,13 +108,11 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
          /* for each macroblock col in image */
          for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
            if (((mb_col - 1) % nsync) == 0) {
-            pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
-            protected_write(mutex, current_mb_col, mb_col - 1);
+            vpx_atomic_store_release(current_mb_col, mb_col - 1);
            }
  
            if (mb_row && !(mb_col & (nsync - 1))) {
-            pthread_mutex_t *mutex = &cpi->pmutex[mb_row - 1];
-            sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
+            vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
            }
  
  #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
@@ -286,7 +284,7 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
          vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16,
                            xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
  
-        protected_write(&cpi->pmutex[mb_row], current_mb_col, mb_col + nsync);
+        vpx_atomic_store_release(current_mb_col, mb_col + nsync);
  
          /* this is to account for the border */
          xd->mode_info_context++;
@@ -490,12 +488,10 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x,
  int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
    const VP8_COMMON *cm = &cpi->common;
  
-  cpi->b_multi_threaded = 0;
+  vpx_atomic_init(&cpi->b_multi_threaded, 0);
    cpi->encoding_thread_count = 0;
    cpi->b_lpf_running = 0;
  
-  pthread_mutex_init(&cpi->mt_mutex, NULL);
-
    if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
      int ithread;
      int th_count = cpi->oxcf.multi_threaded - 1;
@@ -526,7 +522,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
      CHECK_MEM_ERROR(cpi->en_thread_data,
                      vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
  
-    cpi->b_multi_threaded = 1;
+    vpx_atomic_store_release(&cpi->b_multi_threaded, 1);
      cpi->encoding_thread_count = th_count;
  
      /*
@@ -555,7 +551,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  
      if (rc) {
        /* shutdown other threads */
-      protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
+      vpx_atomic_store_release(&cpi->b_multi_threaded, 0);
        for (--ithread; ithread >= 0; ithread--) {
          pthread_join(cpi->h_encoding_thread[ithread], 0);
          sem_destroy(&cpi->h_event_start_encoding[ithread]);
@@ -569,8 +565,6 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
        vpx_free(cpi->mb_row_ei);
        vpx_free(cpi->en_thread_data);
  
-      pthread_mutex_destroy(&cpi->mt_mutex);
-
        return -1;
      }
  
@@ -585,7 +579,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  
        if (rc) {
          /* shutdown other threads */
-        protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
+        vpx_atomic_store_release(&cpi->b_multi_threaded, 0);
          for (--ithread; ithread >= 0; ithread--) {
            sem_post(&cpi->h_event_start_encoding[ithread]);
            sem_post(&cpi->h_event_end_encoding[ithread]);
@@ -603,8 +597,6 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
          vpx_free(cpi->mb_row_ei);
          vpx_free(cpi->en_thread_data);
  
-        pthread_mutex_destroy(&cpi->mt_mutex);
-
          return -2;
        }
      }
@@ -613,9 +605,9 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  }
  
  void vp8cx_remove_encoder_threads(VP8_COMP *cpi) {
-  if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded)) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
      /* shutdown other threads */
-    protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
+    vpx_atomic_store_release(&cpi->b_multi_threaded, 0);
      {
        int i;
  
@@ -643,6 +635,5 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi) {
      vpx_free(cpi->mb_row_ei);
      vpx_free(cpi->en_thread_data);
    }
-  pthread_mutex_destroy(&cpi->mt_mutex);
  }
  #endif
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c

index f68fa22af134652571facaf205745d353f207e85..725e000e213927c6d244e7669fdf137db81e15c9 100644 (file)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -451,18 +451,6 @@ static void dealloc_compressor_data(VP8_COMP *cpi) {
    cpi->mb.pip = 0;
  
  #if CONFIG_MULTITHREAD
-  /* De-allocate mutex */
-  if (cpi->pmutex != NULL) {
-    VP8_COMMON *const pc = &cpi->common;
-    int i;
-
-    for (i = 0; i < pc->mb_rows; ++i) {
-      pthread_mutex_destroy(&cpi->pmutex[i]);
-    }
-    vpx_free(cpi->pmutex);
-    cpi->pmutex = NULL;
-  }
-
    vpx_free(cpi->mt_current_mb_col);
    cpi->mt_current_mb_col = NULL;
  #endif
@@ -1153,9 +1141,6 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {
  
    int width = cm->Width;
    int height = cm->Height;
-#if CONFIG_MULTITHREAD
-  int prev_mb_rows = cm->mb_rows;
-#endif
  
    if (vp8_alloc_frame_buffers(cm, width, height)) {
      vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
@@ -1247,26 +1232,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {
    if (cpi->oxcf.multi_threaded > 1) {
      int i;
  
-    /* De-allocate and re-allocate mutex */
-    if (cpi->pmutex != NULL) {
-      for (i = 0; i < prev_mb_rows; ++i) {
-        pthread_mutex_destroy(&cpi->pmutex[i]);
-      }
-      vpx_free(cpi->pmutex);
-      cpi->pmutex = NULL;
-    }
-
-    CHECK_MEM_ERROR(cpi->pmutex,
-                    vpx_malloc(sizeof(*cpi->pmutex) * cm->mb_rows));
-    if (cpi->pmutex) {
-      for (i = 0; i < cm->mb_rows; ++i) {
-        pthread_mutex_init(&cpi->pmutex[i], NULL);
-      }
-    }
-
      vpx_free(cpi->mt_current_mb_col);
      CHECK_MEM_ERROR(cpi->mt_current_mb_col,
                      vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
+    for (i = 0; i < cm->mb_rows; ++i)
+      vpx_atomic_init(&cpi->mt_current_mb_col[i], 0);
    }
  
  #endif
@@ -3274,7 +3244,7 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) {
    }
  
  #if CONFIG_MULTITHREAD
-  if (cpi->b_multi_threaded) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
      sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
    }
  #endif
@@ -4471,7 +4441,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
  #endif
  
  #if CONFIG_MULTITHREAD
-  if (cpi->b_multi_threaded) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
      /* start loopfilter in separate thread */
      sem_post(&cpi->h_event_start_lpf);
      cpi->b_lpf_running = 1;
@@ -4497,7 +4467,8 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
  #if CONFIG_MULTITHREAD
    /* wait that filter_level is picked so that we can continue with stream
     * packing */
-  if (cpi->b_multi_threaded) sem_wait(&cpi->h_event_end_lpf);
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded))
+    sem_wait(&cpi->h_event_end_lpf);
  #endif
  
    /* build the bitstream */
@@ -5341,7 +5312,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
  
  #if CONFIG_MULTITHREAD
    /* wait for the lpf thread done */
-  if (cpi->b_multi_threaded && cpi->b_lpf_running) {
+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) && cpi->b_lpf_running) {
      sem_wait(&cpi->h_event_end_lpf);
      cpi->b_lpf_running = 0;
    }
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h

index 08f07851ef2187b2d96caedf44ad3e5d34490361..0ee2d3553c37ced8c049399f63337763a07fecd1 100644 (file)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -518,11 +518,9 @@ typedef struct VP8_COMP {
  
  #if CONFIG_MULTITHREAD
    /* multithread data */
-  pthread_mutex_t *pmutex;
-  pthread_mutex_t mt_mutex; /* mutex for b_multi_threaded */
-  int *mt_current_mb_col;
+  vpx_atomic_int *mt_current_mb_col;
    int mt_sync_range;
-  int b_multi_threaded;
+  vpx_atomic_int b_multi_threaded;
    int encoding_thread_count;
    int b_lpf_running;
  
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c

index 987a5b8a4fde9274f0a473be6fc5853de1c39e5d..29287725244a438980d85d15fff898e3a2fc5425 100644 (file)
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -415,7 +415,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
  #endif
  
  #if CONFIG_MULTITHREAD
-        if (pbi->b_multithreaded_rd) {
+        if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
            vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);
          }
  #else
diff --git a/vpx_util/vpx_atomics.h b/vpx_util/vpx_atomics.h

new file mode 100644 (file)

index 0000000..a471fd1
--- /dev/null
+++ b/vpx_util/vpx_atomics.h
@@ -0,0 +1,133 @@
+/*
+ *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_UTIL_VPX_ATOMICS_H_
+#define VPX_UTIL_VPX_ATOMICS_H_
+
+#include "./vpx_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
+    (defined(__cplusplus) && __cplusplus >= 201112L)
+// Where available, use <stdatomic.h>
+#include <stdatomic.h>
+#define VPX_USE_STD_ATOMIC
+#else
+// Look for built-ins.
+#if !defined(__has_builtin)
+#define __has_builtin(x) 0  // Compatibility with non-clang compilers.
+#endif                      // !defined(__has_builtin)
+
+#if (__has_builtin(__atomic_load_n)) || \
+    (defined(__GNUC__) &&               \
+     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+// For GCC >= 4.7 and Clang that support __atomic builtins, use those.
+#define VPX_USE_ATOMIC_BUILTINS
+#else
+// Use platform-specific asm barriers.
+#if defined(_MSC_VER)
+// TODO(pbos): This assumes that newer versions of MSVC are building with the
+// default /volatile:ms (or are older, where this is always true). Consider
+// adding support for using <atomic> instead of stdatomic.h when building C++11
+// under MSVC. It's unclear what to do for plain C under /volatile:iso (inline
+// asm?); there are no explicit Interlocked* functions for only storing or
+// loading (presumably because volatile has historically implied that on MSVC).
+//
+// For earlier versions of MSVC, or under the default /volatile:ms, volatile
+// ints are acquire/release and require no barrier.
+#define vpx_atomic_memory_barrier() \
+  do {                              \
+  } while (0)
+#else
+#if ARCH_X86 || ARCH_X86_64
+// Use a compiler barrier on x86, no runtime penalty.
+#define vpx_atomic_memory_barrier() __asm__ __volatile__("" ::: "memory")
+#elif ARCH_ARM
+#define vpx_atomic_memory_barrier() __asm__ __volatile__("dmb ish" ::: "memory")
+#elif ARCH_MIPS
+#define vpx_atomic_memory_barrier() __asm__ __volatile__("sync" ::: "memory")
+#else
+#error Unsupported architecture!
+#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // defined(_MSC_VER)
+#endif  // atomic builtin availability check
+#endif  // stdatomic availability check
+
+// These are wrapped in a struct so that they are not easily accessed directly
+// on any platform (to discourage programmer errors by setting values directly).
+// This primitive MUST be initialized using vpx_atomic_init or VPX_ATOMIC_INIT
+// (NOT memset) and accessed through vpx_atomic_ functions.
+typedef struct vpx_atomic_int {
+#if defined(VPX_USE_STD_ATOMIC)
+  atomic_int value;
+#else
+  volatile int value;
+#endif  // defined(VPX_USE_STD_ATOMIC)
+} vpx_atomic_int;
+
+#if defined(VPX_USE_STD_ATOMIC)
+#define VPX_ATOMIC_INIT(num) \
+  { ATOMIC_VAR_INIT(num) }
+#else
+#define VPX_ATOMIC_INIT(num) \
+  { num }
+#endif  // defined(VPX_USE_STD_ATOMIC)
+
+// Initialization of an atomic int, not thread safe.
+static INLINE void vpx_atomic_init(vpx_atomic_int *atomic, int value) {
+#if defined(VPX_USE_STD_ATOMIC)
+  atomic_init(&atomic->value, value);
+#else
+  atomic->value = value;
+#endif  // defined(VPX_USE_STD_ATOMIC)
+}
+
+static INLINE void vpx_atomic_store_release(vpx_atomic_int *atomic, int value) {
+#if defined(VPX_USE_STD_ATOMIC)
+  atomic_store_explicit(&atomic->value, value, memory_order_release);
+#elif defined(VPX_USE_ATOMIC_BUILTINS)
+  __atomic_store_n(&atomic->value, value, __ATOMIC_RELEASE);
+#else
+  vpx_atomic_memory_barrier();
+  atomic->value = value;
+#endif  // defined(VPX_USE_STD_ATOMIC)
+}
+
+static INLINE int vpx_atomic_load_acquire(const vpx_atomic_int *atomic) {
+#if defined(VPX_USE_STD_ATOMIC)
+  // const_cast (in C) that doesn't trigger -Wcast-qual.
+  return atomic_load_explicit(
+      (atomic_int *)(uintptr_t)(const void *)&atomic->value,
+      memory_order_acquire);
+#elif defined(VPX_USE_ATOMIC_BUILTINS)
+  return __atomic_load_n(&atomic->value, __ATOMIC_ACQUIRE);
+#else
+  int v = atomic->value;
+  vpx_atomic_memory_barrier();
+  return v;
+#endif  // defined(VPX_USE_STD_ATOMIC)
+}
+
+#undef VPX_USE_STD_ATOMIC
+#undef VPX_USE_ATOMIC_BUILTINS
+#undef vpx_atomic_memory_barrier
+
+#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // VPX_UTIL_VPX_ATOMICS_H_
diff --git a/vpx_util/vpx_util.mk b/vpx_util/vpx_util.mk

index d48e4cc2f9ec0980cafa7a1e8081bcebc3bccce5..86d3ece3c899b6a88070336dfa1a56d66851d0c8 100644 (file)
--- a/vpx_util/vpx_util.mk
+++ b/vpx_util/vpx_util.mk
@@ -8,6 +8,7 @@
  ##  be found in the AUTHORS file in the root of the source tree.
  ##
  
+UTIL_SRCS-yes += vpx_atomics.h
  UTIL_SRCS-yes += vpx_util.mk
  UTIL_SRCS-yes += vpx_thread.c
  UTIL_SRCS-yes += vpx_thread.h
author	Peter Boström <pbos@google.com>
	Fri, 25 Aug 2017 22:48:11 +0000 (15:48 -0700)
committer	Peter Boström <pbos@google.com>
	Fri, 1 Sep 2017 00:55:57 +0000 (17:55 -0700)
vp8/common/threading.h		patch \| blob \| history
vp8/decoder/decodeframe.c		patch \| blob \| history
vp8/decoder/onyxd_int.h		patch \| blob \| history
vp8/decoder/threading.c		patch \| blob \| history
vp8/encoder/bitstream.c		patch \| blob \| history
vp8/encoder/encodeframe.c		patch \| blob \| history
vp8/encoder/ethreading.c		patch \| blob \| history
vp8/encoder/onyx_if.c		patch \| blob \| history
vp8/encoder/onyx_int.h		patch \| blob \| history
vp8/vp8_dx_iface.c		patch \| blob \| history
vpx_util/vpx_atomics.h	[new file with mode: 0644]	patch \| blob
vpx_util/vpx_util.mk		patch \| blob \| history