granicus.if.org Git - libx264/commitdiff
win32: use pthreads instead of win32threads. for some reason, pthreads is much faster.
author Loren Merritt <pengvado@videolan.org>
Fri, 15 Dec 2006 23:06:21 +0000 (23:06 +0000)
committer Loren Merritt <pengvado@videolan.org>
Fri, 15 Dec 2006 23:06:21 +0000 (23:06 +0000)
git-svn-id: svn://svn.videolan.org/x264/trunk@608 df754926-b1dd-0310-bc7b-ec298dee348c

common/common.h
common/frame.c
common/frame.h
configure
encoder/analyse.c
encoder/encoder.c
x264.c

diff --git a/common/common.h b/common/common.h
index a166a8311b6411be69023bfe1ac135b13ea6ed45..d69d894b2eb8bce1b17fecaf3b279f918d1c8e2c 100644 (file)
--- a/common/common.h
+++ b/common/common.h
 #endif
 
 /* threads */
-#ifdef __WIN32__
-#include <windows.h>
-#define pthread_t               HANDLE
-#define pthread_create(t,u,f,d) *(t)=CreateThread(NULL,0,f,d,0,NULL)
-#define pthread_join(t,s)       { WaitForSingleObject(t,INFINITE); \
-                                  CloseHandle(t); } 
-#define usleep(t)               Sleep((t+999)/1000);
-#define HAVE_PTHREAD 1
+#if defined(__WIN32__) && defined(HAVE_PTHREAD)
+#include <pthread.h>
+#define USE_CONDITION_VAR
 
 #elif defined(SYS_BEOS)
 #include <kernel/OS.h>
 #define pthread_t               int
 #define pthread_create(t,u,f,d)
 #define pthread_join(t,s)
+#endif //SYS_*
+
+#ifndef USE_CONDITION_VAR
+#define pthread_mutex_t         int
+#define pthread_mutex_init(m,f)
+#define pthread_mutex_destroy(m)
+#define pthread_mutex_lock(m)
+#define pthread_mutex_unlock(m)
+#define pthread_cond_t          int
+#define pthread_cond_init(c,f)
+#define pthread_cond_destroy(c)
+#define pthread_cond_broadcast(c)
+#define pthread_cond_wait(c,m)  usleep(100)
 #endif
 
 /****************************************************************************
diff --git a/common/frame.c b/common/frame.c
index ea280d489b8ea3c2870217eead79e9f61ce30aee..6b173f3a9191fad3abef02ccbf34ba12d7c35472 100644 (file)
--- a/common/frame.c
+++ b/common/frame.c
@@ -23,6 +23,7 @@
 
 #include <stdio.h>
 #include <string.h>
+#include <unistd.h>
 
 #include "common.h"
 
@@ -131,6 +132,9 @@ x264_frame_t *x264_frame_new( x264_t *h )
         for( j = 0; j < h->param.i_bframe + 2; j++ )
             CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 
+    pthread_mutex_init( &frame->mutex, NULL );
+    pthread_cond_init( &frame->cv, NULL );
+
     return frame;
 
 fail:
@@ -155,6 +159,8 @@ void x264_frame_delete( x264_frame_t *frame )
     x264_free( frame->mv[1] );
     x264_free( frame->ref[0] );
     x264_free( frame->ref[1] );
+    pthread_mutex_destroy( &frame->mutex );
+    pthread_cond_destroy( &frame->cv );
     x264_free( frame );
 }
 
@@ -751,3 +757,32 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
 #endif
 }
 
+
+/* threading */
+
+#ifdef HAVE_PTHREAD
+void x264_frame_cond_broadcast( x264_frame_t *frame )
+{
+    pthread_mutex_lock( &frame->mutex );
+    pthread_cond_broadcast( &frame->cv );
+    pthread_mutex_unlock( &frame->mutex );
+}
+
+void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
+{
+    if( frame->i_lines_completed < i_lines_completed )
+    {
+        pthread_mutex_lock( &frame->mutex );
+        while( frame->i_lines_completed < i_lines_completed )
+            pthread_cond_wait( &frame->cv, &frame->mutex );
+        pthread_mutex_unlock( &frame->mutex );
+    }
+}
+
+#else
+void x264_frame_cond_broadcast( x264_frame_t *frame )
+{}
+void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
+{}
+#endif
+
diff --git a/common/frame.h b/common/frame.h
index 09fece187b765d9b75c727c6739fa21f19d6cca3..638ecab22e2e5065772c5b06ea99f5a7e27284a7 100644 (file)
--- a/common/frame.h
+++ b/common/frame.h
@@ -73,6 +73,8 @@ typedef struct
     /* threading */
     int     i_lines_completed; /* in pixels */
     int     i_reference_count; /* number of threads using this frame (not necessarily the number of pointers) */
+    pthread_mutex_t mutex;      
+    pthread_cond_t  cv;
 
 } x264_frame_t;
 
@@ -108,4 +110,7 @@ void          x264_frame_init_lowres( int cpu, x264_frame_t *frame );
 
 void          x264_deblock_init( int cpu, x264_deblock_function_t *pf );
 
+void          x264_frame_cond_broadcast( x264_frame_t *frame );
+void          x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed );
+
 #endif
diff --git a/configure b/configure
index 41c48bca06afa9caff4a0df0a5e691eb948a1433..8f43eda45bed647f408df3db5991f92e9dc78f6c 100755 (executable)
--- a/configure
+++ b/configure
@@ -29,9 +29,9 @@ cc_check() {
 #include <$1>
 int main () { $3 return 0; }
 EOF
-    $CC $CFLAGS $LDFLAGS $2 conftest.c -o conftest 2>$DEVNULL
+    $CC conftest.c $CFLAGS $LDFLAGS $2 -o conftest 2>$DEVNULL
     TMP="$?"
-    rm -f conftest.c conftest*
+    rm -f conftest*
     return $TMP
 }
 
@@ -267,25 +267,34 @@ done
 
 # autodetect options that weren't forced nor disabled
 
+libpthread=""
 if test "$pthread" = "auto" ; then
+    pthread="no"
     case $SYS in
-        MINGW|CYGWIN|BEOS)
+        BEOS)
             pthread="yes"
             ;;
+        MINGW|CYGWIN)
+            if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
+                pthread="yes"
+                libpthread="-lpthread"
+            elif cc_check pthread.h -lpthreadGC2 "pthread_create(0,0,0,0);" ; then
+                pthread="yes"
+                libpthread="-lpthreadGC2"
+            elif cc_check pthread.h "-lpthreadGC2 -lwsock32 -DPTW32_STATIC_LIB" "pthread_create(0,0,0,0);" ; then
+                pthread="yes"
+                libpthread="-lpthreadGC2 -lwsock32"\
+                CFLAGS="$CFLAGS -DPTW32_STATIC_LIB"
+            fi
+            ;;
         *)
-            pthread="no"
-            cc_check pthread.h -lpthread && pthread="yes"
+            cc_check pthread.h -lpthread && pthread="yes" && libpthread="-lpthread"
             ;;
     esac
 fi
 if test "$pthread" = "yes" ; then
     CFLAGS="$CFLAGS -DHAVE_PTHREAD"
-    case $SYS in
-        MINGW|CYGWIN|BEOS)
-            ;;
-        *) LDFLAGS="$LDFLAGS -lpthread"
-            ;;
-    esac
+    LDFLAGS="$LDFLAGS $libpthread"
 fi
 
 MP4_LDFLAGS="-lgpac_static"
@@ -383,15 +392,7 @@ fi
 
 ./version.sh
 
-pclibs="-L$libdir -lx264"
-if test "$pthread" = "yes" ; then
-  case $SYS in
-    MINGW|CYGWIN|BEOS)
-      ;;
-    *) pclibs="$pclibs -lpthread"
-      ;;
-  esac
-fi
+pclibs="-L$libdir -lx264 $libpthread"
 
 cat > x264.pc << EOF
 prefix=$prefix
diff --git a/encoder/analyse.c b/encoder/analyse.c
index e854da8c1f900f64392722f44f82dfe283a62ad7..e2ddf30f333ae285febbe5325da179f59a6417b7 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -248,10 +248,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
                     int i_ref = i ? h->i_ref1 : h->i_ref0;
                     for( j=0; j<i_ref; j++ )
                     {
-                        // could use a condition variable or the like, but
-                        // this way is faster at least on LinuxThreads.
-                        while( fref[j]->i_lines_completed < thresh )
-                            usleep(100);
+                        x264_frame_cond_wait( fref[j], thresh );
                         thread_mvy_range = X264_MIN( thread_mvy_range, fref[j]->i_lines_completed - pix_y );
                     }
                 }
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 84aa9325f7c8d273491d313f7daba6a6e87011d0..3edf1c99604b12e1a033ee74ae368d7e62791a18 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -997,8 +997,8 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
 
     if( h->param.i_threads > 1 )
     {
-        /* this must be an atomic store. a 32bit int should be so on sane architectures. */
         h->fdec->i_lines_completed = mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff));
+        x264_frame_cond_broadcast( h->fdec );
     }
 }
 
diff --git a/x264.c b/x264.c
index cf194ef90704bf2254e2d946dbadf985651ddb3a..7718a06664582c9d5dca1f8cf35bb4ca2e7b6bb0 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -92,6 +92,12 @@ int main( int argc, char **argv )
 {
     x264_param_t param;
     cli_opt_t opt;
+    int ret;
+
+#ifdef PTW32_STATIC_LIB
+    pthread_win32_process_attach_np();
+    pthread_win32_thread_attach_np();
+#endif
 
 #ifdef _MSC_VER
     _setmode(_fileno(stdin), _O_BINARY);
@@ -107,7 +113,14 @@ int main( int argc, char **argv )
     /* Control-C handler */
     signal( SIGINT, SigIntHandler );
 
-    return Encode( &param, &opt );
+    ret = Encode( &param, &opt );
+
+#ifdef PTW32_STATIC_LIB
+    pthread_win32_thread_detach_np();
+    pthread_win32_process_detach_np();
+#endif
+
+    return ret;
 }
 
 static char const *strtable_lookup( const char * const table[], int index )