From: Brian Behlendorf Date: Fri, 25 Sep 2009 21:47:01 +0000 (-0700) Subject: Reimplement mutexs for Linux lock profiling/analysis X-Git-Tag: zfs-0.8.0-rc1~152^2~662 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4d54fdee1d774ddaef381893434a3721067e2c56;p=zfs Reimplement mutexs for Linux lock profiling/analysis For a generic explanation of why mutexes needed to be reimplemented to work with the kernel lock profiling, see commits: e811949a57044d60d12953c5c3b808a79a7d36ef and d28db80fd0fd4fd63aec09037c44408e51a222d6 The specific changes made to the mutex implementation are as follows. The Linux mutex structure is now directly embedded in the kmutex_t. This allows a kmutex_t to be directly cast to a mutex struct and passed directly to the Linux primitive. Just like with the rwlocks, it is critical that these functions be implemented as #defines to ensure the location information is preserved. The preprocessor can then do a direct replacement of the Solaris primitive with the Linux primitive. Just as with the rwlocks, we need to track the lock owner. Here things get a little more interesting because, depending on your kernel version and how you've built your kernel, Linux may already do this for you. If you're running a 2.6.29 or newer kernel on an SMP system, the lock owner will be tracked. This was added to Linux to support adaptive mutexes, more on that shortly. Alternately, your kernel might track the lock owner if you've set CONFIG_DEBUG_MUTEXES in the kernel build. If neither of the above things is true for your kernel, the kmutex_t type will include and track the lock owner to ensure correct behavior. This is all handled by a new autoconf check called SPL_AC_MUTEX_OWNER. Concerning adaptive mutexes, these are a very recent development and they did not make it into either the latest FC11 or SLES11 kernels. Ideally, I'd love to see this kernel change appear in one of these distros because it does help performance. 
From Linux kernel commit: 0d66bf6d3514b35eb6897629059443132992dbd7 "Testing with Ingo's test-mutex application... gave a 345% boost for VFS scalability on my testbox" However, if you don't want to backport this change yourself, you can still simply export the task_curr() symbol. The kmutex_t implementation will use this symbol when it's available to provide its own adaptive mutexes. Finally, DEBUG_MUTEX support was removed, including the proc handlers. This was done because, now that we are cleanly integrated with the kernel profiling, all this information and much much more is available in debug kernel builds. This code is now redundant. Updated mutexes validated on: - SLES10 (ppc64) - SLES11 (x86_64) - CHAOS4.2 (x86_64) - RHEL5.3 (x86_64) - RHEL6 (x86_64) - FC11 (x86_64) --- diff --git a/config/spl-build.m4 b/config/spl-build.m4 index c52ea5db4..3293aa273 100644 --- a/config/spl-build.m4 +++ b/config/spl-build.m4 @@ -23,7 +23,6 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [ SPL_AC_DEBUG SPL_AC_DEBUG_KMEM - SPL_AC_DEBUG_MUTEX SPL_AC_DEBUG_KSTAT SPL_AC_DEBUG_CALLB SPL_AC_TYPE_UINTPTR_T @@ -48,6 +47,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [ SPL_AC_KMALLOC_NODE SPL_AC_MONOTONIC_CLOCK SPL_AC_INODE_I_MUTEX + SPL_AC_MUTEX_OWNER SPL_AC_MUTEX_LOCK_NESTED SPL_AC_DIV64_64 SPL_AC_DIV64_U64 @@ -256,28 +256,6 @@ AC_DEFUN([SPL_AC_DEBUG_KMEM], [ fi ]) -AC_DEFUN([SPL_AC_DEBUG_MUTEX], [ - AC_MSG_CHECKING([whether mutex debugging is enabled]) - AC_ARG_ENABLE( [debug-mutex], - AS_HELP_STRING([--enable-debug-mutex], - [Enable mutex debug support (default off)]), - [ case "$enableval" in - yes) spl_ac_debug_mutex=yes ;; - no) spl_ac_debug_mutex=no ;; - *) AC_MSG_RESULT([Error!]) - AC_MSG_ERROR([Bad value "$enableval" for --enable-debug-mutex]) ;; - esac ] - ) - if test "$spl_ac_debug_mutex" = yes; then - AC_MSG_RESULT([yes]) - AC_DEFINE([DEBUG_MUTEX], [1], - [Define to 1 to enable mutex debugging]) - KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_MUTEX" - else - AC_MSG_RESULT([no]) - fi -]) - 
AC_DEFUN([SPL_AC_DEBUG_KSTAT], [ AC_MSG_CHECKING([whether kstat debugging is enabled]) AC_ARG_ENABLE( [debug-kstat], @@ -825,6 +803,25 @@ AC_DEFUN([SPL_AC_INODE_I_MUTEX], [ ]) ]) +dnl # +dnl # 2.6.29 API change, +dnl # Adaptive mutexs introduced. +dnl # +AC_DEFUN([SPL_AC_MUTEX_OWNER], [ + AC_MSG_CHECKING([whether struct mutex has owner]) + SPL_LINUX_TRY_COMPILE([ + #include + ],[ + struct mutex mtx; + mtx.owner = NULL; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MUTEX_OWNER, 1, [struct mutex has owner]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + dnl # dnl # 2.6.18 API change, dnl # First introduced 'mutex_lock_nested()' in include/linux/mutex.h, diff --git a/configure b/configure index 6b9ea364e..03b23036e 100755 --- a/configure +++ b/configure @@ -1036,7 +1036,6 @@ Optional Features: --disable-libtool-lock avoid locking (might break parallel builds) --enable-debug Enable generic debug support (default off) --enable-debug-kmem Enable kmem debug support (default off) - --enable-debug-mutex Enable mutex debug support (default off) --enable-debug-kstat Enable kstat debug support (default off) --enable-debug-callb Enable callb debug support (default off) @@ -3987,7 +3986,7 @@ ia64-*-hpux*) ;; *-*-irix6*) # Find out which ABI we are using. - echo '#line 3990 "configure"' > conftest.$ac_ext + echo '#line 3989 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -5586,7 +5585,7 @@ fi # Provide some information about the compiler. 
-echo "$as_me:5589:" \ +echo "$as_me:5588:" \ "checking for Fortran 77 compiler version" >&5 ac_compiler=`set X $ac_compile; echo $2` { (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 @@ -6649,11 +6648,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:6652: $lt_compile\"" >&5) + (eval echo "\"\$as_me:6651: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:6656: \$? = $ac_status" >&5 + echo "$as_me:6655: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -6917,11 +6916,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:6920: $lt_compile\"" >&5) + (eval echo "\"\$as_me:6919: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:6924: \$? = $ac_status" >&5 + echo "$as_me:6923: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -7021,11 +7020,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:7024: $lt_compile\"" >&5) + (eval echo "\"\$as_me:7023: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:7028: \$? = $ac_status" >&5 + echo "$as_me:7027: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -8490,7 +8489,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 8493 "configure"' > conftest.$ac_ext + echo '#line 8492 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -9387,7 +9386,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < conftest.$ac_ext <&5) + (eval echo "\"\$as_me:11832: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:11837: \$? = $ac_status" >&5 + echo "$as_me:11836: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -11934,11 +11933,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:11937: $lt_compile\"" >&5) + (eval echo "\"\$as_me:11936: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:11941: \$? = $ac_status" >&5 + echo "$as_me:11940: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -12470,7 +12469,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 12473 "configure"' > conftest.$ac_ext + echo '#line 12472 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? 
@@ -13528,11 +13527,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:13531: $lt_compile\"" >&5) + (eval echo "\"\$as_me:13530: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:13535: \$? = $ac_status" >&5 + echo "$as_me:13534: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -13632,11 +13631,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:13635: $lt_compile\"" >&5) + (eval echo "\"\$as_me:13634: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:13639: \$? = $ac_status" >&5 + echo "$as_me:13638: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -15081,7 +15080,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 15084 "configure"' > conftest.$ac_ext + echo '#line 15083 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -15859,11 +15858,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15862: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15861: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:15866: \$? = $ac_status" >&5 + echo "$as_me:15865: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -16127,11 +16126,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16130: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16129: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:16134: \$? = $ac_status" >&5 + echo "$as_me:16133: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -16231,11 +16230,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16234: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16233: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:16238: \$? = $ac_status" >&5 + echo "$as_me:16237: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -17700,7 +17699,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 17703 "configure"' > conftest.$ac_ext + echo '#line 17702 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -19143,37 +19142,6 @@ echo "${ECHO_T}no" >&6 fi - echo "$as_me:$LINENO: checking whether mutex debugging is enabled" >&5 -echo $ECHO_N "checking whether mutex debugging is enabled... $ECHO_C" >&6 - # Check whether --enable-debug-mutex or --disable-debug-mutex was given. 
-if test "${enable_debug_mutex+set}" = set; then - enableval="$enable_debug_mutex" - case "$enableval" in - yes) spl_ac_debug_mutex=yes ;; - no) spl_ac_debug_mutex=no ;; - *) echo "$as_me:$LINENO: result: Error!" >&5 -echo "${ECHO_T}Error!" >&6 - { { echo "$as_me:$LINENO: error: Bad value \"$enableval\" for --enable-debug-mutex" >&5 -echo "$as_me: error: Bad value \"$enableval\" for --enable-debug-mutex" >&2;} - { (exit 1); exit 1; }; } ;; - esac - -fi; - if test "$spl_ac_debug_mutex" = yes; then - echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6 - -cat >>confdefs.h <<\_ACEOF -#define DEBUG_MUTEX 1 -_ACEOF - - KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_MUTEX" - else - echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6 - fi - - echo "$as_me:$LINENO: checking whether kstat debugging is enabled" >&5 echo $ECHO_N "checking whether kstat debugging is enabled... $ECHO_C" >&6 # Check whether --enable-debug-kstat or --disable-debug-kstat was given. @@ -20562,6 +20530,72 @@ fi + echo "$as_me:$LINENO: checking whether struct mutex has owner" >&5 +echo $ECHO_N "checking whether struct mutex has owner... $ECHO_C" >&6 + + +cat >conftest.c <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + + #include + +int +main (void) +{ + + struct mutex mtx; + mtx.owner = NULL; + + ; + return 0; +} + +_ACEOF + + + rm -Rf build && mkdir -p build + echo "obj-m := conftest.o" >build/Makefile + if { ac_try='cp conftest.c build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } >/dev/null && { ac_try='test -s build/conftest.o' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MUTEX_OWNER 1 +_ACEOF + + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + + + +fi + + rm -Rf build + + + + echo "$as_me:$LINENO: checking whether mutex_lock_nested() is available" >&5 echo $ECHO_N "checking whether mutex_lock_nested() is available... $ECHO_C" >&6 @@ -22151,37 +22185,6 @@ echo "${ECHO_T}no" >&6 fi - echo "$as_me:$LINENO: checking whether mutex debugging is enabled" >&5 -echo $ECHO_N "checking whether mutex debugging is enabled... $ECHO_C" >&6 - # Check whether --enable-debug-mutex or --disable-debug-mutex was given. -if test "${enable_debug_mutex+set}" = set; then - enableval="$enable_debug_mutex" - case "$enableval" in - yes) spl_ac_debug_mutex=yes ;; - no) spl_ac_debug_mutex=no ;; - *) echo "$as_me:$LINENO: result: Error!" >&5 -echo "${ECHO_T}Error!" >&6 - { { echo "$as_me:$LINENO: error: Bad value \"$enableval\" for --enable-debug-mutex" >&5 -echo "$as_me: error: Bad value \"$enableval\" for --enable-debug-mutex" >&2;} - { (exit 1); exit 1; }; } ;; - esac - -fi; - if test "$spl_ac_debug_mutex" = yes; then - echo "$as_me:$LINENO: result: yes" >&5 -echo "${ECHO_T}yes" >&6 - -cat >>confdefs.h <<\_ACEOF -#define DEBUG_MUTEX 1 -_ACEOF - - KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_MUTEX" - else - echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6 - fi - - echo "$as_me:$LINENO: checking whether kstat debugging is enabled" >&5 echo $ECHO_N "checking whether kstat debugging is enabled... $ECHO_C" >&6 # Check whether --enable-debug-kstat or --disable-debug-kstat was given. @@ -23570,6 +23573,72 @@ fi + echo "$as_me:$LINENO: checking whether struct mutex has owner" >&5 +echo $ECHO_N "checking whether struct mutex has owner... $ECHO_C" >&6 + + +cat >conftest.c <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + + #include + +int +main (void) +{ + + struct mutex mtx; + mtx.owner = NULL; + + ; + return 0; +} + +_ACEOF + + + rm -Rf build && mkdir -p build + echo "obj-m := conftest.o" >build/Makefile + if { ac_try='cp conftest.c build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } >/dev/null && { ac_try='test -s build/conftest.o' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MUTEX_OWNER 1 +_ACEOF + + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + + + +fi + + rm -Rf build + + + + echo "$as_me:$LINENO: checking whether mutex_lock_nested() is available" >&5 echo $ECHO_N "checking whether mutex_lock_nested() is available... $ECHO_C" >&6 diff --git a/include/sys/condvar.h b/include/sys/condvar.h index 40b6e4948..9a2e8b5a0 100644 --- a/include/sys/condvar.h +++ b/include/sys/condvar.h @@ -33,6 +33,7 @@ extern "C" { #include #include +#include #include /* The kcondvar_t struct is protected by mutex taken externally before diff --git a/include/sys/mutex.h b/include/sys/mutex.h index a26b2116a..49d17659d 100644 --- a/include/sys/mutex.h +++ b/include/sys/mutex.h @@ -1,7 +1,7 @@ /* * This file is part of the SPL: Solaris Porting Layer. * - * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Copyright (c) 2009 Lawrence Livermore National Security, LLC. 
* Produced at Lawrence Livermore National Laboratory * Written by: * Brian Behlendorf , @@ -25,88 +25,177 @@ */ #ifndef _SPL_MUTEX_H -#define _SPL_MUTEX_H +#define _SPL_MUTEX_H -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include #include -#include +#include + +typedef enum { + MUTEX_DEFAULT = 0, + MUTEX_SPIN = 1, + MUTEX_ADAPTIVE = 2 +} kmutex_type_t; -#define MUTEX_DEFAULT 0 -#define MUTEX_SPIN 1 -#define MUTEX_ADAPTIVE 2 +#ifdef HAVE_MUTEX_OWNER -#define MUTEX_ENTER_TOTAL 0 -#define MUTEX_ENTER_NOT_HELD 1 -#define MUTEX_ENTER_SPIN 2 -#define MUTEX_ENTER_SLEEP 3 -#define MUTEX_TRYENTER_TOTAL 4 -#define MUTEX_TRYENTER_NOT_HELD 5 -#define MUTEX_STATS_SIZE 6 +typedef struct mutex kmutex_t; -#define KM_MAGIC 0x42424242 -#define KM_POISON 0x84 +static inline kthread_t * +mutex_owner(kmutex_t *mp) +{ + if (mp->owner) + return (mp->owner)->task; + + return NULL; +} +#define mutex_owned(mp) (mutex_owner(mp) == current) +#define MUTEX_HELD(mp) mutex_owned(mp) +#undef mutex_init +#define mutex_init(mp, name, type, ibc) \ +({ \ + static struct lock_class_key __key; \ + ASSERT(type == MUTEX_DEFAULT); \ + \ + __mutex_init((mp), #mp, &__key); \ +}) +/* #define mutex_destroy(mp) ((void)0) */ +#define mutex_tryenter(mp) mutex_trylock(mp) +#define mutex_enter(mp) mutex_lock(mp) +#define mutex_exit(mp) mutex_unlock(mp) + +#else /* HAVE_MUTEX_OWNER */ typedef struct { - int32_t km_magic; - int16_t km_type; - int16_t km_name_size; - char *km_name; - struct task_struct *km_owner; - struct semaphore *km_sem; -#ifdef DEBUG_MUTEX - int *km_stats; - struct list_head km_list; -#endif + struct mutex m_mutex; + kthread_t *m_owner; } kmutex_t; -extern int mutex_spin_max; +#ifdef HAVE_TASK_CURR +extern int spl_mutex_spin_max(void); +#else /* HAVE_TASK_CURR */ +# define task_curr(owner) 0 +# define spl_mutex_spin_max() 0 +#endif /* HAVE_TASK_CURR */ -#ifdef DEBUG_MUTEX -extern int mutex_stats[MUTEX_STATS_SIZE]; -extern spinlock_t mutex_stats_lock; -extern struct list_head 
mutex_stats_list; -#define MUTEX_STAT_INC(stats, stat) ((stats)[stat]++) -#else -#define MUTEX_STAT_INC(stats, stat) -#endif +#define MUTEX(mp) ((struct mutex *)(mp)) -int spl_mutex_init(void); -void spl_mutex_fini(void); +static inline kthread_t * +spl_mutex_get_owner(kmutex_t *mp) +{ + return mp->m_owner; +} + +static inline void +spl_mutex_set_owner(kmutex_t *mp) +{ + unsigned long flags; + + spin_lock_irqsave(&MUTEX(mp)->wait_lock, flags); + mp->m_owner = current; + spin_unlock_irqrestore(&MUTEX(mp)->wait_lock, flags); +} + +static inline void +spl_mutex_clear_owner(kmutex_t *mp) +{ + unsigned long flags; + + spin_lock_irqsave(&MUTEX(mp)->wait_lock, flags); + mp->m_owner = NULL; + spin_unlock_irqrestore(&MUTEX(mp)->wait_lock, flags); +} + +static inline kthread_t * +mutex_owner(kmutex_t *mp) +{ + unsigned long flags; + kthread_t *owner; + + spin_lock_irqsave(&MUTEX(mp)->wait_lock, flags); + owner = spl_mutex_get_owner(mp); + spin_unlock_irqrestore(&MUTEX(mp)->wait_lock, flags); -extern int __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc); -extern void __spl_mutex_destroy(kmutex_t *mp); -extern int __mutex_tryenter(kmutex_t *mp); -extern void __mutex_enter(kmutex_t *mp); -extern void __mutex_exit(kmutex_t *mp); -extern int __mutex_owned(kmutex_t *mp); -extern kthread_t *__spl_mutex_owner(kmutex_t *mp); + return owner; +} + +#define mutex_owned(mp) (mutex_owner(mp) == current) +#define MUTEX_HELD(mp) mutex_owned(mp) +/* + * The following functions must be a #define and not static inline. 
+ * This ensures that the native linux mutex functions (lock/unlock) + * will be correctly located in the users code which is important + * for the built in kernel lock analysis tools + */ #undef mutex_init +#define mutex_init(mp, name, type, ibc) \ +({ \ + static struct lock_class_key __key; \ + ASSERT(type == MUTEX_DEFAULT); \ + \ + __mutex_init(MUTEX(mp), #mp, &__key); \ + spl_mutex_clear_owner(mp); \ +}) + #undef mutex_destroy +#define mutex_destroy(mp) \ +({ \ + VERIFY(!MUTEX_HELD(mp)); \ +}) -#define mutex_init(mp, name, type, ibc) \ -({ \ - /* May never fail or all subsequent mutex_* calls will ASSERT */\ - if ((name) == NULL) \ - while(__spl_mutex_init(mp, #mp, type, ibc)); \ - else \ - while(__spl_mutex_init(mp, name, type, ibc)); \ +#define mutex_tryenter(mp) \ +({ \ + int _rc_; \ + \ + if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1) \ + spl_mutex_set_owner(mp); \ + \ + _rc_; \ }) -#define mutex_destroy(mp) __spl_mutex_destroy(mp) -#define mutex_tryenter(mp) __mutex_tryenter(mp) -#define mutex_enter(mp) __mutex_enter(mp) -#define mutex_exit(mp) __mutex_exit(mp) -#define mutex_owned(mp) __mutex_owned(mp) -#define mutex_owner(mp) __spl_mutex_owner(mp) -#define MUTEX_HELD(mp) mutex_owned(mp) - -#ifdef __cplusplus -} -#endif -#endif /* _SPL_MUTEX_H */ +/* + * Adaptive mutexs assume that the lock may be held by a task running + * on a different cpu. The expectation is that the task will drop the + * lock before leaving the head of the run queue. So the ideal thing + * to do is spin until we acquire the lock and avoid a context switch. + * However it is also possible the task holding the lock yields the + * processor with out dropping lock. In this case, we know it's going + * to be a while so we stop spinning and go to sleep waiting for the + * lock to be available. This should strike the optimum balance + * between spinning and sleeping waiting for a lock. 
+ */ +#define mutex_enter(mp) \ +({ \ + kthread_t *_owner_; \ + int _rc_, _count_; \ + \ + _rc_ = 0; \ + _count_ = 0; \ + _owner_ = mutex_owner(mp); \ + \ + while (_owner_ && task_curr(_owner_) && \ + _count_ <= spl_mutex_spin_max()) { \ + if ((_rc_ = mutex_trylock(MUTEX(mp)))) \ + break; \ + \ + _count_++; \ + } \ + \ + if (!_rc_) \ + mutex_lock(MUTEX(mp)); \ + \ + spl_mutex_set_owner(mp); \ +}) + +#define mutex_exit(mp) \ +({ \ + spl_mutex_clear_owner(mp); \ + mutex_unlock(MUTEX(mp)); \ +}) + +#endif /* HAVE_MUTEX_OWNER */ + +int spl_mutex_init(void); +void spl_mutex_fini(void); + +#endif /* _SPL_MUTEX_H */ diff --git a/module/spl/spl-mutex.c b/module/spl/spl-mutex.c index f0389f5d1..0af74571d 100644 --- a/module/spl/spl-mutex.c +++ b/module/spl/spl-mutex.c @@ -1,7 +1,7 @@ /* * This file is part of the SPL: Solaris Porting Layer. * - * Copyright (c) 2008 Lawrence Livermore National Security, LLC. + * Copyright (c) 2009 Lawrence Livermore National Security, LLC. * Produced at Lawrence Livermore National Laboratory * Written by: * Brian Behlendorf , @@ -32,277 +32,46 @@ #define DEBUG_SUBSYSTEM S_MUTEX -/* Mutex implementation based on those found in Solaris. This means - * they the MUTEX_DEFAULT type is an adaptive mutex. When calling - * mutex_enter() your process will spin waiting for the lock if it's - * likely the lock will be free'd shortly. If it looks like the - * lock will be held for a longer time we schedule and sleep waiting - * for it. This determination is made by checking if the holder of - * the lock is currently running on cpu or sleeping waiting to be - * scheduled. If the holder is currently running it's likely the - * lock will be shortly dropped. +/* + * While a standard mutex implementation has been available in the kernel + * for quite some time. It was not until 2.6.29 and latter kernels that + * adaptive mutexs were embraced and integrated with the scheduler. 
This + * brought a significant performance improvement, but just as importantly + * it added a lock owner to the generic mutex outside CONFIG_DEBUG_MUTEXES + * builds. This is critical for correctly supporting the mutex_owner() + * Solaris primitive. When the owner is available we use a pure Linux + * mutex implementation. When the owner is not available we still use + * Linux mutexs as a base but also reserve space for an owner field right + * after the mutex structure. * - * XXX: This is basically a rough implementation to see if this - * helps our performance. If it does a more careful implementation - * should be done, perhaps in assembly. + * In the case when HAVE_MUTEX_OWNER is not defined your code may + * still me able to leverage adaptive mutexs. As long as the task_curr() + * symbol is exported this code will provide a poor mans adaptive mutex + * implementation. However, this is not required and if the symbol is + * unavailable we provide a standard mutex. */ -/* 0: Never spin when trying to aquire lock - * -1: Spin until aquired or holder yeilds without dropping lock +#ifndef HAVE_MUTEX_OWNER +#ifdef HAVE_TASK_CURR +/* + * mutex_spin_max = { 0, -1, 1-MAX_INT } + * 0: Never spin when trying to acquire lock + * -1: Spin until acquired or holder yields without dropping lock * 1-MAX_INT: Spin for N attempts before sleeping for lock */ int mutex_spin_max = 0; - -#ifdef DEBUG_MUTEX -int mutex_stats[MUTEX_STATS_SIZE] = { 0 }; -spinlock_t mutex_stats_lock; -struct list_head mutex_stats_list; -#endif - -int -__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc) -{ - int flags = KM_SLEEP; - - ASSERT(mp); - ASSERT(name); - ASSERT(ibc == NULL); - - mp->km_name = NULL; - mp->km_name_size = strlen(name) + 1; - - switch (type) { - case MUTEX_DEFAULT: - mp->km_type = MUTEX_ADAPTIVE; - break; - case MUTEX_SPIN: - case MUTEX_ADAPTIVE: - mp->km_type = type; - break; - default: - SBUG(); - } - - /* We may be called when there is a non-zero preempt_count or - * 
interrupts are disabled is which case we must not sleep. - */ - if (current_thread_info()->preempt_count || irqs_disabled()) - flags = KM_NOSLEEP; - - /* Semaphore kmem_alloc'ed to keep struct size down (<64b) */ - mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags); - if (mp->km_sem == NULL) - return -ENOMEM; - - mp->km_name = kmem_alloc(mp->km_name_size, flags); - if (mp->km_name == NULL) { - kmem_free(mp->km_sem, sizeof(struct semaphore)); - return -ENOMEM; - } - - sema_init(mp->km_sem, 1); - strncpy(mp->km_name, name, mp->km_name_size); - -#ifdef DEBUG_MUTEX - mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags); - if (mp->km_stats == NULL) { - kmem_free(mp->km_name, mp->km_name_size); - kmem_free(mp->km_sem, sizeof(struct semaphore)); - return -ENOMEM; - } - - /* XXX - This appears to be a much more contended lock than I - * would have expected. To run with this debugging enabled and - * get reasonable performance we may need to be more clever and - * do something like hash the mutex ptr on to one of several - * lists to ease this single point of contention. - */ - spin_lock(&mutex_stats_lock); - list_add_tail(&mp->km_list, &mutex_stats_list); - spin_unlock(&mutex_stats_lock); -#endif - mp->km_magic = KM_MAGIC; - mp->km_owner = NULL; - - return 0; -} -EXPORT_SYMBOL(__spl_mutex_init); - -void -__spl_mutex_destroy(kmutex_t *mp) -{ - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - -#ifdef DEBUG_MUTEX - spin_lock(&mutex_stats_lock); - list_del_init(&mp->km_list); - spin_unlock(&mutex_stats_lock); - - kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE); -#endif - kmem_free(mp->km_name, mp->km_name_size); - kmem_free(mp->km_sem, sizeof(struct semaphore)); - - memset(mp, KM_POISON, sizeof(*mp)); -} -EXPORT_SYMBOL(__spl_mutex_destroy); - -/* Return 1 if we acquired the mutex, else zero. 
*/ -int -__mutex_tryenter(kmutex_t *mp) -{ - int rc; - ENTRY; - - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL); - MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL); - - rc = down_trylock(mp->km_sem); - if (rc == 0) { - ASSERT(mp->km_owner == NULL); - mp->km_owner = current; - MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD); - MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD); - } - - RETURN(!rc); -} -EXPORT_SYMBOL(__mutex_tryenter); - -#ifndef HAVE_TASK_CURR -#define task_curr(owner) 0 -#endif - - -static void -mutex_enter_adaptive(kmutex_t *mp) -{ - struct task_struct *owner; - int count = 0; - - /* Lock is not held so we expect to aquire the lock */ - if ((owner = mp->km_owner) == NULL) { - down(mp->km_sem); - MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD); - MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD); - } else { - /* The lock is held by a currently running task which - * we expect will drop the lock before leaving the - * head of the runqueue. So the ideal thing to do - * is spin until we aquire the lock and avoid a - * context switch. However it is also possible the - * task holding the lock yields the processor with - * out dropping lock. In which case, we know it's - * going to be a while so we stop spinning and go - * to sleep waiting for the lock to be available. - * This should strike the optimum balance between - * spinning and sleeping waiting for a lock. - */ - while (task_curr(owner) && (count <= mutex_spin_max)) { - if (down_trylock(mp->km_sem) == 0) { - MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); - MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); - GOTO(out, count); - } - count++; - } - - /* The lock is held by a sleeping task so it's going to - * cost us minimally one context switch. We might as - * well sleep and yield the processor to other tasks. 
- */ - down(mp->km_sem); - MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP); - MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP); - } -out: - MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL); - MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL); -} - -void -__mutex_enter(kmutex_t *mp) -{ - ENTRY; - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - - switch (mp->km_type) { - case MUTEX_SPIN: - while (down_trylock(mp->km_sem)); - MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN); - MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN); - break; - case MUTEX_ADAPTIVE: - mutex_enter_adaptive(mp); - break; - } - - ASSERT(mp->km_owner == NULL); - mp->km_owner = current; - - EXIT; -} -EXPORT_SYMBOL(__mutex_enter); - -void -__mutex_exit(kmutex_t *mp) -{ - ENTRY; - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - ASSERT(mp->km_owner == current); - mp->km_owner = NULL; - up(mp->km_sem); - EXIT; -} -EXPORT_SYMBOL(__mutex_exit); - -/* Return 1 if mutex is held by current process, else zero. */ -int -__mutex_owned(kmutex_t *mp) -{ - ENTRY; - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - RETURN(mp->km_owner == current); -} -EXPORT_SYMBOL(__mutex_owned); - -/* Return owner if mutex is owned, else NULL. 
*/ -kthread_t * -__spl_mutex_owner(kmutex_t *mp) -{ - ENTRY; - ASSERT(mp); - ASSERT(mp->km_magic == KM_MAGIC); - RETURN(mp->km_owner); -} -EXPORT_SYMBOL(__spl_mutex_owner); +module_param(mutex_spin_max, int, 0644); +MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to acquire lock"); int -spl_mutex_init(void) +spl_mutex_spin_max(void) { - ENTRY; -#ifdef DEBUG_MUTEX - spin_lock_init(&mutex_stats_lock); - INIT_LIST_HEAD(&mutex_stats_list); -#endif - RETURN(0); + return mutex_spin_max; } +EXPORT_SYMBOL(spl_mutex_spin_max); -void -spl_mutex_fini(void) -{ - ENTRY; -#ifdef DEBUG_MUTEX - ASSERT(list_empty(&mutex_stats_list)); -#endif - EXIT; -} +#endif /* HAVE_TASK_CURR */ +#endif /* !HAVE_MUTEX_OWNER */ -module_param(mutex_spin_max, int, 0644); -MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to aquire lock"); +int spl_mutex_init(void) { return 0; } +void spl_mutex_fini(void) { } diff --git a/module/spl/spl-proc.c b/module/spl/spl-proc.c index 5dd7884f6..690f2991e 100644 --- a/module/spl/spl-proc.c +++ b/module/spl/spl-proc.c @@ -41,12 +41,8 @@ static unsigned long table_max = ~0; static struct ctl_table_header *spl_header = NULL; #endif /* CONFIG_SYSCTL */ -#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) +#if defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) static struct proc_dir_entry *proc_spl = NULL; -#ifdef DEBUG_MUTEX -static struct proc_dir_entry *proc_spl_mutex = NULL; -static struct proc_dir_entry *proc_spl_mutex_stats = NULL; -#endif /* DEBUG_MUTEX */ #ifdef DEBUG_KMEM static struct proc_dir_entry *proc_spl_kmem = NULL; static struct proc_dir_entry *proc_spl_kmem_slab = NULL; @@ -54,7 +50,7 @@ static struct proc_dir_entry *proc_spl_kmem_slab = NULL; #ifdef DEBUG_KSTAT struct proc_dir_entry *proc_spl_kstat = NULL; #endif /* DEBUG_KSTAT */ -#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ +#endif /* DEBUG_KMEM || DEBUG_KSTAT */ #ifdef HAVE_CTL_UNNUMBERED @@ -105,10 +101,6 @@ struct proc_dir_entry *proc_spl_kstat = 
NULL; #define CTL_KMEM_ALLOC_FAILED CTL_UNNUMBERED /* Cache allocations failed */ #endif -#define CTL_MUTEX_STATS CTL_UNNUMBERED /* Global mutex statistics */ -#define CTL_MUTEX_STATS_PER CTL_UNNUMBERED /* Per mutex statistics */ -#define CTL_MUTEX_SPIN_MAX CTL_UNNUMBERED /* Max mutex spin iterations */ - #else /* HAVE_CTL_UNNUMBERED */ enum { @@ -159,10 +151,6 @@ enum { CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */ CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ #endif - - CTL_MUTEX_STATS, /* Global mutex statistics */ - CTL_MUTEX_STATS_PER, /* Per mutex statistics */ - CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */ }; #endif /* HAVE_CTL_UNNUMBERED */ @@ -589,103 +577,6 @@ proc_dofreemem(struct ctl_table *table, int write, struct file *filp, RETURN(rc); } -#ifdef DEBUG_MUTEX -static void -mutex_seq_show_headers(struct seq_file *f) -{ - seq_printf(f, "%-36s %-4s %-16s\t" - "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n", - "name", "type", "owner"); -} - -static int -mutex_seq_show(struct seq_file *f, void *p) -{ - kmutex_t *mp = p; - char t = 'X'; - int i; - - ASSERT(mp->km_magic == KM_MAGIC); - - switch (mp->km_type) { - case MUTEX_DEFAULT: t = 'D'; break; - case MUTEX_SPIN: t = 'S'; break; - case MUTEX_ADAPTIVE: t = 'A'; break; - default: - SBUG(); - } - seq_printf(f, "%-36s %c ", mp->km_name, t); - if (mp->km_owner) - seq_printf(f, "%p\t", mp->km_owner); - else - seq_printf(f, "%-16s\t", ""); - - for (i = 0; i < MUTEX_STATS_SIZE; i++) - seq_printf(f, "%d%c", mp->km_stats[i], - (i + 1 == MUTEX_STATS_SIZE) ? 
'\n' : '\t'); - - return 0; -} - -static void * -mutex_seq_start(struct seq_file *f, loff_t *pos) -{ - struct list_head *p; - loff_t n = *pos; - ENTRY; - - spin_lock(&mutex_stats_lock); - if (!n) - mutex_seq_show_headers(f); - - p = mutex_stats_list.next; - while (n--) { - p = p->next; - if (p == &mutex_stats_list) - RETURN(NULL); - } - - RETURN(list_entry(p, kmutex_t, km_list)); -} - -static void * -mutex_seq_next(struct seq_file *f, void *p, loff_t *pos) -{ - kmutex_t *mp = p; - ENTRY; - - ++*pos; - RETURN((mp->km_list.next == &mutex_stats_list) ? - NULL : list_entry(mp->km_list.next, kmutex_t, km_list)); -} - -static void -mutex_seq_stop(struct seq_file *f, void *v) -{ - spin_unlock(&mutex_stats_lock); -} - -static struct seq_operations mutex_seq_ops = { - .show = mutex_seq_show, - .start = mutex_seq_start, - .next = mutex_seq_next, - .stop = mutex_seq_stop, -}; - -static int -proc_mutex_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &mutex_seq_ops); -} - -static struct file_operations proc_mutex_operations = { - .open = proc_mutex_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif /* DEBUG_MUTEX */ - #ifdef DEBUG_KMEM static void slab_seq_show_headers(struct seq_file *f) @@ -968,28 +859,6 @@ static struct ctl_table spl_vm_table[] = { {0}, }; -#ifdef DEBUG_MUTEX -static struct ctl_table spl_mutex_table[] = { - { - .ctl_name = CTL_MUTEX_STATS, - .procname = "stats", - .data = &mutex_stats, - .maxlen = sizeof(int) * MUTEX_STATS_SIZE, - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_MUTEX_SPIN_MAX, - .procname = "spin_max", - .data = &mutex_spin_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - {0}, -}; -#endif /* DEBUG_MUTEX */ - #ifdef DEBUG_KMEM static struct ctl_table spl_kmem_table[] = { { @@ -1088,14 +957,6 @@ static struct ctl_table spl_table[] = { .mode = 0555, .child = spl_vm_table, }, -#ifdef DEBUG_MUTEX - { - .ctl_name = 
CTL_SPL_MUTEX, - .procname = "mutex", - .mode = 0555, - .child = spl_mutex_table, - }, -#endif #ifdef DEBUG_KMEM { .ctl_name = CTL_SPL_KMEM, @@ -1180,24 +1041,11 @@ proc_init(void) RETURN(-EUNATCH); #endif /* CONFIG_SYSCTL */ -#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) +#if defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) proc_spl = proc_mkdir("spl", NULL); if (proc_spl == NULL) GOTO(out, rc = -EUNATCH); -#ifdef DEBUG_MUTEX - proc_spl_mutex = proc_mkdir("mutex", proc_spl); - if (proc_spl_mutex == NULL) - GOTO(out, rc = -EUNATCH); - - proc_spl_mutex_stats = create_proc_entry("stats_per", 0444, - proc_spl_mutex); - if (proc_spl_mutex_stats == NULL) - GOTO(out, rc = -EUNATCH); - - proc_spl_mutex_stats->proc_fops = &proc_mutex_operations; -#endif /* DEBUG_MUTEX */ - #ifdef DEBUG_KMEM proc_spl_kmem = proc_mkdir("kmem", proc_spl); if (proc_spl_kmem == NULL) @@ -1223,16 +1071,12 @@ out: remove_proc_entry("slab", proc_spl_kmem); #endif remove_proc_entry("kmem", proc_spl); -#ifdef DEBUG_MUTEX - remove_proc_entry("stats_per", proc_spl_mutex); -#endif - remove_proc_entry("mutex", proc_spl); remove_proc_entry("spl", NULL); #ifdef CONFIG_SYSCTL spl_unregister_sysctl_table(spl_header); #endif /* CONFIG_SYSCTL */ } -#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ +#endif /* DEBUG_KMEM || DEBUG_KSTAT */ RETURN(rc); } @@ -1242,18 +1086,14 @@ proc_fini(void) { ENTRY; -#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) +#if defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) remove_proc_entry("kstat", proc_spl); #ifdef DEBUG_KMEM remove_proc_entry("slab", proc_spl_kmem); #endif remove_proc_entry("kmem", proc_spl); -#ifdef DEBUG_MUTEX - remove_proc_entry("stats_per", proc_spl_mutex); -#endif - remove_proc_entry("mutex", proc_spl); remove_proc_entry("spl", NULL); -#endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */ +#endif /* DEBUG_KMEM || DEBUG_KSTAT */ #ifdef CONFIG_SYSCTL ASSERT(spl_header != NULL); diff --git a/module/splat/splat-mutex.c 
b/module/splat/splat-mutex.c index 3d8f94213..72fa32c81 100644 --- a/module/splat/splat-mutex.c +++ b/module/splat/splat-mutex.c @@ -26,296 +26,292 @@ #include "splat-internal.h" -#define SPLAT_MUTEX_NAME "mutex" -#define SPLAT_MUTEX_DESC "Kernel Mutex Tests" +#define SPLAT_MUTEX_NAME "mutex" +#define SPLAT_MUTEX_DESC "Kernel Mutex Tests" -#define SPLAT_MUTEX_TEST1_ID 0x0401 -#define SPLAT_MUTEX_TEST1_NAME "tryenter" -#define SPLAT_MUTEX_TEST1_DESC "Validate mutex_tryenter() correctness" +#define SPLAT_MUTEX_TEST1_ID 0x0401 +#define SPLAT_MUTEX_TEST1_NAME "tryenter" +#define SPLAT_MUTEX_TEST1_DESC "Validate mutex_tryenter() correctness" -#define SPLAT_MUTEX_TEST2_ID 0x0402 -#define SPLAT_MUTEX_TEST2_NAME "race" -#define SPLAT_MUTEX_TEST2_DESC "Many threads entering/exiting the mutex" +#define SPLAT_MUTEX_TEST2_ID 0x0402 +#define SPLAT_MUTEX_TEST2_NAME "race" +#define SPLAT_MUTEX_TEST2_DESC "Many threads entering/exiting the mutex" -#define SPLAT_MUTEX_TEST3_ID 0x0403 -#define SPLAT_MUTEX_TEST3_NAME "owned" -#define SPLAT_MUTEX_TEST3_DESC "Validate mutex_owned() correctness" +#define SPLAT_MUTEX_TEST3_ID 0x0403 +#define SPLAT_MUTEX_TEST3_NAME "owned" +#define SPLAT_MUTEX_TEST3_DESC "Validate mutex_owned() correctness" -#define SPLAT_MUTEX_TEST4_ID 0x0404 -#define SPLAT_MUTEX_TEST4_NAME "owner" -#define SPLAT_MUTEX_TEST4_DESC "Validate mutex_owner() correctness" +#define SPLAT_MUTEX_TEST4_ID 0x0404 +#define SPLAT_MUTEX_TEST4_NAME "owner" +#define SPLAT_MUTEX_TEST4_DESC "Validate mutex_owner() correctness" -#define SPLAT_MUTEX_TEST_MAGIC 0x115599DDUL -#define SPLAT_MUTEX_TEST_NAME "mutex_test" -#define SPLAT_MUTEX_TEST_TASKQ "mutex_taskq" -#define SPLAT_MUTEX_TEST_COUNT 128 +#define SPLAT_MUTEX_TEST_MAGIC 0x115599DDUL +#define SPLAT_MUTEX_TEST_NAME "mutex_test" +#define SPLAT_MUTEX_TEST_TASKQ "mutex_taskq" +#define SPLAT_MUTEX_TEST_COUNT 128 typedef struct mutex_priv { unsigned long mp_magic; struct file *mp_file; - kmutex_t mp_mtx; - int mp_rc; + kmutex_t mp_mtx; + 
int mp_rc; } mutex_priv_t; static void splat_mutex_test1_func(void *arg) { - mutex_priv_t *mp = (mutex_priv_t *)arg; - ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); - - if (mutex_tryenter(&mp->mp_mtx)) { - mp->mp_rc = 0; - mutex_exit(&mp->mp_mtx); - } else { - mp->mp_rc = -EBUSY; - } + mutex_priv_t *mp = (mutex_priv_t *)arg; + ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); + + if (mutex_tryenter(&mp->mp_mtx)) { + mp->mp_rc = 0; + mutex_exit(&mp->mp_mtx); + } else { + mp->mp_rc = -EBUSY; + } } static int splat_mutex_test1(struct file *file, void *arg) { - mutex_priv_t *mp; - taskq_t *tq; - int id, rc = 0; - - mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); - if (mp == NULL) - return -ENOMEM; - - tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, 1, maxclsyspri, - 50, INT_MAX, TASKQ_PREPOPULATE); - if (tq == NULL) { - rc = -ENOMEM; - goto out2; - } - - mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; - mp->mp_file = file; - mutex_init(&mp->mp_mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); - mutex_enter(&mp->mp_mtx); - - /* - * Schedule a task function which will try and acquire the mutex via - * mutex_tryenter() while it's held. This should fail and the task - * function will indicate this status in the passed private data. - */ - mp->mp_rc = -EINVAL; - id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP); - if (id == 0) { - mutex_exit(&mp->mp_mtx); - splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", - "taskq_dispatch() failed\n"); - rc = -EINVAL; - goto out; - } - - taskq_wait_id(tq, id); - mutex_exit(&mp->mp_mtx); - - /* Task function successfully acquired mutex, very bad! 
*/ - if (mp->mp_rc != -EBUSY) { - splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, - "mutex_trylock() incorrectly succeeded when " - "the mutex was held, %d/%d\n", id, mp->mp_rc); - rc = -EINVAL; - goto out; - } else { - splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", - "mutex_trylock() correctly failed when " - "the mutex was held\n"); - } - - /* - * Schedule a task function which will try and acquire the mutex via - * mutex_tryenter() while it is not held. This should succeed and - * can be verified by checking the private data. - */ - mp->mp_rc = -EINVAL; - id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP); - if (id == 0) { - splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", - "taskq_dispatch() failed\n"); - rc = -EINVAL; - goto out; - } - - taskq_wait_id(tq, id); - - /* Task function failed to acquire mutex, very bad! */ - if (mp->mp_rc != 0) { - splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, - "mutex_trylock() incorrectly failed when " - "the mutex was not held, %d/%d\n", id, mp->mp_rc); - rc = -EINVAL; - } else { - splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", - "mutex_trylock() correctly succeeded " - "when the mutex was not held\n"); - } + mutex_priv_t *mp; + taskq_t *tq; + int id, rc = 0; + + mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); + if (mp == NULL) + return -ENOMEM; + + tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, 1, maxclsyspri, + 50, INT_MAX, TASKQ_PREPOPULATE); + if (tq == NULL) { + rc = -ENOMEM; + goto out2; + } + + mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; + mp->mp_file = file; + mutex_init(&mp->mp_mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + mutex_enter(&mp->mp_mtx); + + /* + * Schedule a task function which will try and acquire the mutex via + * mutex_tryenter() while it's held. This should fail and the task + * function will indicate this status in the passed private data. 
+ */ + mp->mp_rc = -EINVAL; + id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP); + if (id == 0) { + mutex_exit(&mp->mp_mtx); + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", + "taskq_dispatch() failed\n"); + rc = -EINVAL; + goto out; + } + + taskq_wait_id(tq, id); + mutex_exit(&mp->mp_mtx); + + /* Task function successfully acquired mutex, very bad! */ + if (mp->mp_rc != -EBUSY) { + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, + "mutex_trylock() incorrectly succeeded when " + "the mutex was held, %d/%d\n", id, mp->mp_rc); + rc = -EINVAL; + goto out; + } else { + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", + "mutex_trylock() correctly failed when " + "the mutex was held\n"); + } + + /* + * Schedule a task function which will try and acquire the mutex via + * mutex_tryenter() while it is not held. This should succeed and + * can be verified by checking the private data. + */ + mp->mp_rc = -EINVAL; + id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP); + if (id == 0) { + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", + "taskq_dispatch() failed\n"); + rc = -EINVAL; + goto out; + } + + taskq_wait_id(tq, id); + + /* Task function failed to acquire mutex, very bad! 
*/ + if (mp->mp_rc != 0) { + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, + "mutex_trylock() incorrectly failed when " + "the mutex was not held, %d/%d\n", id, mp->mp_rc); + rc = -EINVAL; + } else { + splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", + "mutex_trylock() correctly succeeded " + "when the mutex was not held\n"); + } out: - taskq_destroy(tq); - mutex_destroy(&(mp->mp_mtx)); + taskq_destroy(tq); + mutex_destroy(&(mp->mp_mtx)); out2: - kfree(mp); - return rc; + kfree(mp); + return rc; } static void splat_mutex_test2_func(void *arg) { - mutex_priv_t *mp = (mutex_priv_t *)arg; - int rc; - ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); - - /* Read the value before sleeping and write it after we wake up to - * maximize the chance of a race if mutexs are not working properly */ - mutex_enter(&mp->mp_mtx); - rc = mp->mp_rc; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 100); /* 1/100 of a second */ - VERIFY(mp->mp_rc == rc); - mp->mp_rc = rc + 1; - mutex_exit(&mp->mp_mtx); + mutex_priv_t *mp = (mutex_priv_t *)arg; + int rc; + ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); + + /* Read the value before sleeping and write it after we wake up to + * maximize the chance of a race if mutexes are not working properly */ + mutex_enter(&mp->mp_mtx); + rc = mp->mp_rc; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 100); /* 1/100 of a second */ + VERIFY(mp->mp_rc == rc); + mp->mp_rc = rc + 1; + mutex_exit(&mp->mp_mtx); } static int splat_mutex_test2(struct file *file, void *arg) { - mutex_priv_t *mp; - taskq_t *tq; - int i, rc = 0; - - mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); - if (mp == NULL) - return -ENOMEM; - - /* Create several threads allowing tasks to race with each other */ - tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, num_online_cpus(), - maxclsyspri, 50, INT_MAX, TASKQ_PREPOPULATE); - if (tq == NULL) { - rc = -ENOMEM; - goto out; - } - - mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; - mp->mp_file = file; - 
mutex_init(&(mp->mp_mtx), SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); - mp->mp_rc = 0; - - /* - * Schedule N work items to the work queue each of which enters the - * mutex, sleeps briefly, then exits the mutex. On a multiprocessor - * box these work items will be handled by all available CPUs. The - * task function checks to ensure the tracked shared variable is - * always only incremented by one. Additionally, the mutex itself - * is instrumented such that if any two processors are in the - * critical region at the same time the system will panic. If the - * mutex is implemented right this will never happy, that's a pass. - */ - for (i = 0; i < SPLAT_MUTEX_TEST_COUNT; i++) { - if (!taskq_dispatch(tq, splat_mutex_test2_func, mp, TQ_SLEEP)) { - splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, - "Failed to queue task %d\n", i); - rc = -EINVAL; - } - } - - taskq_wait(tq); - - if (mp->mp_rc == SPLAT_MUTEX_TEST_COUNT) { - splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " - "correctly entered/exited the mutex %d times\n", - num_online_cpus(), mp->mp_rc); - } else { - splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " - "only processed %d/%d mutex work items\n", - num_online_cpus(),mp->mp_rc,SPLAT_MUTEX_TEST_COUNT); - rc = -EINVAL; - } - - taskq_destroy(tq); - mutex_destroy(&(mp->mp_mtx)); + mutex_priv_t *mp; + taskq_t *tq; + int i, rc = 0; + + mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); + if (mp == NULL) + return -ENOMEM; + + /* Create several threads allowing tasks to race with each other */ + tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, num_online_cpus(), + maxclsyspri, 50, INT_MAX, TASKQ_PREPOPULATE); + if (tq == NULL) { + rc = -ENOMEM; + goto out; + } + + mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; + mp->mp_file = file; + mutex_init(&(mp->mp_mtx), SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + mp->mp_rc = 0; + + /* + * Schedule N work items to the work queue each of which enters the + * mutex, sleeps briefly, then exits the mutex. 
On a multiprocessor + * box these work items will be handled by all available CPUs. The + * task function checks to ensure the tracked shared variable is + * always only incremented by one. Additionally, the mutex itself + * is instrumented such that if any two processors are in the + * critical region at the same time the system will panic. If the + * mutex is implemented right this will never happen, that's a pass. + */ + for (i = 0; i < SPLAT_MUTEX_TEST_COUNT; i++) { + if (!taskq_dispatch(tq, splat_mutex_test2_func, mp, TQ_SLEEP)) { + splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, + "Failed to queue task %d\n", i); + rc = -EINVAL; + } + } + + taskq_wait(tq); + + if (mp->mp_rc == SPLAT_MUTEX_TEST_COUNT) { + splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " + "correctly entered/exited the mutex %d times\n", + num_online_cpus(), mp->mp_rc); + } else { + splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " + "only processed %d/%d mutex work items\n", + num_online_cpus(),mp->mp_rc,SPLAT_MUTEX_TEST_COUNT); + rc = -EINVAL; + } + + taskq_destroy(tq); + mutex_destroy(&(mp->mp_mtx)); out: - kfree(mp); - return rc; + kfree(mp); + return rc; } static int splat_mutex_test3(struct file *file, void *arg) { kmutex_t mtx; - int rc = 0; + int rc = 0; - mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + mutex_enter(&mtx); - mutex_enter(&mtx); + /* Mutex should be owned by current */ + if (!mutex_owned(&mtx)) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Unowned mutex " + "should be owned by pid %d\n", current->pid); + rc = -EINVAL; + goto out; + } - /* Mutex should be owned by current */ - if (!mutex_owned(&mtx)) { - splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should " - "be owned by pid %d but is owned by pid %d\n", - current->pid, mtx.km_owner ? 
mtx.km_owner->pid : -1); - rc = -EINVAL; - goto out; - } + mutex_exit(&mtx); - mutex_exit(&mtx); - - /* Mutex should not be owned by any task */ - if (mutex_owned(&mtx)) { - splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should " - "not be owned but is owned by pid %d\n", - mtx.km_owner ? mtx.km_owner->pid : -1); - rc = -EINVAL; - goto out; - } + /* Mutex should not be owned by any task */ + if (mutex_owned(&mtx)) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex owned by " + "pid %d should be unowned\n", current->pid); + rc = -EINVAL; + goto out; + } splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s", - "Correct mutex_owned() behavior\n"); + "Correct mutex_owned() behavior\n"); out: - mutex_destroy(&mtx); + mutex_destroy(&mtx); - return rc; + return rc; } static int splat_mutex_test4(struct file *file, void *arg) { kmutex_t mtx; - kthread_t *owner; - int rc = 0; - - mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); - - mutex_enter(&mtx); - - /* Mutex should be owned by current */ - owner = mutex_owner(&mtx); - if (current != owner) { - splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should " - "be owned by pid %d but is owned by pid %d\n", - current->pid, owner ? owner->pid : -1); - rc = -EINVAL; - goto out; - } - - mutex_exit(&mtx); - - /* Mutex should not be owned by any task */ - owner = mutex_owner(&mtx); - if (owner) { - splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should not " - "be owned but is owned by pid %d\n", owner->pid); - rc = -EINVAL; - goto out; - } + kthread_t *owner; + int rc = 0; + + mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); + mutex_enter(&mtx); + + /* Mutex should be owned by current */ + owner = mutex_owner(&mtx); + if (current != owner) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should " + "be owned by pid %d but is owned by pid %d\n", + current->pid, owner ? 
owner->pid : -1); + rc = -EINVAL; + goto out; + } + + mutex_exit(&mtx); + + /* Mutex should not be owned by any task */ + owner = mutex_owner(&mtx); + if (owner) { + splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should not " + "be owned but is owned by pid %d\n", owner->pid); + rc = -EINVAL; + goto out; + } splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s", - "Correct mutex_owner() behavior\n"); + "Correct mutex_owner() behavior\n"); out: - mutex_destroy(&mtx); + mutex_destroy(&mtx); - return rc; + return rc; } splat_subsystem_t * diff --git a/spl_config.h.in b/spl_config.h.in index eedff8e81..bd42119cb 100644 --- a/spl_config.h.in +++ b/spl_config.h.in @@ -9,9 +9,6 @@ /* Define to 1 to enable kstat debugging */ #undef DEBUG_KSTAT -/* Define to 1 to enable mutex debugging */ -#undef DEBUG_MUTEX - /* register_sysctl_table() wants 2 args */ #undef HAVE_2ARGS_REGISTER_SYSCTL @@ -102,6 +99,9 @@ /* mutex_lock_nested() is available */ #undef HAVE_MUTEX_LOCK_NESTED +/* struct mutex has owner */ +#undef HAVE_MUTEX_OWNER + /* next_online_pgdat() is available */ #undef HAVE_NEXT_ONLINE_PGDAT