]> granicus.if.org Git - spl/commitdiff
Commit adaptive mutexes. This seems to have introduced some new
authorbehlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Mon, 5 May 2008 20:18:49 +0000 (20:18 +0000)
committerbehlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Mon, 5 May 2008 20:18:49 +0000 (20:18 +0000)
crashes but it's not clear to me yet if these are a problem with
the mutex implementation or ZFS's usage of it.

Minor taskq fixes to add new tasks to the end of the pending list.

Minor enhancements to the debug infrastructure.

git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@94 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c

ChangeLog
include/sys/debug.h
include/sys/kmem.h
include/sys/mutex.h
modules/spl/Makefile.in
modules/spl/spl-generic.c
modules/spl/spl-mutex.c [new file with mode: 0644]
modules/spl/spl-proc.c
modules/spl/spl-taskq.c

index 0106bbd5e13acdc671566fea381c0f2059785ef7..a65d6b15d567144182c376f124a649006d6d4d32 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-04-26 Brian Behlendorf <behlendorf1@llnl.gov>
+
+       * include/sys/mutex.h : Implemented a close approximation
+       of adaptive mutexes.  These changes however required me to 
+       export a new symbol from the kernel proper 'task_curr()'
+       which means we are now dependent on a patched kernel.
+
 2008-04-24 Brian Behlendorf <behlendorf1@llnl.gov>
 
        * : Tag spl-0.2.1 
index 64aa1808f64ff226785a4eb91c74c5b5898fe50c..39585ba196cc399d854484add1c0ab7d47a7004a 100644 (file)
@@ -310,16 +310,19 @@ do {                                                                    \
         return RETURN__ret;                                             \
 } while (0)
 
-#define ENTRY                                                           \
+#define __ENTRY(subsys)                                                 \
 do {                                                                    \
-        CDEBUG(D_TRACE, "Process entered\n");                           \
+        __CDEBUG(NULL, subsys, D_TRACE, "Process entered\n");           \
 } while (0)
 
-#define EXIT                                                            \
+#define __EXIT(subsys)                                                  \
 do {                                                                    \
-        CDEBUG(D_TRACE, "Process leaving\n");                           \
+        __CDEBUG(NULL, subsys, D_TRACE, "Process leaving\n");           \
 } while(0)
 
+#define ENTRY                          __ENTRY(DEBUG_SUBSYSTEM)
+#define EXIT                            __EXIT(DEBUG_SUBSYSTEM)
+
 extern int spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask,
                           const char *file, const char *fn, const int line,
                           const char *format1, va_list args, const char *format2, ...);
index 73965c58b690fa9d4f8fdebb1ddb875e6b0db687..cc56ddd3684c5221a9ef4577ec37c63838388260 100644 (file)
@@ -54,9 +54,9 @@ extern int kmem_warning_flag;
                 if (unlikely(atomic64_read(&kmem_alloc_used)>kmem_alloc_max)) \
                         kmem_alloc_max = atomic64_read(&kmem_alloc_used);     \
                                                                              \
-                __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x)'d "      \
+                __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x) = %p "   \
                               "(%ld/%ld)\n", (int)(size), (int)(flags),      \
-                              atomic64_read(&kmem_alloc_used),               \
+                              _ptr_, atomic64_read(&kmem_alloc_used),        \
                               kmem_alloc_max);                               \
         }                                                                     \
                                                                               \
@@ -70,8 +70,8 @@ extern int kmem_warning_flag;
 ({                                                                            \
         ASSERT((ptr) || (size > 0));                                          \
         atomic64_sub((size), &kmem_alloc_used);                               \
-        __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%d)'d (%ld/%ld)\n",         \
-                      (int)(size), atomic64_read(&kmem_alloc_used),          \
+        __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%p, %d) (%ld/%ld)\n",       \
+                      (ptr), (int)(size), atomic64_read(&kmem_alloc_used),   \
                       kmem_alloc_max);                                       \
         memset(ptr, 0x5a, (size)); /* Poison */                               \
         kfree(ptr);                                                           \
@@ -99,9 +99,9 @@ extern int kmem_warning_flag;
                 if (unlikely(atomic64_read(&vmem_alloc_used)>vmem_alloc_max)) \
                         vmem_alloc_max = atomic64_read(&vmem_alloc_used);     \
                                                                               \
-                __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(%d, 0x%x)'d "      \
+                __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(%d, 0x%x) = %p "   \
                               "(%ld/%ld)\n", (int)(size), (int)(flags),      \
-                              atomic64_read(&vmem_alloc_used),               \
+                              _ptr_, atomic64_read(&vmem_alloc_used),        \
                               vmem_alloc_max);                               \
         }                                                                     \
                                                                               \
@@ -116,8 +116,8 @@ extern int kmem_warning_flag;
 ({                                                                            \
         ASSERT((ptr) || (size > 0));                                          \
         atomic64_sub((size), &vmem_alloc_used);                               \
-        __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%d)'d (%ld/%ld)\n",         \
-                      (int)(size), atomic64_read(&vmem_alloc_used),          \
+        __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%p, %d) (%ld/%ld)\n",       \
+                      (ptr), (int)(size), atomic64_read(&vmem_alloc_used),   \
                       vmem_alloc_max);                                       \
         memset(ptr, 0x5a, (size)); /* Poison */                               \
         vfree(ptr);                                                           \
index 045842d728e85a6b462f17f6b93671bd738fcd75..fd787bb9d57bdaecdd5c886b6f2f25e8b29f9d70 100644 (file)
@@ -8,175 +8,86 @@ extern "C" {
 #include <linux/module.h>
 #include <linux/hardirq.h>
 #include <sys/types.h>
+#include <sys/kmem.h>
 
-/* See the "Big Theory Statement" in solaris mutex.c.
- *
- * Spin mutexes apparently aren't needed by zfs so we assert
- * if ibc is non-zero.
- *
- * Our impementation of adaptive mutexes aren't really adaptive.
- * They go to sleep every time.
- */
+//#define DEBUG_MUTEX
+#undef DEBUG_MUTEX
 
 #define MUTEX_DEFAULT          0
-#define MUTEX_HELD(x)           (mutex_owned(x))
+#define MUTEX_SPIN             1
+#define MUTEX_ADAPTIVE         2
+
+#define MUTEX_ENTER_TOTAL      0
+#define MUTEX_ENTER_NOT_HELD   1
+#define MUTEX_ENTER_SPIN       2
+#define MUTEX_ENTER_SLEEP      3
+#define MUTEX_TRYENTER_TOTAL   4
+#define MUTEX_TRYENTER_NOT_HELD        5
+#define MUTEX_STATS_SIZE       6
 
 #define KM_MAGIC               0x42424242
 #define KM_POISON              0x84
 
 typedef struct {
-       int km_magic;
+       int32_t km_magic;
+       int16_t km_type;
+       int16_t km_name_size;
        char *km_name;
        struct task_struct *km_owner;
-       struct semaphore km_sem;
-       spinlock_t km_lock;
+       struct semaphore *km_sem;
+#ifdef DEBUG_MUTEX
+       int *km_stats;
+       struct list_head km_list;
+#endif
 } kmutex_t;
 
-#undef mutex_init
-static __inline__ void
-mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
-{
-       ENTRY;
-       ASSERT(mp);
-       ASSERT(ibc == NULL);            /* XXX - Spin mutexes not needed */
-       ASSERT(type == MUTEX_DEFAULT);  /* XXX - Only default type supported */
-
-       mp->km_magic = KM_MAGIC;
-       spin_lock_init(&mp->km_lock);
-       sema_init(&mp->km_sem, 1);
-       mp->km_owner = NULL;
-       mp->km_name = NULL;
-
-       if (name) {
-               mp->km_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
-               if (mp->km_name)
-                       strcpy(mp->km_name, name);
-       }
-       EXIT;
-}
-
-#undef mutex_destroy
-static __inline__ void
-mutex_destroy(kmutex_t *mp)
-{
-       ENTRY;
-       ASSERT(mp);
-       ASSERT(mp->km_magic == KM_MAGIC);
-       spin_lock(&mp->km_lock);
-
-       if (mp->km_name)
-               kfree(mp->km_name);
-
-       memset(mp, KM_POISON, sizeof(*mp));
-       spin_unlock(&mp->km_lock);
-       EXIT;
-}
+extern int mutex_spin_max;
 
-static __inline__ void
-mutex_enter(kmutex_t *mp)
-{
-       ENTRY;
-       ASSERT(mp);
-       ASSERT(mp->km_magic == KM_MAGIC);
-       spin_lock(&mp->km_lock);
-
-       if (unlikely(in_atomic() && !current->exit_state)) {
-               spin_unlock(&mp->km_lock);
-               __CDEBUG_LIMIT(S_MUTEX, D_ERROR,
-                              "May schedule while atomic: %s/0x%08x/%d\n",
-                              current->comm, preempt_count(), current->pid);
-               SBUG();
-       }
-
-       spin_unlock(&mp->km_lock);
-
-       down(&mp->km_sem);
-
-       spin_lock(&mp->km_lock);
-       ASSERT(mp->km_owner == NULL);
-       mp->km_owner = current;
-       spin_unlock(&mp->km_lock);
-       EXIT;
-}
-
-/* Return 1 if we acquired the mutex, else zero.  */
-static __inline__ int
-mutex_tryenter(kmutex_t *mp)
-{
-       int rc;
-       ENTRY;
-
-       ASSERT(mp);
-       ASSERT(mp->km_magic == KM_MAGIC);
-       spin_lock(&mp->km_lock);
-
-       if (unlikely(in_atomic() && !current->exit_state)) {
-               spin_unlock(&mp->km_lock);
-               __CDEBUG_LIMIT(S_MUTEX, D_ERROR,
-                              "May schedule while atomic: %s/0x%08x/%d\n",
-                              current->comm, preempt_count(), current->pid);
-               SBUG();
-       }
-
-       spin_unlock(&mp->km_lock);
-       rc = down_trylock(&mp->km_sem); /* returns 0 if acquired */
-       if (rc == 0) {
-               spin_lock(&mp->km_lock);
-               ASSERT(mp->km_owner == NULL);
-               mp->km_owner = current;
-               spin_unlock(&mp->km_lock);
-               RETURN(1);
-       }
-
-       RETURN(0);
-}
-
-static __inline__ void
-mutex_exit(kmutex_t *mp)
-{
-       ENTRY;
-       ASSERT(mp);
-       ASSERT(mp->km_magic == KM_MAGIC);
-       spin_lock(&mp->km_lock);
-
-       ASSERT(mp->km_owner == current);
-       mp->km_owner = NULL;
-       spin_unlock(&mp->km_lock);
-       up(&mp->km_sem);
-       EXIT;
-}
-
-/* Return 1 if mutex is held by current process, else zero.  */
-static __inline__ int
-mutex_owned(kmutex_t *mp)
-{
-       int rc;
-       ENTRY;
+#ifdef DEBUG_MUTEX
+extern int mutex_stats[MUTEX_STATS_SIZE];
+extern struct mutex mutex_stats_lock;
+extern struct list_head mutex_stats_list;
+#define MUTEX_STAT_INC(stats, stat)    ((stats)[stat]++)
+#else
+#define MUTEX_STAT_INC(stats, stat)
+#endif
 
-       ASSERT(mp);
-       ASSERT(mp->km_magic == KM_MAGIC);
-       spin_lock(&mp->km_lock);
-       rc = (mp->km_owner == current);
-       spin_unlock(&mp->km_lock);
+int spl_mutex_init(void);
+void spl_mutex_fini(void);
 
-       RETURN(rc);
-}
+extern void __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc);
+extern void __spl_mutex_destroy(kmutex_t *mp);
+extern int __mutex_tryenter(kmutex_t *mp);
+extern void __mutex_enter(kmutex_t *mp);
+extern void __mutex_exit(kmutex_t *mp);
+extern int __mutex_owned(kmutex_t *mp);
+extern kthread_t *__spl_mutex_owner(kmutex_t *mp);
 
-/* Return owner if mutex is owned, else NULL.  */
-static __inline__ kthread_t *
-mutex_owner(kmutex_t *mp)
-{
-       kthread_t *thr;
-       ENTRY;
-
-       ASSERT(mp);
-       ASSERT(mp->km_magic == KM_MAGIC);
-       spin_lock(&mp->km_lock);
-       thr = mp->km_owner;
-       spin_unlock(&mp->km_lock);
+#undef mutex_init
+#undef mutex_destroy
 
-       RETURN(thr);
-}
+#define mutex_init(mp, name, type, ibc)                                        \
+({                                                                     \
+        __ENTRY(S_MUTEX);                                               \
+       if ((name) == NULL)                                             \
+               __spl_mutex_init(mp, #mp, type, ibc);                   \
+       else                                                            \
+               __spl_mutex_init(mp, name, type, ibc);                  \
+        __EXIT(S_MUTEX);                                                \
+})
+#define mutex_destroy(mp)                                              \
+({                                                                     \
+        __ENTRY(S_MUTEX);                                               \
+       __spl_mutex_destroy(mp);                                        \
+        __EXIT(S_MUTEX);                                                \
+})
+
+#define mutex_tryenter(mp)     __mutex_tryenter(mp)
+#define mutex_enter(mp)                __mutex_enter(mp)
+#define mutex_exit(mp)         __mutex_exit(mp)
+#define mutex_owned(mp)                __mutex_owned(mp)
+#define mutex_owner(mp)                __spl_mutex_owner(mp)
+#define MUTEX_HELD(mp)         mutex_owned(mp)
 
 #ifdef __cplusplus
 }
index ff283dfd6f3f31402b618116d13c17ba8d5ef367..bd2a5f9f2d75cf48e4a5ee5fab1e6ffd4c64b361 100644 (file)
@@ -22,6 +22,7 @@ spl-objs += spl-kobj.o
 spl-objs += spl-module.o
 spl-objs += spl-generic.o
 spl-objs += spl-atomic.o
+spl-objs += spl-mutex.o
 
 splmodule := spl.ko
 splmoduledir := @kmoduledir@/kernel/lib/
index 1aadb990e633b807aa3a5219e1fec7c2b8d9d2d7..99497dd512418049aedab6a32effb9cea019db04 100644 (file)
@@ -2,6 +2,7 @@
 #include <sys/vmsystm.h>
 #include <sys/vnode.h>
 #include <sys/kmem.h>
+#include <sys/mutex.h>
 #include <sys/debug.h>
 #include <sys/proc.h>
 #include <linux/kmod.h>
@@ -99,21 +100,26 @@ static int __init spl_init(void)
        if ((rc = kmem_init()))
                GOTO(out , rc);
 
+       if ((rc = spl_mutex_init()))
+               GOTO(out2 , rc);
+
        if ((rc = vn_init()))
-               GOTO(out2, rc);
+               GOTO(out3, rc);
 
        if ((rc = proc_init()))
-               GOTO(out3, rc);
+               GOTO(out4, rc);
 
        if ((rc = set_hostid()))
-               GOTO(out4, rc = -EADDRNOTAVAIL);
+               GOTO(out5, rc = -EADDRNOTAVAIL);
 
        printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION);
        RETURN(rc);
-out4:
+out5:
        proc_fini();
-out3:
+out4:
        vn_fini();
+out3:
+       spl_mutex_fini();
 out2:
        kmem_fini();
 out:
diff --git a/modules/spl/spl-mutex.c b/modules/spl/spl-mutex.c
new file mode 100644 (file)
index 0000000..06a8f31
--- /dev/null
@@ -0,0 +1,256 @@
+#include <sys/mutex.h>
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_MUTEX
+
+/* Mutex implementation based on those found in Solaris.  This means
+ * that the MUTEX_DEFAULT type is an adaptive mutex.  When calling
+ * mutex_enter() your process will spin waiting for the lock if it's
+ * likely the lock will be freed shortly.  If it looks like the
+ * lock will be held for a longer time we schedule and sleep waiting
+ * for it.  This determination is made by checking if the holder of
+ * the lock is currently running on cpu or sleeping waiting to be
+ * scheduled.  If the holder is currently running it's likely the
+ * lock will be shortly dropped.
+ *
+ * XXX: This is basically a rough implementation to see if this
+ * helps our performance.  If it does a more careful implementation
+ * should be done, perhaps in assembly.
+ */
+
+/*  0:         Never spin when trying to acquire lock
+ * -1:         Spin until acquired or holder yields without dropping lock
+ *  1-MAX_INT: Spin for N attempts before sleeping for lock
+ */
+int mutex_spin_max = 100;
+
+#ifdef DEBUG_MUTEX
+int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
+DEFINE_MUTEX(mutex_stats_lock);
+LIST_HEAD(mutex_stats_list);
+#endif
+
+void
+__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
+{
+       ASSERT(mp);
+       ASSERT(name);
+       ASSERT(ibc == NULL);
+       ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */
+
+       mp->km_magic = KM_MAGIC;
+       mp->km_owner = NULL;
+       mp->km_name = NULL;
+       mp->km_name_size = strlen(name) + 1;
+
+       switch (type) {
+               case MUTEX_DEFAULT:
+                       mp->km_type = MUTEX_ADAPTIVE;
+                       break;
+               case MUTEX_SPIN:
+               case MUTEX_ADAPTIVE:
+                       mp->km_type = type;
+                       break;
+               default:
+                       SBUG();
+       }
+
+       /* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
+       mp->km_sem = kmem_alloc(sizeof(struct semaphore), KM_SLEEP);
+       if (mp->km_sem == NULL)
+               return;
+
+       mp->km_name = kmem_alloc(mp->km_name_size, KM_SLEEP);
+       if (mp->km_name == NULL) {
+               kmem_free(mp->km_sem, sizeof(struct semaphore));
+               return;
+       }
+
+       sema_init(mp->km_sem, 1);
+       strcpy(mp->km_name, name);
+
+#ifdef DEBUG_MUTEX
+       mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, KM_SLEEP);
+        if (mp->km_stats == NULL) {
+               kmem_free(mp->km_name, mp->km_name_size);
+               kmem_free(mp->km_sem, sizeof(struct semaphore));
+               return;
+       }
+
+       mutex_lock(&mutex_stats_lock);
+       list_add_tail(&mp->km_list, &mutex_stats_list);
+       mutex_unlock(&mutex_stats_lock);
+#endif
+}
+EXPORT_SYMBOL(__spl_mutex_init);
+
+void
+__spl_mutex_destroy(kmutex_t *mp)
+{
+       ASSERT(mp);
+       ASSERT(mp->km_magic == KM_MAGIC);
+
+#ifdef DEBUG_MUTEX
+       mutex_lock(&mutex_stats_lock);
+       list_del_init(&mp->km_list);
+       mutex_unlock(&mutex_stats_lock);
+
+       kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
+#endif
+       kmem_free(mp->km_name, mp->km_name_size);
+       kmem_free(mp->km_sem, sizeof(struct semaphore));
+
+       memset(mp, KM_POISON, sizeof(*mp));
+}
+EXPORT_SYMBOL(__spl_mutex_destroy);
+
+/* Return 1 if we acquired the mutex, else zero.  */
+int
+__mutex_tryenter(kmutex_t *mp)
+{
+       int rc;
+       ENTRY;
+
+       ASSERT(mp);
+       ASSERT(mp->km_magic == KM_MAGIC);
+       MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
+       MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);
+
+       rc = down_trylock(mp->km_sem);
+       if (rc == 0) {
+               ASSERT(mp->km_owner == NULL);
+               mp->km_owner = current;
+               MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
+               MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);
+       }
+
+       RETURN(!rc);
+}
+EXPORT_SYMBOL(__mutex_tryenter);
+
+static void
+mutex_enter_adaptive(kmutex_t *mp)
+{
+       struct task_struct *owner;
+       int count = 0;
+
+       /* Lock is not held so we expect to acquire the lock */
+       if ((owner = mp->km_owner) == NULL) {
+               down(mp->km_sem);
+               MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
+               MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
+       } else {
+               /* The lock is held by a currently running task which
+                * we expect will drop the lock before leaving the
+                * head of the runqueue.  So the ideal thing to do
+                * is spin until we acquire the lock and avoid a
+                * context switch.  However it is also possible the
+                * task holding the lock yields the processor with
+                * out dropping lock.  In which case, we know it's
+                * going to be a while so we stop spinning and go
+                * to sleep waiting for the lock to be available.
+                * This should strike the optimum balance between
+                * spinning and sleeping waiting for a lock.
+                */
+               while (task_curr(owner) && (count <= mutex_spin_max)) {
+                       if (down_trylock(mp->km_sem) == 0) {
+                               MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+                               MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+                               GOTO(out, count);
+                       }
+                       count++;
+               }
+
+               /* The lock is held by a sleeping task so it's going to
+                * cost us minimally one context switch.  We might as
+                * well sleep and yield the processor to other tasks.
+                */
+               down(mp->km_sem);
+               MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
+               MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
+       }
+out:
+       MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
+       MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
+}
+
+void
+__mutex_enter(kmutex_t *mp)
+{
+       ENTRY;
+       ASSERT(mp);
+       ASSERT(mp->km_magic == KM_MAGIC);
+
+       switch (mp->km_type) {
+               case MUTEX_SPIN:
+                       while (down_trylock(mp->km_sem));
+                       MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+                       MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+                       break;
+               case MUTEX_ADAPTIVE:
+                       mutex_enter_adaptive(mp);
+                       break;
+       }
+
+       ASSERT(mp->km_owner == NULL);
+       mp->km_owner = current;
+
+       EXIT;
+}
+EXPORT_SYMBOL(__mutex_enter);
+
+void
+__mutex_exit(kmutex_t *mp)
+{
+       ENTRY;
+       ASSERT(mp);
+       ASSERT(mp->km_magic == KM_MAGIC);
+       ASSERT(mp->km_owner == current);
+       mp->km_owner = NULL;
+       up(mp->km_sem);
+       EXIT;
+}
+EXPORT_SYMBOL(__mutex_exit);
+
+/* Return 1 if mutex is held by current process, else zero.  */
+int
+__mutex_owned(kmutex_t *mp)
+{
+       ENTRY;
+       ASSERT(mp);
+       ASSERT(mp->km_magic == KM_MAGIC);
+       RETURN(mp->km_owner == current);
+}
+EXPORT_SYMBOL(__mutex_owned);
+
+/* Return owner if mutex is owned, else NULL.  */
+kthread_t *
+__spl_mutex_owner(kmutex_t *mp)
+{
+       ENTRY;
+       ASSERT(mp);
+       ASSERT(mp->km_magic == KM_MAGIC);
+       RETURN(mp->km_owner);
+}
+EXPORT_SYMBOL(__spl_mutex_owner);
+
+int
+spl_mutex_init(void)
+{
+       ENTRY;
+       RETURN(0);
+}
+
+void
+spl_mutex_fini(void)
+{
+        ENTRY;
+#ifdef DEBUG_MUTEX
+       ASSERT(list_empty(&mutex_stats_list));
+#endif
+        EXIT;
+}
+
index 94dd937a16e5c40dd281d34dd9305d0f6a1b413e..64423c186856dfc3185f131f953c8aae1714f6aa 100644 (file)
@@ -3,8 +3,10 @@
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/sysctl.h>
+#include <linux/seq_file.h>
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
+#include <sys/mutex.h>
 #include <sys/debug.h>
 #include "config.h"
 
@@ -18,10 +20,17 @@ static struct ctl_table_header *spl_header = NULL;
 static unsigned long table_min = 0;
 static unsigned long table_max = ~0;
 
-#define CTL_SPL 0x87
+#define CTL_SPL                0x87
+#define CTL_SPL_DEBUG  0x88
+#define CTL_SPL_MUTEX  0x89
+#define CTL_SPL_KMEM   0x90
+
 enum {
        CTL_VERSION = 1,          /* Version */
-        CTL_DEBUG_SUBSYS,         /* Debug subsystem */
+       CTL_HOSTID,               /* Host id reported by /usr/bin/hostid */
+       CTL_HW_SERIAL,            /* Hardware serial number from hostid */
+
+       CTL_DEBUG_SUBSYS,         /* Debug subsystem */
         CTL_DEBUG_MASK,           /* Debug mask */
         CTL_DEBUG_PRINTK,         /* Force all messages to console */
         CTL_DEBUG_MB,             /* Debug buffer size */
@@ -31,19 +40,23 @@ enum {
         CTL_DEBUG_PATH,           /* Dump log location */
         CTL_DEBUG_DUMP,           /* Dump debug buffer to file */
         CTL_DEBUG_FORCE_BUG,      /* Hook to force a BUG */
-        CTL_CONSOLE_RATELIMIT,    /* Ratelimit console messages */
+        CTL_DEBUG_STACK_SIZE,     /* Max observed stack size */
+
+       CTL_CONSOLE_RATELIMIT,    /* Ratelimit console messages */
         CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */
         CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */
         CTL_CONSOLE_BACKOFF,      /* Delay increase factor */
-        CTL_STACK_SIZE,           /* Max observed stack size */
+
 #ifdef DEBUG_KMEM
         CTL_KMEM_KMEMUSED,        /* Crrently alloc'd kmem bytes */
         CTL_KMEM_KMEMMAX,         /* Max alloc'd by kmem bytes */
         CTL_KMEM_VMEMUSED,        /* Currently alloc'd vmem bytes */
         CTL_KMEM_VMEMMAX,         /* Max alloc'd by vmem bytes */
 #endif
-       CTL_HOSTID,               /* Host id reported by /usr/bin/hostid */
-       CTL_HW_SERIAL,            /* Hardware serial number from hostid */
+
+       CTL_MUTEX_STATS,          /* Global mutex statistics */
+       CTL_MUTEX_STATS_PER,      /* Per mutex statistics */
+       CTL_MUTEX_SPIN_MAX,       /* Maximum mutex spin iterations */
 };
 
 static int
@@ -368,21 +381,107 @@ proc_dohostid(struct ctl_table *table, int write, struct file *filp,
         RETURN(rc);
 }
 
-static struct ctl_table spl_table[] = {
-        /* NB No .strategy entries have been provided since
-         * sysctl(8) prefers to go via /proc for portability.
-         */
-        {
-                .ctl_name = CTL_VERSION,
-                .procname = "version",
-                .data     = spl_version,
-                .maxlen   = sizeof(spl_version),
-                .mode     = 0444,
-                .proc_handler = &proc_dostring,
-        },
+#ifdef DEBUG_MUTEX
+static void
+mutex_seq_show_headers(struct seq_file *f)
+{
+        seq_printf(f, "%-36s %-4s %-16s\t"
+                   "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n",
+                  "name", "type", "owner");
+}
+
+static int
+mutex_seq_show(struct seq_file *f, void *p)
+{
+        kmutex_t *mp = p;
+       char t = 'X';
+        int i;
+
+       ASSERT(mp->km_magic == KM_MAGIC);
+
+       switch (mp->km_type) {
+               case MUTEX_DEFAULT:     t = 'D';        break;
+               case MUTEX_SPIN:        t = 'S';        break;
+               case MUTEX_ADAPTIVE:    t = 'A';        break;
+               default:
+                       SBUG();
+       }
+        seq_printf(f, "%-36s %c    ", mp->km_name, t);
+       if (mp->km_owner)
+                seq_printf(f, "%p\t", mp->km_owner);
+       else
+                seq_printf(f, "%-16s\t", "<not held>");
+
+        for (i = 0; i < MUTEX_STATS_SIZE; i++)
+                seq_printf(f, "%d%c", mp->km_stats[i],
+                           (i + 1 == MUTEX_STATS_SIZE) ? '\n' : '\t');
+
+        return 0;
+}
+
+static void *
+mutex_seq_start(struct seq_file *f, loff_t *pos)
+{
+        struct list_head *p;
+        loff_t n = *pos;
+        ENTRY;
+
+        mutex_lock(&mutex_stats_lock);
+        if (!n)
+                mutex_seq_show_headers(f);
+
+        p = mutex_stats_list.next;
+        while (n--) {
+                p = p->next;
+                if (p == &mutex_stats_list)
+                        RETURN(NULL);
+        }
+
+        RETURN(list_entry(p, kmutex_t, km_list));
+}
+
+static void *
+mutex_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+       kmutex_t *mp = p;
+        ENTRY;
+
+        ++*pos;
+        RETURN((mp->km_list.next == &mutex_stats_list) ?
+              NULL : list_entry(mp->km_list.next, kmutex_t, km_list));
+}
+
+static void
+mutex_seq_stop(struct seq_file *f, void *v)
+{
+        mutex_unlock(&mutex_stats_lock);
+}
+
+static struct seq_operations mutex_seq_ops = {
+        .show  = mutex_seq_show,
+        .start = mutex_seq_start,
+        .next  = mutex_seq_next,
+        .stop  = mutex_seq_stop,
+};
+
+static int
+proc_mutex_open(struct inode *inode, struct file *filp)
+{
+        return seq_open(filp, &mutex_seq_ops);
+}
+
+static struct file_operations proc_mutex_operations = {
+        .open           = proc_mutex_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = seq_release,
+};
+#endif /* DEBUG_MUTEX */
+
+static struct ctl_table spl_debug_table[] = {
         {
                 .ctl_name = CTL_DEBUG_SUBSYS,
-                .procname = "debug_subsystem",
+                .procname = "subsystem",
                 .data     = &spl_debug_subsys,
                 .maxlen   = sizeof(unsigned long),
                 .mode     = 0644,
@@ -390,7 +489,7 @@ static struct ctl_table spl_table[] = {
         },
         {
                 .ctl_name = CTL_DEBUG_MASK,
-                .procname = "debug_mask",
+                .procname = "mask",
                 .data     = &spl_debug_mask,
                 .maxlen   = sizeof(unsigned long),
                 .mode     = 0644,
@@ -398,7 +497,7 @@ static struct ctl_table spl_table[] = {
         },
         {
                 .ctl_name = CTL_DEBUG_PRINTK,
-                .procname = "debug_printk",
+                .procname = "printk",
                 .data     = &spl_debug_printk,
                 .maxlen   = sizeof(unsigned long),
                 .mode     = 0644,
@@ -406,13 +505,13 @@ static struct ctl_table spl_table[] = {
         },
         {
                 .ctl_name = CTL_DEBUG_MB,
-                .procname = "debug_mb",
+                .procname = "mb",
                 .mode     = 0644,
                 .proc_handler = &proc_debug_mb,
         },
         {
                 .ctl_name = CTL_DEBUG_BINARY,
-                .procname = "debug_binary",
+                .procname = "binary",
                 .data     = &spl_debug_binary,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
@@ -436,7 +535,7 @@ static struct ctl_table spl_table[] = {
         },
         {
                 .ctl_name = CTL_DEBUG_PATH,
-                .procname = "debug_path",
+                .procname = "path",
                 .data     = spl_debug_file_path,
                 .maxlen   = sizeof(spl_debug_file_path),
                 .mode     = 0644,
@@ -444,7 +543,7 @@ static struct ctl_table spl_table[] = {
         },
         {
                 .ctl_name = CTL_DEBUG_DUMP,
-                .procname = "debug_dump",
+                .procname = "dump",
                 .mode     = 0200,
                 .proc_handler = &proc_dump_kernel,
         },
@@ -483,14 +582,40 @@ static struct ctl_table spl_table[] = {
                 .proc_handler = &proc_console_backoff,
         },
         {
-                .ctl_name = CTL_STACK_SIZE,
+                .ctl_name = CTL_DEBUG_STACK_SIZE,
                 .procname = "stack_max",
                 .data     = &spl_debug_stack,
                 .maxlen   = sizeof(int),
                 .mode     = 0444,
                 .proc_handler = &proc_dointvec,
         },
+       {0},
+};
+
+#ifdef DEBUG_MUTEX
+static struct ctl_table spl_mutex_table[] = {
+        {
+                .ctl_name = CTL_MUTEX_STATS,
+                .procname = "stats",
+                .data     = &mutex_stats,
+                .maxlen   = sizeof(int) * MUTEX_STATS_SIZE,
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec,
+        },
+        {
+                .ctl_name = CTL_MUTEX_SPIN_MAX,
+                .procname = "spin_max",
+                .data     = &mutex_spin_max,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+        },
+       {0},
+};
+#endif /* DEBUG_MUTEX */
+
 #ifdef DEBUG_KMEM
+static struct ctl_table spl_kmem_table[] = {
         {
                 .ctl_name = CTL_KMEM_KMEMUSED,
                 .procname = "kmem_used",
@@ -527,7 +652,22 @@ static struct ctl_table spl_table[] = {
                 .mode     = 0444,
                 .proc_handler = &proc_doulongvec_minmax,
         },
-#endif
+       {0},
+};
+#endif /* DEBUG_KMEM */
+
+static struct ctl_table spl_table[] = {
+        /* NB No .strategy entries have been provided since
+         * sysctl(8) prefers to go via /proc for portability.
+         */
+        {
+                .ctl_name = CTL_VERSION,
+                .procname = "version",
+                .data     = spl_version,
+                .maxlen   = sizeof(spl_version),
+                .mode     = 0444,
+                .proc_handler = &proc_dostring,
+        },
         {
                 .ctl_name = CTL_HOSTID,
                 .procname = "hostid",
@@ -544,10 +684,32 @@ static struct ctl_table spl_table[] = {
                 .mode     = 0444,
                 .proc_handler = &proc_dostring,
         },
+       {
+               .ctl_name = CTL_SPL_DEBUG,
+               .procname = "debug",
+               .mode     = 0555,
+               .child    = spl_debug_table,
+       },
+#ifdef DEBUG_MUTEX
+       {
+               .ctl_name = CTL_SPL_MUTEX,
+               .procname = "mutex",
+               .mode     = 0555,
+               .child    = spl_mutex_table,
+       },
+#endif
+#ifdef DEBUG_KMEM
+       {
+               .ctl_name = CTL_SPL_KMEM,
+               .procname = "kmem",
+               .mode     = 0555,
+               .child    = spl_kmem_table,
+       },
+#endif
         { 0 },
 };
 
-static struct ctl_table spl_dir_table[] = {
+static struct ctl_table spl_dir[] = {
         {
                 .ctl_name = CTL_SPL,
                 .procname = "spl",
@@ -563,9 +725,22 @@ proc_init(void)
         ENTRY;
 
 #ifdef CONFIG_SYSCTL
-        spl_header = register_sysctl_table(spl_dir_table, 0);
+        spl_header = register_sysctl_table(spl_dir, 0);
        if (spl_header == NULL)
                RETURN(-EUNATCH);
+
+#ifdef DEBUG_MUTEX
+       {
+                struct proc_dir_entry *entry = create_proc_entry("mutex_stats",
+                                                                0444, NULL);
+                if (entry) {
+                        entry->proc_fops = &proc_mutex_operations;
+                } else {
+                        unregister_sysctl_table(spl_header);
+                        RETURN(-EUNATCH);
+                }
+       }
+#endif /* DEBUG_MUTEX */
 #endif
         RETURN(0);
 }
@@ -577,6 +752,10 @@ proc_fini(void)
 
 #ifdef CONFIG_SYSCTL
         ASSERT(spl_header != NULL);
+#ifdef DEBUG_MUTEX
+        /* Only created by proc_init() when DEBUG_MUTEX is defined */
+        remove_proc_entry("mutex_stats", NULL);
+#endif /* DEBUG_MUTEX */
         unregister_sysctl_table(spl_header);
 #endif
         EXIT;
index ad9be695b80f87437be1bbbd7f6302e985751ff1..70deb0aea6e3d2ffc19d47a0d6a7ac034d5fa28e 100644 (file)
@@ -106,7 +106,7 @@ task_done(taskq_t *tq, task_t *t)
                t->t_id = 0;
                t->t_func = NULL;
                t->t_arg = NULL;
-                list_add(&t->t_list, &tq->tq_free_list);
+                list_add_tail(&t->t_list, &tq->tq_free_list);
        } else {
                task_free(tq, t);
        }
@@ -209,7 +209,7 @@ __taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
 
 
        spin_lock(&t->t_lock);
-       list_add(&t->t_list, &tq->tq_pend_list);
+       list_add_tail(&t->t_list, &tq->tq_pend_list);
        t->t_id = rc = tq->tq_next_id;
        tq->tq_next_id++;
         t->t_func = func;
@@ -282,7 +282,7 @@ taskq_thread(void *args)
                 if (!list_empty(&tq->tq_pend_list)) {
                         t = list_entry(tq->tq_pend_list.next, task_t, t_list);
                         list_del_init(&t->t_list);
-                       list_add(&t->t_list, &tq->tq_work_list);
+                       list_add_tail(&t->t_list, &tq->tq_work_list);
                         tq->tq_nactive++;
                        spin_unlock_irq(&tq->tq_lock);