]> granicus.if.org Git - zfs/commitdiff
Allow kicking a taskq to spawn more threads
author Chunwei Chen <david.chen@osnexus.com>
Thu, 28 Jan 2016 00:55:14 +0000 (16:55 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Fri, 5 Feb 2016 22:08:31 +0000 (14:08 -0800)
This patch adds a module parameter, spl_taskq_kick. When a non-zero value is
written to it, all taskqs are scanned; any taskq containing a task that has been
pending for more than 5 seconds is forced to spawn a new thread. This is meant
as an emergency recovery from deadlock, not a general solution.

Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #529

include/sys/taskq.h
man/man5/spl-module-parameters.5
module/spl/spl-taskq.c

index e7661f7ce8b8f51971052093db0adab334730307..19bc6c1dda03c092df55d36cee5845f2f94e20c1 100644 (file)
@@ -105,6 +105,7 @@ typedef struct taskq_ent {
        void                    *tqent_arg;
        taskq_t                 *tqent_taskq;
        uintptr_t               tqent_flags;
+       unsigned long           tqent_birth;
 } taskq_ent_t;
 
 #define        TQENT_FLAG_PREALLOC     0x1
index 1d4d73e6089c4a6cc310483b025cd50da154744e..1b760243e2bcbf17ffced5f9c6824a8725d9cb7d 100644 (file)
@@ -250,6 +250,20 @@ may be overridden for non-standard configurations.
 Default value: \fB/etc/hostid\fR
 .RE
 
+.sp
+.ne 2
+.na
+\fBspl_taskq_kick\fR (uint)
+.ad
+.RS 12n
+Kick stuck taskqs to spawn threads. Writing a non-zero value to it scans all
+the taskqs. Any taskq with a pending task more than 5 seconds old is kicked to
+spawn more threads. This can be used if a rare deadlock occurs because one or
+more taskqs did not spawn a thread when they should have.
+.sp
+Default value: \fB0\fR
+.RE
+
 .sp
 .ne 2
 .na
index 2b3f3f4bc939f588cffb16f078b578527e29d5da..56034c89947a17b0e94ad1159cb95970e32a868f 100644 (file)
@@ -221,6 +221,7 @@ task_expire(unsigned long data)
                return;
        }
 
+       t->tqent_birth = jiffies;
        /*
         * The priority list must be maintained in strict task id order
         * from lowest to highest for lowest_id to be easily calculable.
@@ -583,6 +584,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
        t->tqent_timer.data = 0;
        t->tqent_timer.function = NULL;
        t->tqent_timer.expires = 0;
+       t->tqent_birth = jiffies;
 
        ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
 
@@ -682,6 +684,7 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
        t->tqent_func = func;
        t->tqent_arg = arg;
        t->tqent_taskq = tq;
+       t->tqent_birth = jiffies;
 
        spin_unlock(&t->tqent_lock);
 
@@ -1134,6 +1137,63 @@ taskq_destroy(taskq_t *tq)
 }
 EXPORT_SYMBOL(taskq_destroy);
 
+
+static unsigned int spl_taskq_kick = 0;
+
+/*
+ * Setter for spl_taskq_kick. A non-zero write kicks every taskq whose first
+ * pending entry is more than 5 seconds old. 2.6.36 API change: module_param_cb
+ * (taking kernel_param_ops) was introduced, module_param_call was marked
+ * obsolete, and set/get now take a 'const struct kernel_param *'.
+ */
+static int
+#ifdef module_param_cb
+param_set_taskq_kick(const char *val, const struct kernel_param *kp)
+#else
+param_set_taskq_kick(const char *val, struct kernel_param *kp)
+#endif
+{
+       int ret;
+       taskq_t *tq;
+       taskq_ent_t *t;
+       unsigned long flags;
+
+       ret = param_set_uint(val, kp);
+       if (ret < 0 || !spl_taskq_kick)
+               return (ret);
+       /* One-shot trigger: clear it so the parameter reads back as 0 */
+       spl_taskq_kick = 0;
+
+       down_read(&tq_list_sem);
+       list_for_each_entry(tq, &tq_list, tq_taskqs) {
+               spin_lock_irqsave_nested(&tq->tq_lock, flags,
+                   tq->tq_lock_class);
+               /* Kick if the first pending entry is older than 5 seconds */
+               t = taskq_next_ent(tq);
+               if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
+                       (void) taskq_thread_spawn(tq);
+                       printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
+                           tq->tq_name, tq->tq_instance);
+               }
+               spin_unlock_irqrestore(&tq->tq_lock, flags);
+       }
+       up_read(&tq_list_sem);
+       return (ret);
+}
+
+#ifdef module_param_cb
+static const struct kernel_param_ops param_ops_taskq_kick = {
+        .set = param_set_taskq_kick,
+        .get = param_get_uint,
+};
+module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
+#else
+module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
+    &spl_taskq_kick, 0644);
+#endif
+MODULE_PARM_DESC(spl_taskq_kick,
+    "Write nonzero to kick stuck taskqs to spawn more threads");
+
 int
 spl_taskq_init(void)
 {