]> granicus.if.org Git - spl/commitdiff
More fixes to ensure we get good debug logs even if we're in the
authorbehlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Mon, 21 Apr 2008 22:44:11 +0000 (22:44 +0000)
committerbehlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Mon, 21 Apr 2008 22:44:11 +0000 (22:44 +0000)
process of destroying the stacks.  Threshold set fairly aggressively
to 80% of stack usage.

git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@82 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c

include/sys/debug.h
modules/spl/spl-debug.c
modules/spl/spl-proc.c

index 720e4136e7c732226c3a24f90f9385a51ed376df..8f3fcd94b1c03f6562dcca7e37630bd165a796bf 100644 (file)
@@ -64,6 +64,16 @@ extern unsigned int spl_debug_stack;
 #define SPL_DEFAULT_MIN_DELAY          ((HZ + 1) / 2)
 #define SPL_DEFAULT_BACKOFF            2
 
+#define DL_NOTHREAD                     0x0001 /* Do not create a new thread */
+#define DL_SINGLE_CPU                  0x0002 /* Collect pages from this CPU */
+
+typedef struct dumplog_priv {
+        wait_queue_head_t dp_waitq;
+        pid_t dp_pid;
+        int dp_flags;
+        atomic_t dp_done;
+} dumplog_priv_t;
+
 typedef struct {
         unsigned long cdls_next;
         int           cdls_count;
@@ -147,7 +157,7 @@ struct page_collection {
         int               pc_want_daemon_pages;
 };
 
-#define SBUG()         spl_debug_bug(__FILE__, __FUNCTION__, __LINE__);
+#define SBUG()         spl_debug_bug(__FILE__, __FUNCTION__, __LINE__, 0);
 
 #ifdef  __ia64__
 #define CDEBUG_STACK() (THREAD_SIZE -                                   \
@@ -159,29 +169,24 @@ struct page_collection {
                         (THREAD_SIZE - 1)))
 # endif /* __ia64__ */
 
+/* DL_NOTHREAD and DL_SINGLE_CPU flags are passed to spl_debug_bug()
+ * because we have overrun our stack and likely damaged at least one
+ * other unknown thread's stack.  We must finish generating the needed
+ * debug info within this thread context because once we yield the CPU
+ * it's very likely the system will crash.
+ */
 #define __CHECK_STACK(file, func, line)                                 \
 do {                                                                    \
         unsigned long _stack = CDEBUG_STACK();                          \
-       unsigned long _soft_limit = (9 * THREAD_SIZE) / 10;             \
+       unsigned long _soft_limit = (8 * THREAD_SIZE) / 10;             \
                                                                         \
        if (unlikely(_stack > _soft_limit && _stack > spl_debug_stack)){\
                 spl_debug_stack = _stack;                               \
-               if (_stack <= THREAD_SIZE) {                            \
-                        spl_debug_msg(NULL, D_TRACE, D_WARNING,         \
-                                      file, func, line, "Warning "      \
-                                      "exceeded 90%% of maximum safe "  \
-                                     "stack size (%lu/%lu)\n",         \
-                                     _stack, THREAD_SIZE);             \
-                       spl_debug_dumpstack(NULL);                      \
-                       spl_debug_dumplog();                            \
-               } else {                                                \
-                        spl_debug_msg(NULL, D_TRACE, D_WARNING,         \
-                                      file, func, line, "Error "        \
-                                      "exceeded maximum safe stack "    \
-                                     "size (%lu/%lu)\n",               \
-                                     _stack, THREAD_SIZE);             \
-                       SBUG();                                         \
-               }                                                       \
+                spl_debug_msg(NULL, D_TRACE, D_WARNING,                 \
+                              file, func, line, "Error exceeded "       \
+                             "maximum safe stack size (%lu/%lu)\n",    \
+                             _stack, THREAD_SIZE);                     \
+               spl_debug_bug(file, func, line, DL_SINGLE_CPU);         \
         }                                                               \
 } while (0)
 
@@ -213,7 +218,7 @@ do {                                                                    \
                 spl_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG,           \
                               __FILE__, __FUNCTION__, __LINE__,         \
                               "ASSERTION(" #cond ") failed\n");         \
-                spl_debug_bug(__FILE__, __FUNCTION__, __LINE__);        \
+               SBUG();                                                 \
        }                                                               \
 } while (0)
 
@@ -226,7 +231,7 @@ do {                                                                    \
                               __FILE__, __FUNCTION__, __LINE__,         \
                               "ASSERTION(" #cond ") failed:" fmt,       \
                                  ## a);                                 \
-                spl_debug_bug(__FILE__, __FUNCTION__, __LINE__)         \
+               SBUG();                                                 \
         }                                                               \
 } while (0)
 
@@ -242,7 +247,7 @@ do {                                                                    \
                               __FILE__, __FUNCTION__, __LINE__,         \
                               "VERIFY3(" FMT " " #OP " " FMT ")\n",     \
                               CAST __left,  CAST __right);              \
-                spl_debug_bug(__FILE__, __FUNCTION__, __LINE__);        \
+               SBUG();                                                 \
         }                                                               \
 } while (0)
 
@@ -285,7 +290,6 @@ do {                                                                    \
 #define CDEBUG_LIMIT(mask, format, a...)                                \
         __CDEBUG_LIMIT(DEBUG_SUBSYSTEM, mask, format, ## a)
 
-#define dprintf(fmt, a...)             CDEBUG_LIMIT(D_INFO, fmt, ## a)
 #define CWARN(fmt, a...)               CDEBUG_LIMIT(D_WARNING, fmt, ## a)
 #define CERROR(fmt, a...)              CDEBUG_LIMIT(D_ERROR, fmt, ## a)
 #define CEMERG(fmt, a...)              CDEBUG_LIMIT(D_EMERG, fmt, ## a)
@@ -329,9 +333,9 @@ extern unsigned long spl_debug_get_subsys(void);
 extern int spl_debug_set_mb(int mb);
 extern int spl_debug_get_mb(void);
 
-extern int spl_debug_dumplog(void);
+extern int spl_debug_dumplog(int flags);
 extern void spl_debug_dumpstack(struct task_struct *tsk);
-extern void spl_debug_bug(char *file, const char *func, const int line);
+extern void spl_debug_bug(char *file, const char *func, const int line, int flags);
 
 extern int spl_debug_clear_buffer(void);
 extern int spl_debug_mark_buffer(char *text);
index 47d0c854e0b3e0fca54297a976c89a1ac2d54a1a..8c60ea2de4bf6f148bed4f6dd989f7fd6814a809 100644 (file)
@@ -102,7 +102,7 @@ struct rw_semaphore trace_sem;
 atomic_t trace_tage_allocated = ATOMIC_INIT(0);
 
 static int panic_notifier(struct notifier_block *, unsigned long, void *);
-static int spl_debug_dump_all_pages(char *);
+static int spl_debug_dump_all_pages(dumplog_priv_t *dp, char *);
 static void trace_fini(void);
 
 
@@ -344,12 +344,6 @@ spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys)
         return 0;
 }
 
-typedef struct dumplog_priv {
-        wait_queue_head_t dp_waitq;
-       pid_t dp_pid;
-       atomic_t dp_flag;
-} dumplog_priv_t;
-
 static void
 spl_debug_dumplog_internal(dumplog_priv_t *dp)
 {
@@ -362,7 +356,7 @@ spl_debug_dumplog_internal(dumplog_priv_t *dp)
                  "%s.%ld.%ld", spl_debug_file_path,
                 get_seconds(), (long)dp->dp_pid);
         printk(KERN_ALERT "SPL: dumping log to %s\n", spl_debug_file_name);
-        spl_debug_dump_all_pages(spl_debug_file_name);
+        spl_debug_dump_all_pages(dp, spl_debug_file_name);
 
         current->journal_info = journal_info;
 }
@@ -373,29 +367,36 @@ spl_debug_dumplog_thread(void *arg)
        dumplog_priv_t *dp = (dumplog_priv_t *)arg;
 
         spl_debug_dumplog_internal(dp);
-       atomic_set(&dp->dp_flag, 1);
+       atomic_set(&dp->dp_done, 1);
         wake_up(&dp->dp_waitq);
         do_exit(0);
 
         return 0; /* Unreachable */
 }
 
+/* When DL_NOTHREAD is set in flags the dump runs in the calling thread */
 int
-spl_debug_dumplog(void)
+spl_debug_dumplog(int flags)
 {
        struct task_struct *tsk;
        dumplog_priv_t dp;
 
-       init_waitqueue_head(&dp.dp_waitq);
-       dp.dp_pid = current->pid;
-       atomic_set(&dp.dp_flag, 0);
+        init_waitqueue_head(&dp.dp_waitq);
+        dp.dp_pid = current->pid;
+        dp.dp_flags = flags;
+        atomic_set(&dp.dp_done, 0);
 
-        tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug");
-        if (tsk == NULL)
-               return -ENOMEM;
+        if (dp.dp_flags & DL_NOTHREAD) {
+                spl_debug_dumplog_internal(&dp);
+        } else {
 
-       wake_up_process(tsk);
-       wait_event(dp.dp_waitq, atomic_read(&dp.dp_flag));
+                tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug");
+                if (tsk == NULL)
+                        return -ENOMEM;
+
+                wake_up_process(tsk);
+                wait_event(dp.dp_waitq, atomic_read(&dp.dp_done));
+        }
 
        return 0;
 }
@@ -849,7 +850,7 @@ EXPORT_SYMBOL(spl_debug_vmsg);
  * some arch, this will have to be implemented separately in each arch.
  */
 static void
-panic_collect_pages(struct page_collection *pc)
+collect_pages_from_single_cpu(struct page_collection *pc)
 {
         struct trace_cpu_data *tcd;
         int i, j;
@@ -876,12 +877,12 @@ collect_pages_on_cpu(void *info)
 }
 
 static void
-collect_pages(struct page_collection *pc)
+collect_pages(dumplog_priv_t *dp, struct page_collection *pc)
 {
         INIT_LIST_HEAD(&pc->pc_pages);
 
-        if (spl_panic_in_progress)
-                panic_collect_pages(pc);
+        if (spl_panic_in_progress || dp->dp_flags & DL_SINGLE_CPU)
+                collect_pages_from_single_cpu(pc);
         else
                 trace_call_on_all_cpus(collect_pages_on_cpu, pc);
 }
@@ -944,7 +945,7 @@ trace_filp_open (const char *name, int flags, int mode, int *err)
 #define trace_filp_poff(f)             (&(f)->f_pos)
 
 static int
-spl_debug_dump_all_pages(char *filename)
+spl_debug_dump_all_pages(dumplog_priv_t *dp, char *filename)
 {
         struct page_collection pc;
         struct file *filp;
@@ -965,7 +966,7 @@ spl_debug_dump_all_pages(char *filename)
         }
 
         spin_lock_init(&pc.pc_lock);
-        collect_pages(&pc);
+        collect_pages(dp, &pc);
         if (list_empty(&pc.pc_pages)) {
                 rc = 0;
                 goto close;
@@ -1006,13 +1007,18 @@ spl_debug_dump_all_pages(char *filename)
 static void
 spl_debug_flush_pages(void)
 {
+        dumplog_priv_t dp;
         struct page_collection pc;
         struct trace_page *tage;
         struct trace_page *tmp;
 
         spin_lock_init(&pc.pc_lock);
+        init_waitqueue_head(&dp.dp_waitq);
+        dp.dp_pid = current->pid;
+        dp.dp_flags = 0;
+        atomic_set(&dp.dp_done, 0);
 
-        collect_pages(&pc);
+        collect_pages(&dp, &pc);
         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                 __ASSERT_TAGE_INVARIANT(tage);
                 list_del(&tage->linkage);
@@ -1109,7 +1115,7 @@ void spl_debug_dumpstack(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(spl_debug_dumpstack);
 
-void spl_debug_bug(char *file, const char *func, const int line)
+void spl_debug_bug(char *file, const char *func, const int line, int flags)
 {
         spl_debug_catastrophe = 1;
         spl_debug_msg(NULL, 0, D_EMERG, file, func, line, "SBUG\n");
@@ -1124,7 +1130,7 @@ void spl_debug_bug(char *file, const char *func, const int line)
                 spl_panic_in_progress = 1;
 
         spl_debug_dumpstack(NULL);
-        spl_debug_dumplog();
+        spl_debug_dumplog(flags);
 
         if (spl_debug_panic_on_bug)
                 panic("SBUG");
@@ -1168,7 +1174,7 @@ panic_notifier(struct notifier_block *self,
                 while (current->lock_depth >= 0)
                         unlock_kernel();
 
-                spl_debug_dumplog_internal((void *)(long)current->pid);
+                spl_debug_dumplog(DL_NOTHREAD | DL_SINGLE_CPU);
         }
 
         return 0;
index f3fb793a22bf271ac02508aa306615e2258ffe44..1f97b366723143f3b2a6702a038e555917745f41 100644 (file)
@@ -179,7 +179,7 @@ proc_dump_kernel(struct ctl_table *table, int write, struct file *filp,
        ENTRY;
 
         if (write) {
-               spl_debug_dumplog();
+               spl_debug_dumplog(0);
                 *ppos += *lenp;
         } else {
                 *lenp = 0;