]> granicus.if.org Git - zfs/blob - module/spl/spl-debug.c
8ee6c5dfb8ea611132c361c44c6b58d11e5ab11e
[zfs] / module / spl / spl-debug.c
1 /*****************************************************************************\
2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3  *  Copyright (C) 2007 The Regents of the University of California.
4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6  *  UCRL-CODE-235197
7  *
8  *  This file is part of the SPL, Solaris Porting Layer.
9  *  For details, see <http://github.com/behlendorf/spl/>.
10  *
11  *  The SPL is free software; you can redistribute it and/or modify it
12  *  under the terms of the GNU General Public License as published by the
13  *  Free Software Foundation; either version 2 of the License, or (at your
14  *  option) any later version.
15  *
16  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
17  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
19  *  for more details.
20  *
21  *  You should have received a copy of the GNU General Public License along
22  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
23  *****************************************************************************
24  *  Solaris Porting Layer (SPL) Debug Implementation.
25 \*****************************************************************************/
26
27 #include <linux/kmod.h>
28 #include <linux/mm.h>
29 #include <linux/vmalloc.h>
30 #include <linux/pagemap.h>
31 #include <linux/slab.h>
32 #include <linux/ctype.h>
33 #include <linux/kthread.h>
34 #include <linux/hardirq.h>
35 #include <linux/interrupt.h>
36 #include <linux/spinlock.h>
37 #include <sys/sysmacros.h>
38 #include <sys/proc.h>
39 #include <sys/debug.h>
40 #include <spl-ctl.h>
41
42 #ifdef DEBUG_SUBSYSTEM
43 #undef DEBUG_SUBSYSTEM
44 #endif
45
46 #define DEBUG_SUBSYSTEM S_DEBUG
47
48 unsigned long spl_debug_subsys = ~0;
49 EXPORT_SYMBOL(spl_debug_subsys);
50 module_param(spl_debug_subsys, long, 0644);
51 MODULE_PARM_DESC(spl_debug_subsys, "Subsystem debugging level mask.");
52
53 unsigned long spl_debug_mask = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE);
54 EXPORT_SYMBOL(spl_debug_mask);
55 module_param(spl_debug_mask, long, 0644);
56 MODULE_PARM_DESC(spl_debug_mask, "Debugging level mask.");
57
58 unsigned long spl_debug_printk = D_CANTMASK;
59 EXPORT_SYMBOL(spl_debug_printk);
60 module_param(spl_debug_printk, long, 0644);
61 MODULE_PARM_DESC(spl_debug_printk, "Console printk level mask.");
62
63 int spl_debug_mb = -1;
64 EXPORT_SYMBOL(spl_debug_mb);
65 module_param(spl_debug_mb, int, 0644);
66 MODULE_PARM_DESC(spl_debug_mb, "Total debug buffer size.");
67
68 unsigned int spl_debug_binary = 1;
69 EXPORT_SYMBOL(spl_debug_binary);
70
71 unsigned int spl_debug_catastrophe;
72 EXPORT_SYMBOL(spl_debug_catastrophe);
73
74 unsigned int spl_debug_panic_on_bug = 0;
75 EXPORT_SYMBOL(spl_debug_panic_on_bug);
76 module_param(spl_debug_panic_on_bug, int, 0644);
77 MODULE_PARM_DESC(spl_debug_panic_on_bug, "Panic on BUG");
78
79 static char spl_debug_file_name[PATH_MAX];
80 char spl_debug_file_path[PATH_MAX] = "/tmp/spl-log";
81
82 unsigned int spl_console_ratelimit = 1;
83 EXPORT_SYMBOL(spl_console_ratelimit);
84
85 long spl_console_max_delay;
86 EXPORT_SYMBOL(spl_console_max_delay);
87
88 long spl_console_min_delay;
89 EXPORT_SYMBOL(spl_console_min_delay);
90
91 unsigned int spl_console_backoff = SPL_DEFAULT_BACKOFF;
92 EXPORT_SYMBOL(spl_console_backoff);
93
94 unsigned int spl_debug_stack;
95 EXPORT_SYMBOL(spl_debug_stack);
96
97 static int spl_panic_in_progress;
98
99 union trace_data_union (*trace_data[TCD_TYPE_MAX])[NR_CPUS] __cacheline_aligned;
100 char *trace_console_buffers[NR_CPUS][3];
101 struct rw_semaphore trace_sem;
102 atomic_t trace_tage_allocated = ATOMIC_INIT(0);
103
104 static int spl_debug_dump_all_pages(dumplog_priv_t *dp, char *);
105 static void trace_fini(void);
106
107
108 /* Memory percentage breakdown by type */
109 static unsigned int pages_factor[TCD_TYPE_MAX] = {
110        80,  /* 80% pages for TCD_TYPE_PROC */
111        10,  /* 10% pages for TCD_TYPE_SOFTIRQ */
112        10   /* 10% pages for TCD_TYPE_IRQ */
113 };
114
115 const char *
116 spl_debug_subsys2str(int subsys)
117 {
118         switch (subsys) {
119         default:
120                 return NULL;
121         case S_UNDEFINED:
122                 return "undefined";
123         case S_ATOMIC:
124                 return "atomic";
125         case S_KOBJ:
126                 return "kobj";
127         case S_VNODE:
128                 return "vnode";
129         case S_TIME:
130                 return "time";
131         case S_RWLOCK:
132                 return "rwlock";
133         case S_THREAD:
134                 return "thread";
135         case S_CONDVAR:
136                 return "condvar";
137         case S_MUTEX:
138                 return "mutex";
139         case S_RNG:
140                 return "rng";
141         case S_TASKQ:
142                 return "taskq";
143         case S_KMEM:
144                 return "kmem";
145         case S_DEBUG:
146                 return "debug";
147         case S_GENERIC:
148                 return "generic";
149         case S_PROC:
150                 return "proc";
151         case S_MODULE:
152                 return "module";
153         case S_CRED:
154                 return "cred";
155         }
156 }
157
158 const char *
159 spl_debug_dbg2str(int debug)
160 {
161         switch (debug) {
162         default:
163                 return NULL;
164         case D_TRACE:
165                 return "trace";
166         case D_INFO:
167                 return "info";
168         case D_WARNING:
169                 return "warning";
170         case D_ERROR:
171                 return "error";
172         case D_EMERG:
173                 return "emerg";
174         case D_CONSOLE:
175                 return "console";
176         case D_IOCTL:
177                 return "ioctl";
178         case D_DPRINTF:
179                 return "dprintf";
180         case D_OTHER:
181                 return "other";
182         }
183 }
184
185 int
186 spl_debug_mask2str(char *str, int size, unsigned long mask, int is_subsys)
187 {
188         const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str :
189                                                  spl_debug_dbg2str;
190         const char *token;
191         int i, bit, len = 0;
192
193         if (mask == 0) {                        /* "0" */
194                 if (size > 0)
195                         str[0] = '0';
196                 len = 1;
197         } else {                                /* space-separated tokens */
198                 for (i = 0; i < 32; i++) {
199                         bit = 1 << i;
200
201                         if ((mask & bit) == 0)
202                                 continue;
203
204                         token = fn(bit);
205                         if (token == NULL)              /* unused bit */
206                                 continue;
207
208                         if (len > 0) {                  /* separator? */
209                                 if (len < size)
210                                         str[len] = ' ';
211                                 len++;
212                         }
213
214                         while (*token != 0) {
215                                 if (len < size)
216                                         str[len] = *token;
217                                 token++;
218                                 len++;
219                         }
220                 }
221         }
222
223         /* terminate 'str' */
224         if (len < size)
225                 str[len] = 0;
226         else
227                 str[size - 1] = 0;
228
229         return len;
230 }
231
232 static int
233 spl_debug_token2mask(int *mask, const char *str, int len, int is_subsys)
234 {
235         const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str :
236                                                  spl_debug_dbg2str;
237         const char   *token;
238         int i, j, bit;
239
240         /* match against known tokens */
241         for (i = 0; i < 32; i++) {
242                 bit = 1 << i;
243
244                 token = fn(bit);
245                 if (token == NULL)              /* unused? */
246                         continue;
247
248                 /* strcasecmp */
249                 for (j = 0; ; j++) {
250                         if (j == len) {         /* end of token */
251                                 if (token[j] == 0) {
252                                         *mask = bit;
253                                         return 0;
254                                 }
255                                 break;
256                         }
257
258                         if (token[j] == 0)
259                                 break;
260
261                         if (str[j] == token[j])
262                                 continue;
263
264                         if (str[j] < 'A' || 'Z' < str[j])
265                                 break;
266
267                         if (str[j] - 'A' + 'a' != token[j])
268                                 break;
269                 }
270         }
271
272         return -EINVAL;                         /* no match */
273 }
274
275 int
276 spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys)
277 {
278         char op = 0;
279         int m = 0, matched, n, t;
280
281         /* Allow a number for backwards compatibility */
282         for (n = strlen(str); n > 0; n--)
283                 if (!isspace(str[n-1]))
284                         break;
285         matched = n;
286
287         if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 && matched == n) {
288                 *mask = m;
289                 return 0;
290         }
291
292         /* <str> must be a list of debug tokens or numbers separated by
293          * whitespace and optionally an operator ('+' or '-').  If an operator
294          * appears first in <str>, '*mask' is used as the starting point
295          * (relative), otherwise 0 is used (absolute).  An operator applies to
296          * all following tokens up to the next operator. */
297         matched = 0;
298         while (*str != 0) {
299                 while (isspace(*str)) /* skip whitespace */
300                         str++;
301
302                 if (*str == 0)
303                         break;
304
305                 if (*str == '+' || *str == '-') {
306                         op = *str++;
307
308                         /* op on first token == relative */
309                         if (!matched)
310                                 m = *mask;
311
312                         while (isspace(*str)) /* skip whitespace */
313                                 str++;
314
315                         if (*str == 0)          /* trailing op */
316                                 return -EINVAL;
317                 }
318
319                 /* find token length */
320                 for (n = 0; str[n] != 0 && !isspace(str[n]); n++);
321
322                 /* match token */
323                 if (spl_debug_token2mask(&t, str, n, is_subsys) != 0)
324                         return -EINVAL;
325
326                 matched = 1;
327                 if (op == '-')
328                         m &= ~t;
329                 else
330                         m |= t;
331
332                 str += n;
333         }
334
335         if (!matched)
336                 return -EINVAL;
337
338         *mask = m;
339         return 0;
340 }
341
342 static void
343 spl_debug_dumplog_internal(dumplog_priv_t *dp)
344 {
345         void *journal_info;
346
347         journal_info = current->journal_info;
348         current->journal_info = NULL;
349
350         snprintf(spl_debug_file_name, sizeof(spl_debug_file_path) - 1,
351                  "%s.%ld.%ld", spl_debug_file_path,
352                  get_seconds(), (long)dp->dp_pid);
353         printk("SPL: Dumping log to %s\n", spl_debug_file_name);
354         spl_debug_dump_all_pages(dp, spl_debug_file_name);
355
356         current->journal_info = journal_info;
357 }
358
359 static int
360 spl_debug_dumplog_thread(void *arg)
361 {
362         dumplog_priv_t *dp = (dumplog_priv_t *)arg;
363
364         spl_debug_dumplog_internal(dp);
365         atomic_set(&dp->dp_done, 1);
366         wake_up(&dp->dp_waitq);
367         complete_and_exit(NULL, 0);
368
369         return 0; /* Unreachable */
370 }
371
372 /* When flag is set do not use a new thread for the debug dump */
373 int
374 spl_debug_dumplog(int flags)
375 {
376         struct task_struct *tsk;
377         dumplog_priv_t dp;
378
379         init_waitqueue_head(&dp.dp_waitq);
380         dp.dp_pid = current->pid;
381         dp.dp_flags = flags;
382         atomic_set(&dp.dp_done, 0);
383
384         if (dp.dp_flags & DL_NOTHREAD) {
385                 spl_debug_dumplog_internal(&dp);
386         } else {
387
388                 tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug");
389                 if (tsk == NULL)
390                         return -ENOMEM;
391
392                 wake_up_process(tsk);
393                 wait_event(dp.dp_waitq, atomic_read(&dp.dp_done));
394         }
395
396         return 0;
397 }
398 EXPORT_SYMBOL(spl_debug_dumplog);
399
400 static char *
401 trace_get_console_buffer(void)
402 {
403         int  cpu = get_cpu();
404         int  idx;
405
406         if (in_irq()) {
407                 idx = 0;
408         } else if (in_softirq()) {
409                 idx = 1;
410         } else {
411                 idx = 2;
412         }
413
414         return trace_console_buffers[cpu][idx];
415 }
416
417 static void
418 trace_put_console_buffer(char *buffer)
419 {
420         put_cpu();
421 }
422
423 static int
424 trace_lock_tcd(struct trace_cpu_data *tcd)
425 {
426         __ASSERT(tcd->tcd_type < TCD_TYPE_MAX);
427
428         spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
429
430         return 1;
431 }
432
433 static void
434 trace_unlock_tcd(struct trace_cpu_data *tcd)
435 {
436         __ASSERT(tcd->tcd_type < TCD_TYPE_MAX);
437
438         spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
439 }
440
441 static struct trace_cpu_data *
442 trace_get_tcd(void)
443 {
444         int cpu;
445         struct trace_cpu_data *tcd;
446
447         cpu = get_cpu();
448         if (in_irq())
449                 tcd = &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd;
450         else if (in_softirq())
451                 tcd = &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd;
452         else
453                 tcd = &(*trace_data[TCD_TYPE_PROC])[cpu].tcd;
454
455         trace_lock_tcd(tcd);
456
457         return tcd;
458 }
459
460 static void
461 trace_put_tcd (struct trace_cpu_data *tcd)
462 {
463         trace_unlock_tcd(tcd);
464
465         put_cpu();
466 }
467
468 static void
469 trace_set_debug_header(struct spl_debug_header *header, int subsys,
470                        int mask, const int line, unsigned long stack)
471 {
472         struct timeval tv;
473
474         do_gettimeofday(&tv);
475
476         header->ph_subsys = subsys;
477         header->ph_mask = mask;
478         header->ph_cpu_id = smp_processor_id();
479         header->ph_sec = (__u32)tv.tv_sec;
480         header->ph_usec = tv.tv_usec;
481         header->ph_stack = stack;
482         header->ph_pid = current->pid;
483         header->ph_line_num = line;
484
485         return;
486 }
487
488 static void
489 trace_print_to_console(struct spl_debug_header *hdr, int mask, const char *buf,
490                        int len, const char *file, const char *fn)
491 {
492         char *prefix = "SPL", *ptype = NULL;
493
494         if ((mask & D_EMERG) != 0) {
495                 prefix = "SPLError";
496                 ptype = KERN_EMERG;
497         } else if ((mask & D_ERROR) != 0) {
498                 prefix = "SPLError";
499                 ptype = KERN_ERR;
500         } else if ((mask & D_WARNING) != 0) {
501                 prefix = "SPL";
502                 ptype = KERN_WARNING;
503         } else if ((mask & (D_CONSOLE | spl_debug_printk)) != 0) {
504                 prefix = "SPL";
505                 ptype = KERN_INFO;
506         }
507
508         if ((mask & D_CONSOLE) != 0) {
509                 printk("%s%s: %.*s", ptype, prefix, len, buf);
510         } else {
511                 printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
512                        hdr->ph_pid, hdr->ph_stack, file,
513                        hdr->ph_line_num, fn, len, buf);
514         }
515
516         return;
517 }
518
519 static int
520 trace_max_debug_mb(void)
521 {
522         return MAX(512, ((num_physpages >> (20 - PAGE_SHIFT)) * 80) / 100);
523 }
524
525 static struct trace_page *
526 tage_alloc(int gfp)
527 {
528         struct page *page;
529         struct trace_page *tage;
530
531         page = alloc_pages(gfp | __GFP_NOWARN, 0);
532         if (page == NULL)
533                 return NULL;
534
535         tage = kmalloc(sizeof(*tage), gfp);
536         if (tage == NULL) {
537                 __free_pages(page, 0);
538                 return NULL;
539         }
540
541         tage->page = page;
542         atomic_inc(&trace_tage_allocated);
543
544         return tage;
545 }
546
547 static void
548 tage_free(struct trace_page *tage)
549 {
550         __ASSERT(tage != NULL);
551         __ASSERT(tage->page != NULL);
552
553         __free_pages(tage->page, 0);
554         kfree(tage);
555         atomic_dec(&trace_tage_allocated);
556 }
557
558 static struct trace_page *
559 tage_from_list(struct list_head *list)
560 {
561         return list_entry(list, struct trace_page, linkage);
562 }
563
564 static void
565 tage_to_tail(struct trace_page *tage, struct list_head *queue)
566 {
567         __ASSERT(tage != NULL);
568         __ASSERT(queue != NULL);
569
570         list_move_tail(&tage->linkage, queue);
571 }
572
573 /* try to return a page that has 'len' bytes left at the end */
574 static struct trace_page *
575 trace_get_tage_try(struct trace_cpu_data *tcd, unsigned long len)
576 {
577         struct trace_page *tage;
578
579         if (tcd->tcd_cur_pages > 0) {
580                 __ASSERT(!list_empty(&tcd->tcd_pages));
581                 tage = tage_from_list(tcd->tcd_pages.prev);
582                 if (tage->used + len <= PAGE_SIZE)
583                         return tage;
584         }
585
586         if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
587                 if (tcd->tcd_cur_stock_pages > 0) {
588                         tage = tage_from_list(tcd->tcd_stock_pages.prev);
589                         tcd->tcd_cur_stock_pages--;
590                         list_del_init(&tage->linkage);
591                 } else {
592                         tage = tage_alloc(GFP_ATOMIC);
593                         if (tage == NULL) {
594                                 printk(KERN_WARNING
595                                        "failure to allocate a tage (%ld)\n",
596                                        tcd->tcd_cur_pages);
597                                 return NULL;
598                         }
599                 }
600
601                 tage->used = 0;
602                 tage->cpu = smp_processor_id();
603                 tage->type = tcd->tcd_type;
604                 list_add_tail(&tage->linkage, &tcd->tcd_pages);
605                 tcd->tcd_cur_pages++;
606
607                 return tage;
608         }
609
610         return NULL;
611 }
612
613 /* return a page that has 'len' bytes left at the end */
614 static struct trace_page *
615 trace_get_tage(struct trace_cpu_data *tcd, unsigned long len)
616 {
617         struct trace_page *tage;
618
619         __ASSERT(len <= PAGE_SIZE);
620
621         tage = trace_get_tage_try(tcd, len);
622         if (tage)
623                 return tage;
624
625         if (tcd->tcd_cur_pages > 0) {
626                 tage = tage_from_list(tcd->tcd_pages.next);
627                 tage->used = 0;
628                 tage_to_tail(tage, &tcd->tcd_pages);
629         }
630
631         return tage;
632 }
633
634 int
635 spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask,
636                const char *file, const char *fn, const int line,
637                const char *format1, va_list args, const char *format2, ...)
638 {
639         struct trace_cpu_data   *tcd = NULL;
640         struct spl_debug_header header = { 0, };
641         struct trace_page       *tage;
642         /* string_buf is used only if tcd != NULL, and is always set then */
643         char                    *string_buf = NULL;
644         char                    *debug_buf;
645         int                      known_size;
646         int                      needed = 85; /* average message length */
647         int                      max_nob;
648         va_list                  ap;
649         int                      i;
650         int                      remain;
651
652         if (strchr(file, '/'))
653                 file = strrchr(file, '/') + 1;
654
655         trace_set_debug_header(&header, subsys, mask, line, CDEBUG_STACK());
656
657         tcd = trace_get_tcd();
658         if (tcd == NULL)
659                 goto console;
660
661         if (tcd->tcd_shutting_down) {
662                 trace_put_tcd(tcd);
663                 tcd = NULL;
664                 goto console;
665         }
666
667         known_size = strlen(file) + 1;
668         if (fn)
669                 known_size += strlen(fn) + 1;
670
671         if (spl_debug_binary)
672                 known_size += sizeof(header);
673
674         /* '2' used because vsnprintf returns real size required for output
675          * _without_ terminating NULL. */
676         for (i = 0; i < 2; i++) {
677                 tage = trace_get_tage(tcd, needed + known_size + 1);
678                 if (tage == NULL) {
679                         if (needed + known_size > PAGE_SIZE)
680                                 mask |= D_ERROR;
681
682                         trace_put_tcd(tcd);
683                         tcd = NULL;
684                         goto console;
685                 }
686
687                 string_buf = (char *)page_address(tage->page) +
688                              tage->used + known_size;
689
690                 max_nob = PAGE_SIZE - tage->used - known_size;
691                 if (max_nob <= 0) {
692                         printk(KERN_EMERG "negative max_nob: %i\n", max_nob);
693                         mask |= D_ERROR;
694                         trace_put_tcd(tcd);
695                         tcd = NULL;
696                         goto console;
697                 }
698
699                 needed = 0;
700                 if (format1) {
701                         va_copy(ap, args);
702                         needed = vsnprintf(string_buf, max_nob, format1, ap);
703                         va_end(ap);
704                 }
705
706                 if (format2) {
707                         remain = max_nob - needed;
708                         if (remain < 0)
709                                 remain = 0;
710
711                         va_start(ap, format2);
712                         needed += vsnprintf(string_buf+needed, remain, format2, ap);
713                         va_end(ap);
714                 }
715
716                 if (needed < max_nob)
717                         break;
718         }
719
720         header.ph_len = known_size + needed;
721         debug_buf = (char *)page_address(tage->page) + tage->used;
722
723         if (spl_debug_binary) {
724                 memcpy(debug_buf, &header, sizeof(header));
725                 tage->used += sizeof(header);
726                 debug_buf += sizeof(header);
727         }
728
729         strcpy(debug_buf, file);
730         tage->used += strlen(file) + 1;
731         debug_buf += strlen(file) + 1;
732
733         if (fn) {
734                 strcpy(debug_buf, fn);
735                 tage->used += strlen(fn) + 1;
736                 debug_buf += strlen(fn) + 1;
737         }
738
739         __ASSERT(debug_buf == string_buf);
740
741         tage->used += needed;
742         __ASSERT (tage->used <= PAGE_SIZE);
743
744 console:
745         if ((mask & spl_debug_printk) == 0) {
746                 /* no console output requested */
747                 if (tcd != NULL)
748                         trace_put_tcd(tcd);
749                 return 1;
750         }
751
752         if (cdls != NULL) {
753                 if (spl_console_ratelimit && cdls->cdls_next != 0 &&
754                     !time_before(cdls->cdls_next, jiffies)) {
755                         /* skipping a console message */
756                         cdls->cdls_count++;
757                         if (tcd != NULL)
758                                 trace_put_tcd(tcd);
759                         return 1;
760                 }
761
762                 if (time_before(cdls->cdls_next + spl_console_max_delay +
763                                 (10 * HZ), jiffies)) {
764                         /* last timeout was a long time ago */
765                         cdls->cdls_delay /= spl_console_backoff * 4;
766                 } else {
767                         cdls->cdls_delay *= spl_console_backoff;
768
769                         if (cdls->cdls_delay < spl_console_min_delay)
770                                 cdls->cdls_delay = spl_console_min_delay;
771                         else if (cdls->cdls_delay > spl_console_max_delay)
772                                 cdls->cdls_delay = spl_console_max_delay;
773                 }
774
775                 /* ensure cdls_next is never zero after it's been seen */
776                 cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1;
777         }
778
779         if (tcd != NULL) {
780                 trace_print_to_console(&header, mask, string_buf, needed, file, fn);
781                 trace_put_tcd(tcd);
782         } else {
783                 string_buf = trace_get_console_buffer();
784
785                 needed = 0;
786                 if (format1 != NULL) {
787                         va_copy(ap, args);
788                         needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap);
789                         va_end(ap);
790                 }
791                 if (format2 != NULL) {
792                         remain = TRACE_CONSOLE_BUFFER_SIZE - needed;
793                         if (remain > 0) {
794                                 va_start(ap, format2);
795                                 needed += vsnprintf(string_buf+needed, remain, format2, ap);
796                                 va_end(ap);
797                         }
798                 }
799                 trace_print_to_console(&header, mask,
800                                  string_buf, needed, file, fn);
801
802                 trace_put_console_buffer(string_buf);
803         }
804
805         if (cdls != NULL && cdls->cdls_count != 0) {
806                 string_buf = trace_get_console_buffer();
807
808                 needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE,
809                          "Skipped %d previous similar message%s\n",
810                          cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : "");
811
812                 trace_print_to_console(&header, mask,
813                                  string_buf, needed, file, fn);
814
815                 trace_put_console_buffer(string_buf);
816                 cdls->cdls_count = 0;
817         }
818
819         return 0;
820 }
821 EXPORT_SYMBOL(spl_debug_vmsg);
822
823 /* Do the collect_pages job on a single CPU: assumes that all other
824  * CPUs have been stopped during a panic.  If this isn't true for
825  * some arch, this will have to be implemented separately in each arch.
826  */
827 static void
828 collect_pages_from_single_cpu(struct page_collection *pc)
829 {
830         struct trace_cpu_data *tcd;
831         int i, j;
832
833         tcd_for_each(tcd, i, j) {
834                 list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
835                 tcd->tcd_cur_pages = 0;
836         }
837 }
838
839 static void
840 collect_pages_on_all_cpus(struct page_collection *pc)
841 {
842         struct trace_cpu_data *tcd;
843         int i, cpu;
844
845         spin_lock(&pc->pc_lock);
846         for_each_possible_cpu(cpu) {
847                 tcd_for_each_type_lock(tcd, i, cpu) {
848                         list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
849                         tcd->tcd_cur_pages = 0;
850                 }
851         }
852         spin_unlock(&pc->pc_lock);
853 }
854
855 static void
856 collect_pages(dumplog_priv_t *dp, struct page_collection *pc)
857 {
858         INIT_LIST_HEAD(&pc->pc_pages);
859
860         if (spl_panic_in_progress || dp->dp_flags & DL_SINGLE_CPU)
861                 collect_pages_from_single_cpu(pc);
862         else
863                 collect_pages_on_all_cpus(pc);
864 }
865
866 static void
867 put_pages_back_on_all_cpus(struct page_collection *pc)
868 {
869         struct trace_cpu_data *tcd;
870         struct list_head *cur_head;
871         struct trace_page *tage;
872         struct trace_page *tmp;
873         int i, cpu;
874
875         spin_lock(&pc->pc_lock);
876
877         for_each_possible_cpu(cpu) {
878                 tcd_for_each_type_lock(tcd, i, cpu) {
879                         cur_head = tcd->tcd_pages.next;
880
881                         list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
882                                                  linkage) {
883
884                                 __ASSERT_TAGE_INVARIANT(tage);
885
886                                 if (tage->cpu != cpu || tage->type != i)
887                                         continue;
888
889                                 tage_to_tail(tage, cur_head);
890                                 tcd->tcd_cur_pages++;
891                         }
892                 }
893         }
894
895         spin_unlock(&pc->pc_lock);
896 }
897
898 static void
899 put_pages_back(struct page_collection *pc)
900 {
901         if (!spl_panic_in_progress)
902                 put_pages_back_on_all_cpus(pc);
903 }
904
905 static struct file *
906 trace_filp_open (const char *name, int flags, int mode, int *err)
907 {
908         struct file *filp = NULL;
909         int rc;
910
911         filp = filp_open(name, flags, mode);
912         if (IS_ERR(filp)) {
913                 rc = PTR_ERR(filp);
914                 printk(KERN_ERR "SPL: Can't open %s file: %d\n", name, rc);
915                 if (err)
916                         *err = rc;
917                 filp = NULL;
918         }
919         return filp;
920 }
921
922 #define trace_filp_write(fp, b, s, p)  (fp)->f_op->write((fp), (b), (s), p)
923 #define trace_filp_fsync(fp)           (fp)->f_op->fsync((fp),(fp)->f_dentry,1)
924 #define trace_filp_close(f)            filp_close(f, NULL)
925 #define trace_filp_poff(f)             (&(f)->f_pos)
926
927 static int
928 spl_debug_dump_all_pages(dumplog_priv_t *dp, char *filename)
929 {
930         struct page_collection pc;
931         struct file *filp;
932         struct trace_page *tage;
933         struct trace_page *tmp;
934         mm_segment_t oldfs;
935         int rc = 0;
936
937         down_write(&trace_sem);
938
939         filp = trace_filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE,
940                                0600, &rc);
941         if (filp == NULL) {
942                 if (rc != -EEXIST)
943                         printk(KERN_ERR "SPL: Can't open %s for dump: %d\n",
944                                filename, rc);
945                 goto out;
946         }
947
948         spin_lock_init(&pc.pc_lock);
949         collect_pages(dp, &pc);
950         if (list_empty(&pc.pc_pages)) {
951                 rc = 0;
952                 goto close;
953         }
954
955         oldfs = get_fs();
956         set_fs(get_ds());
957
958         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
959                 __ASSERT_TAGE_INVARIANT(tage);
960
961                 rc = trace_filp_write(filp, page_address(tage->page),
962                                       tage->used, trace_filp_poff(filp));
963                 if (rc != (int)tage->used) {
964                         printk(KERN_WARNING "SPL: Wanted to write %u "
965                                "but wrote %d\n", tage->used, rc);
966                         put_pages_back(&pc);
967                         __ASSERT(list_empty(&pc.pc_pages));
968                         break;
969                 }
970                 list_del(&tage->linkage);
971                 tage_free(tage);
972         }
973
974         set_fs(oldfs);
975
976         rc = trace_filp_fsync(filp);
977         if (rc)
978                 printk(KERN_ERR "SPL: Unable to sync: %d\n", rc);
979  close:
980         trace_filp_close(filp);
981  out:
982         up_write(&trace_sem);
983
984         return rc;
985 }
986
987 static void
988 spl_debug_flush_pages(void)
989 {
990         dumplog_priv_t dp;
991         struct page_collection pc;
992         struct trace_page *tage;
993         struct trace_page *tmp;
994
995         spin_lock_init(&pc.pc_lock);
996         init_waitqueue_head(&dp.dp_waitq);
997         dp.dp_pid = current->pid;
998         dp.dp_flags = 0;
999         atomic_set(&dp.dp_done, 0);
1000
1001         collect_pages(&dp, &pc);
1002         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
1003                 __ASSERT_TAGE_INVARIANT(tage);
1004                 list_del(&tage->linkage);
1005                 tage_free(tage);
1006         }
1007 }
1008
1009 unsigned long
1010 spl_debug_set_mask(unsigned long mask) {
1011         spl_debug_mask = mask;
1012         return 0;
1013 }
1014 EXPORT_SYMBOL(spl_debug_set_mask);
1015
1016 unsigned long
1017 spl_debug_get_mask(void) {
1018         return spl_debug_mask;
1019 }
1020 EXPORT_SYMBOL(spl_debug_get_mask);
1021
1022 unsigned long
1023 spl_debug_set_subsys(unsigned long subsys) {
1024         spl_debug_subsys = subsys;
1025         return 0;
1026 }
1027 EXPORT_SYMBOL(spl_debug_set_subsys);
1028
1029 unsigned long
1030 spl_debug_get_subsys(void) {
1031         return spl_debug_subsys;
1032 }
1033 EXPORT_SYMBOL(spl_debug_get_subsys);
1034
1035 int
1036 spl_debug_set_mb(int mb)
1037 {
1038         int i, j, pages;
1039         int limit = trace_max_debug_mb();
1040         struct trace_cpu_data *tcd;
1041
1042         if (mb < num_possible_cpus()) {
1043                 printk(KERN_ERR "SPL: Refusing to set debug buffer size to "
1044                        "%dMB - lower limit is %d\n", mb, num_possible_cpus());
1045                 return -EINVAL;
1046         }
1047
1048         if (mb > limit) {
1049                 printk(KERN_ERR "SPL: Refusing to set debug buffer size to "
1050                        "%dMB - upper limit is %d\n", mb, limit);
1051                 return -EINVAL;
1052         }
1053
1054         mb /= num_possible_cpus();
1055         pages = mb << (20 - PAGE_SHIFT);
1056
1057         down_write(&trace_sem);
1058
1059         tcd_for_each(tcd, i, j)
1060                 tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
1061
1062         up_write(&trace_sem);
1063
1064         return 0;
1065 }
1066 EXPORT_SYMBOL(spl_debug_set_mb);
1067
1068 int
1069 spl_debug_get_mb(void)
1070 {
1071         int i, j;
1072         struct trace_cpu_data *tcd;
1073         int total_pages = 0;
1074
1075         down_read(&trace_sem);
1076
1077         tcd_for_each(tcd, i, j)
1078                 total_pages += tcd->tcd_max_pages;
1079
1080         up_read(&trace_sem);
1081
1082         return (total_pages >> (20 - PAGE_SHIFT)) + 1;
1083 }
1084 EXPORT_SYMBOL(spl_debug_get_mb);
1085
1086 void spl_debug_dumpstack(struct task_struct *tsk)
1087 {
1088         extern void show_task(struct task_struct *);
1089
1090         if (tsk == NULL)
1091                 tsk = current;
1092
1093         printk("SPL: Showing stack for process %d\n", tsk->pid);
1094         dump_stack();
1095 }
1096 EXPORT_SYMBOL(spl_debug_dumpstack);
1097
1098 void spl_debug_bug(char *file, const char *func, const int line, int flags)
1099 {
1100         spl_debug_catastrophe = 1;
1101         spl_debug_msg(NULL, 0, D_EMERG, file, func, line, "SBUG\n");
1102
1103         if (in_interrupt()) {
1104                 panic("SBUG in interrupt.\n");
1105                 /* not reached */
1106         }
1107
1108         if (in_atomic() || irqs_disabled())
1109                 flags |= DL_NOTHREAD;
1110
1111         /* Ensure all debug pages and dumped by current cpu */
1112          if (spl_debug_panic_on_bug)
1113                 spl_panic_in_progress = 1;
1114
1115         spl_debug_dumpstack(NULL);
1116         spl_debug_dumplog(flags);
1117
1118         if (spl_debug_panic_on_bug)
1119                 panic("SBUG");
1120
1121         set_task_state(current, TASK_UNINTERRUPTIBLE);
1122         while (1)
1123                 schedule();
1124 }
1125 EXPORT_SYMBOL(spl_debug_bug);
1126
1127 int
1128 spl_debug_clear_buffer(void)
1129 {
1130         spl_debug_flush_pages();
1131         return 0;
1132 }
1133 EXPORT_SYMBOL(spl_debug_clear_buffer);
1134
1135 int
1136 spl_debug_mark_buffer(char *text)
1137 {
1138         CDEBUG(D_WARNING, "*************************************\n");
1139         CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text);
1140         CDEBUG(D_WARNING, "*************************************\n");
1141
1142         return 0;
1143 }
1144 EXPORT_SYMBOL(spl_debug_mark_buffer);
1145
1146 static int
1147 trace_init(int max_pages)
1148 {
1149         struct trace_cpu_data *tcd;
1150         int i, j;
1151
1152         init_rwsem(&trace_sem);
1153
1154         /* initialize trace_data */
1155         memset(trace_data, 0, sizeof(trace_data));
1156         for (i = 0; i < TCD_TYPE_MAX; i++) {
1157                 trace_data[i] = kmalloc(sizeof(union trace_data_union) *
1158                                         NR_CPUS, GFP_KERNEL);
1159                 if (trace_data[i] == NULL)
1160                         goto out;
1161         }
1162
1163         tcd_for_each(tcd, i, j) {
1164                 spin_lock_init(&tcd->tcd_lock);
1165                 tcd->tcd_pages_factor = pages_factor[i];
1166                 tcd->tcd_type = i;
1167                 tcd->tcd_cpu = j;
1168                 INIT_LIST_HEAD(&tcd->tcd_pages);
1169                 INIT_LIST_HEAD(&tcd->tcd_stock_pages);
1170                 tcd->tcd_cur_pages = 0;
1171                 tcd->tcd_cur_stock_pages = 0;
1172                 tcd->tcd_max_pages = (max_pages * pages_factor[i]) / 100;
1173                 tcd->tcd_shutting_down = 0;
1174         }
1175
1176         for (i = 0; i < num_possible_cpus(); i++) {
1177                 for (j = 0; j < 3; j++) {
1178                         trace_console_buffers[i][j] =
1179                                 kmalloc(TRACE_CONSOLE_BUFFER_SIZE,
1180                                         GFP_KERNEL);
1181
1182                         if (trace_console_buffers[i][j] == NULL)
1183                                 goto out;
1184                 }
1185        }
1186
1187         return 0;
1188 out:
1189         trace_fini();
1190         printk(KERN_ERR "SPL: Insufficient memory for debug logs\n");
1191         return -ENOMEM;
1192 }
1193
1194 int
1195 debug_init(void)
1196 {
1197         int rc, max = spl_debug_mb;
1198
1199         spl_console_max_delay = SPL_DEFAULT_MAX_DELAY;
1200         spl_console_min_delay = SPL_DEFAULT_MIN_DELAY;
1201
1202         /* If spl_debug_mb is set to an invalid value or uninitialized
1203          * then just make the total buffers smp_num_cpus TCD_MAX_PAGES */
1204         if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 ||
1205             max >= 512 || max < 0) {
1206                 max = TCD_MAX_PAGES;
1207         } else {
1208                 max = (max / num_online_cpus()) << (20 - PAGE_SHIFT);
1209         }
1210
1211         rc = trace_init(max);
1212         if (rc)
1213                 return rc;
1214
1215         return rc;
1216 }
1217
1218 static void
1219 trace_cleanup_on_all_cpus(void)
1220 {
1221         struct trace_cpu_data *tcd;
1222         struct trace_page *tage;
1223         struct trace_page *tmp;
1224         int i, cpu;
1225
1226         for_each_possible_cpu(cpu) {
1227                 tcd_for_each_type_lock(tcd, i, cpu) {
1228                         tcd->tcd_shutting_down = 1;
1229
1230                         list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
1231                                                  linkage) {
1232                                 __ASSERT_TAGE_INVARIANT(tage);
1233
1234                                 list_del(&tage->linkage);
1235                                 tage_free(tage);
1236                         }
1237                         tcd->tcd_cur_pages = 0;
1238                 }
1239         }
1240 }
1241
1242 static void
1243 trace_fini(void)
1244 {
1245         int i, j;
1246
1247         trace_cleanup_on_all_cpus();
1248
1249         for (i = 0; i < num_possible_cpus(); i++) {
1250                 for (j = 0; j < 3; j++) {
1251                         if (trace_console_buffers[i][j] != NULL) {
1252                                 kfree(trace_console_buffers[i][j]);
1253                                 trace_console_buffers[i][j] = NULL;
1254                         }
1255                 }
1256         }
1257
1258         for (i = 0; i < TCD_TYPE_MAX && trace_data[i] != NULL; i++) {
1259                 kfree(trace_data[i]);
1260                 trace_data[i] = NULL;
1261         }
1262 }
1263
1264 void
1265 debug_fini(void)
1266 {
1267         trace_fini();
1268 }