From 57d1b1885897c9b079a6355a6d54d684ad003d46 Mon Sep 17 00:00:00 2001 From: behlendo Date: Fri, 18 Apr 2008 23:39:58 +0000 Subject: [PATCH] First commit of lustre style internal debug support. These changes bring over everything lustre had for debugging with two exceptions. I dropped the debug daemon and upcalls just because it made things a little easier. They can be re-added easily enough if we feel they are needed. Everything compiles and seems to work on first inspection but I suspect there are a handful of issues still lingering which I'll be sorting out right away. I just wanted to get all these changes committed and safe. I'm getting a little paranoid about losing them. git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@75 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c --- cmd/Makefile.am | 7 +- cmd/spl.c | 217 +++++++ include/sys/debug.h | 313 ++++++++- include/sys/proc.h | 9 + include/sys/sysmacros.h | 26 +- modules/spl/Makefile.in | 2 + modules/spl/spl-debug.c | 1293 +++++++++++++++++++++++++++++++++++++ modules/spl/spl-generic.c | 356 ++-------- modules/spl/spl-proc.c | 574 ++++++++++++++++ modules/spl/spl-vnode.c | 4 +- 10 files changed, 2464 insertions(+), 337 deletions(-) create mode 100644 cmd/spl.c create mode 100644 include/sys/proc.h create mode 100644 modules/spl/spl-debug.c create mode 100644 modules/spl/spl-proc.c diff --git a/cmd/Makefile.am b/cmd/Makefile.am index d3d747797..3d1a39297 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,7 +1,10 @@ DEFAULT_INCLUDES = -I. -I.. 
-I../lib -AM_CFLAGS = -g -O2 -W -Wall -Wstrict-prototypes -Wshadow +AM_CFLAGS = -g -O2 -W -Wall -Wstrict-prototypes -Wshadow -D__USE_LARGEFILE64 + +sbin_PROGRAMS = spl splat + +spl_SOURCES = spl.c -sbin_PROGRAMS = splat splat_SOURCES = splat.c splat_LDFLAGS = $(top_builddir)/lib/libcommon.la diff --git a/cmd/spl.c b/cmd/spl.c new file mode 100644 index 000000000..866bc8905 --- /dev/null +++ b/cmd/spl.c @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "../include/spl-ctl.h" + +static int spl_debug_mask = ~0; +static int spl_debug_subsystem = ~0; + +/* all strings nul-terminated; only the struct and hdr need to be freed */ +struct dbg_line { + struct spl_debug_header *hdr; + char *file; + char *fn; + char *text; +}; + +static int +cmp_rec(const void *p1, const void *p2) +{ + struct dbg_line *d1 = *(struct dbg_line **)p1; + struct dbg_line *d2 = *(struct dbg_line **)p2; + + if (d1->hdr->ph_sec < d2->hdr->ph_sec) + return -1; + + if (d1->hdr->ph_sec == d2->hdr->ph_sec && + d1->hdr->ph_usec < d2->hdr->ph_usec) + return -1; + + if (d1->hdr->ph_sec == d2->hdr->ph_sec && + d1->hdr->ph_usec == d2->hdr->ph_usec) + return 0; + + return 1; +} + +static void +print_rec(struct dbg_line **linev, int used, FILE *out) +{ + int i; + + for (i = 0; i < used; i++) { + struct dbg_line *line = linev[i]; + struct spl_debug_header *hdr = line->hdr; + + fprintf(out, "%08x:%08x:%u:%u.%06llu:%u:%u:(%s:%u:%s()) %s", + hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id, + hdr->ph_sec, (unsigned long long)hdr->ph_usec, + hdr->ph_stack, hdr->ph_pid, line->file, + hdr->ph_line_num, line->fn, line->text); + free(line->hdr); + free(line); + } + + free(linev); +} + +static int +add_rec(struct dbg_line *line, struct dbg_line ***linevp, int *lenp, int used) +{ + struct dbg_line **linev = *linevp; + + if (used == *lenp) { + int nlen = *lenp + 512; + int nsize = nlen * sizeof(struct dbg_line *); + + linev = *linevp ? 
realloc(*linevp, nsize) : malloc(nsize); + if (!linev) + return 0; + *linevp = linev; + *lenp = nlen; + } + linev[used] = line; + return 1; +} + +static int +parse_buffer(FILE *in, FILE *out) +{ + struct dbg_line *line; + struct spl_debug_header *hdr; + char buf[4097], *p; + unsigned long dropped = 0, kept = 0; + struct dbg_line **linev = NULL; + const int phl = sizeof(hdr->ph_len); + const int phf = sizeof(hdr->ph_flags); + int rc, linev_len = 0; + + while (1) { + rc = fread(buf, phl + phf, 1, in); + if (rc <= 0) + break; + + hdr = (void *)buf; + if (hdr->ph_len == 0) + break; + if (hdr->ph_len > 4094) { + fprintf(stderr, "unexpected large record: %d bytes. " + "aborting.\n", hdr->ph_len); + break; + } + + rc = fread(buf + phl + phf, 1, hdr->ph_len - phl - phf, in); + if (rc <= 0) + break; + + if (hdr->ph_mask && + (!(spl_debug_subsystem & hdr->ph_subsys) || + (!(spl_debug_mask & hdr->ph_mask)))) { + dropped++; + continue; + } + + line = malloc(sizeof(*line)); + if (line == NULL) { + fprintf(stderr, "malloc failed; printing accumulated " + "records and exiting.\n"); + break; + } + + line->hdr = malloc(hdr->ph_len + 1); + if (line->hdr == NULL) { + free(line); + fprintf(stderr, "malloc failed; printing accumulated " + "records and exiting.\n"); + break; + } + + p = (void *)line->hdr; + memcpy(line->hdr, buf, hdr->ph_len); + p[hdr->ph_len] = '\0'; + + p += sizeof(*hdr); + line->file = p; + p += strlen(line->file) + 1; + line->fn = p; + p += strlen(line->fn) + 1; + line->text = p; + + if (!add_rec(line, &linev, &linev_len, kept)) { + fprintf(stderr, "malloc failed; printing accumulated " + "records and exiting.\n"); + break; + } + kept++; + } + + if (linev) { + qsort(linev, kept, sizeof(struct dbg_line *), cmp_rec); + print_rec(linev, kept, out); + } + + printf("Debug log: %lu lines, %lu kept, %lu dropped.\n", + dropped + kept, kept, dropped); + return 0; +} + +int +main(int argc, char *argv[]) +{ + int fdin, fdout; + FILE *in, *out = stdout; + int rc, o_lf = 0; + + 
if (argc > 3 || argc < 2) { + fprintf(stderr, "usage: %s [output]\n", argv[0]); + return 0; + } + +#ifdef __USE_LARGEFILE64 + o_lf = O_LARGEFILE; +#endif + + fdin = open(argv[1], O_RDONLY | o_lf); + if (fdin == -1) { + fprintf(stderr, "open(%s) failed: %s\n", argv[1], + strerror(errno)); + return 1; + } + in = fdopen(fdin, "r"); + if (in == NULL) { + fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], + strerror(errno)); + close(fdin); + return 1; + } + if (argc > 2) { + fdout = open(argv[2], O_CREAT | O_TRUNC | O_WRONLY | o_lf, 0600); + if (fdout == -1) { + fprintf(stderr, "open(%s) failed: %s\n", argv[2], + strerror(errno)); + fclose(in); + return 1; + } + out = fdopen(fdout, "w"); + if (out == NULL) { + fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], + strerror(errno)); + fclose(in); + close(fdout); + return 1; + } + } + + rc = parse_buffer(in, out); + + fclose(in); + if (out != stdout) + fclose(out); + + return rc; +} diff --git a/include/sys/debug.h b/include/sys/debug.h index 4a6ed5c3d..a670c0374 100644 --- a/include/sys/debug.h +++ b/include/sys/debug.h @@ -1,11 +1,318 @@ #ifndef _SPL_DEBUG_H #define _SPL_DEBUG_H +#include /* THREAD_SIZE */ +#include + extern unsigned long spl_debug_mask; extern unsigned long spl_debug_subsys; -void __dprintf(const char *file, const char *func, int line, const char *fmt, ...); -void spl_set_debug_mask(unsigned long mask); -void spl_set_debug_subsys(unsigned long mask); +#define S_UNDEFINED 0x00000001 +#define S_ATOMIC 0x00000002 +#define S_KOBJ 0x00000004 +#define S_VNODE 0x00000008 +#define S_TIME 0x00000010 +#define S_RWLOCK 0x00000020 +#define S_THREAD 0x00000040 +#define S_CONDVAR 0x00000080 +#define S_MUTEX 0x00000100 +#define S_RNG 0x00000200 +#define S_TASKQ 0x00000400 +#define S_KMEM 0x00000800 +#define S_DEBUG 0x00001000 +#define S_GENERIC 0x00002000 +#define S_PROC 0x00004000 + +#define D_TRACE 0x00000001 +#define D_INFO 0x00000002 +#define D_WARNING 0x00000004 +#define D_ERROR 0x00000008 +#define D_EMERG 
0x00000010 +#define D_CONSOLE 0x00000020 +#define D_IOCTL 0x00000040 +#define D_DPRINTF 0x00000080 +#define D_OTHER 0x00000100 + +#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) +#define DEBUG_SUBSYSTEM S_UNDEFINED + +int debug_init(void); +void debug_fini(void); +int spl_debug_mask2str(char *str, int size, unsigned long mask, int is_subsys); +int spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys); + +extern unsigned long spl_debug_subsys; +extern unsigned long spl_debug_mask; +extern unsigned long spl_debug_printk; +extern int spl_debug_mb; +extern unsigned int spl_debug_binary; +extern unsigned int spl_debug_catastrophe; +extern unsigned int spl_debug_panic_on_bug; +extern char spl_debug_file_path[PATH_MAX]; +extern unsigned int spl_console_ratelimit; +extern long spl_console_max_delay; +extern long spl_console_min_delay; +extern unsigned int spl_console_backoff; +extern unsigned int spl_debug_stack; + +#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) +#define TCD_STOCK_PAGES (TCD_MAX_PAGES) +#define TRACE_CONSOLE_BUFFER_SIZE 1024 + +#define SPL_DEFAULT_MAX_DELAY (600 * HZ) +#define SPL_DEFAULT_MIN_DELAY ((HZ + 1) / 2) +#define SPL_DEFAULT_BACKOFF 2 + +typedef struct { + unsigned long cdls_next; + int cdls_count; + long cdls_delay; +} spl_debug_limit_state_t; + +/* Three trace data types */ +typedef enum { + TCD_TYPE_PROC, + TCD_TYPE_SOFTIRQ, + TCD_TYPE_IRQ, + TCD_TYPE_MAX +} tcd_type_t; + +union trace_data_union { + struct trace_cpu_data { + /* pages with trace records not yet processed by tracefiled */ + struct list_head tcd_pages; + /* number of pages on ->tcd_pages */ + unsigned long tcd_cur_pages; + /* Max number of pages allowed on ->tcd_pages */ + unsigned long tcd_max_pages; + + /* + * preallocated pages to write trace records into. Pages from + * ->tcd_stock_pages are moved to ->tcd_pages by spl_debug_msg(). 
+ * + * This list is necessary, because on some platforms it's + * impossible to perform efficient atomic page allocation in a + * non-blockable context. + * + * Such platforms fill ->tcd_stock_pages "on occasion", when + * tracing code is entered in blockable context. + * + * trace_get_tage_try() tries to get a page from + * ->tcd_stock_pages first and resorts to atomic page + * allocation only if this queue is empty. ->tcd_stock_pages + * is replenished when tracing code is entered in blocking + * context (darwin-tracefile.c:trace_get_tcd()). We try to + * maintain TCD_STOCK_PAGES (40 by default) pages in this + * queue. Atomic allocation is only required if more than + * TCD_STOCK_PAGES pagesful are consumed by trace records all + * emitted in non-blocking contexts. Which is quite unlikely. + */ + struct list_head tcd_stock_pages; + /* number of pages on ->tcd_stock_pages */ + unsigned long tcd_cur_stock_pages; + + unsigned short tcd_shutting_down; + unsigned short tcd_cpu; + unsigned short tcd_type; + /* The factors to share debug memory. 
*/ + unsigned short tcd_pages_factor; + } tcd; + char __pad[L1_CACHE_ALIGN(sizeof(struct trace_cpu_data))]; +}; + +extern union trace_data_union (*trace_data[TCD_TYPE_MAX])[NR_CPUS]; + +#define tcd_for_each(tcd, i, j) \ + for (i = 0; trace_data[i] != NULL; i++) \ + for (j = 0, ((tcd) = &(*trace_data[i])[j].tcd); \ + j < num_possible_cpus(); j++, (tcd) = &(*trace_data[i])[j].tcd) + +#define tcd_for_each_type_lock(tcd, i) \ + for (i = 0; trace_data[i] && \ + (tcd = &(*trace_data[i])[smp_processor_id()].tcd) && \ + trace_lock_tcd(tcd); trace_unlock_tcd(tcd), i++) + +struct trace_page { + struct page * page; /* page itself */ + struct list_head linkage; /* Used by lists in trace_data_union */ + unsigned int used; /* number of bytes used within this page */ + unsigned short cpu; /* cpu that owns this page */ + unsigned short type; /* type(context) of this page */ +}; + +struct page_collection { + struct list_head pc_pages; + spinlock_t pc_lock; + int pc_want_daemon_pages; +}; + +/* ASSERTION that is safe to use within the debug system */ +#define __ASSERT(cond) \ +do { \ + if (unlikely(!(cond))) { \ + printk(KERN_ERR "ASSERTION("#cond") failed"); \ + BUG(); \ + } \ +} while (0) + +#define __ASSERT_TAGE_INVARIANT(tage) \ +do { \ + __ASSERT(tage != NULL); \ + __ASSERT(tage->page != NULL); \ + __ASSERT(tage->used <= PAGE_SIZE); \ + __ASSERT(page_count(tage->page) > 0); \ +} while(0) + +/* ASSERTION that will debug log used outside the debug sysytem */ +#define ASSERT(cond) \ +do { \ + if (unlikely(!(cond))) { \ + spl_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ + __FILE__, __FUNCTION__, __LINE__, \ + "ASSERTION(" #cond ") failed\n"); \ + spl_debug_bug(__FILE__, __FUNCTION__, __LINE__); \ + } \ +} while (0) + +#define ASSERTF(cond, fmt, a...) 
\ +do { \ + if (unlikely(!(cond))) { \ + spl_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ + __FILE__, __FUNCTION__, __LINE__, \ + "ASSERTION(" #cond ") failed:" fmt, \ + ## a); \ + spl_debug_bug(__FILE__, __FUNCTION__, __LINE__) \ + } \ +} while (0) + +#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE, FMT, CAST) \ +do { \ + const TYPE __left = (TYPE)(LEFT); \ + const TYPE __right = (TYPE)(RIGHT); \ + if (!(__left OP __right)) { \ + spl_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ + __FILE__, __FUNCTION__, __LINE__, \ + "VERIFY3(" FMT " " #OP " " FMT ")\n", \ + CAST __left, CAST __right); \ + spl_debug_bug(__FILE__, __FUNCTION__, __LINE__) \ + } \ +} while (0) + +#define VERIFY3S(x,y,z) VERIFY3_IMPL(x, y, z, int64_t, "%ld", (long)) +#define VERIFY3U(x,y,z) VERIFY3_IMPL(x, y, z, uint64_t, "%lu", (unsigned long)) +#define VERIFY3P(x,y,z) VERIFY3_IMPL(x, y, z, uintptr_t, "%p", (void *)) + +#define ASSERT3S(x,y,z) VERIFY3S(x, y, z) +#define ASSERT3U(x,y,z) VERIFY3U(x, y, z) +#define ASSERT3P(x,y,z) VERIFY3P(x, y, z) + +#define VERIFY(x) ASSERT(x) +#define SBUG() spl_debug_bug(__FILE__, __FUNCTION__, __LINE__); + +#define spl_debug_msg(cdls, subsys, mask, file, fn, line, format, a...) 
\ + spl_debug_vmsg(cdls, subsys, mask, file, fn, \ + line, NULL, NULL, format, ##a) + +#ifdef __ia64__ +#define CDEBUG_STACK() (THREAD_SIZE - \ + ((unsigned long)__builtin_dwarf_cfa() & \ + (THREAD_SIZE - 1))) +#else +#define CDEBUG_STACK() (THREAD_SIZE - \ + ((unsigned long)__builtin_frame_address(0) & \ + (THREAD_SIZE - 1))) +# endif /* __ia64__ */ + +#define __CHECK_STACK(file, func, line) \ +do { \ + unsigned long _stack = CDEBUG_STACK(); \ + \ + if (_stack > (3*THREAD_SIZE/4) && _stack > spl_debug_stack) { \ + spl_debug_stack = _stack; \ + spl_debug_msg(NULL, D_TRACE, D_WARNING, \ + file, func, line, \ + "Exceeded maximum safe stack " \ + "%lu/%lu\n", _stack, THREAD_SIZE); \ + __ASSERT(0); \ + } \ +} while (0) + +#define CHECK_STACK()__CHECK_STACK(__FILE__, __func__, __LINE__) + +#define __CDEBUG(cdls, subsys, mask, format, a...) \ +do { \ + CHECK_STACK(); \ + \ + if (((mask) & D_CANTMASK) != 0 || \ + ((spl_debug_mask & (mask)) != 0 && \ + (spl_debug_subsys & (subsys)) != 0)) \ + spl_debug_msg(cdls, subsys, mask, \ + __FILE__, __FUNCTION__, __LINE__, \ + format, ## a); \ +} while (0) + +#define CDEBUG(mask, format, a...) \ + __CDEBUG(NULL, DEBUG_SUBSYSTEM, mask, format, ## a) + +#define __CDEBUG_LIMIT(subsys, mask, format, a...) \ +do { \ + static spl_debug_limit_state_t cdls; \ + \ + __CDEBUG(&cdls, subsys, mask, format, ## a); \ +} while (0) + +#define CDEBUG_LIMIT(mask, format, a...) \ + __CDEBUG_LIMIT(DEBUG_SUBSYSTEM, mask, format, ## a) + +#define dprintf(fmt, a...) CDEBUG_LIMIT(D_INFO, fmt, ## a) +#define CWARN(fmt, a...) CDEBUG_LIMIT(D_WARNING, fmt, ## a) +#define CERROR(fmt, a...) CDEBUG_LIMIT(D_ERROR, fmt, ## a) +#define CEMERG(fmt, a...) CDEBUG_LIMIT(D_EMERG, fmt, ## a) +#define CONSOLE(mask, fmt, a...) 
CDEBUG(D_CONSOLE | (mask), fmt, ## a) + +#define GOTO(label, rc) \ +do { \ + long GOTO__ret = (long)(rc); \ + CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ + #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ + (signed long)GOTO__ret); \ + goto label; \ +} while (0) + +#define RETURN(rc) \ +do { \ + typeof(rc) RETURN__ret = (rc); \ + CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ + (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ + return RETURN__ret; \ +} while (0) + +#define ENTRY \ +do { \ + CDEBUG(D_TRACE, "Process entered\n"); \ +} while (0) + +#define EXIT \ +do { \ + CDEBUG(D_TRACE, "Process leaving\n"); \ +} while(0) + +extern int spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask, + const char *file, const char *fn, const int line, + const char *format1, va_list args, const char *format2, ...); + +extern unsigned long spl_debug_set_mask(unsigned long mask); +extern unsigned long spl_debug_get_mask(void); +extern unsigned long spl_debug_set_subsys(unsigned long mask); +extern unsigned long spl_debug_get_subsys(void); +extern int spl_debug_set_mb(int mb); +extern int spl_debug_get_mb(void); + +extern int spl_debug_dumplog(void); +extern void spl_debug_dumpstack(struct task_struct *tsk); +extern void spl_debug_bug(char *file, const char *func, const int line); + +extern int spl_debug_clear_buffer(void); +extern int spl_debug_mark_buffer(char *text); #endif /* SPL_DEBUG_H */ diff --git a/include/sys/proc.h b/include/sys/proc.h new file mode 100644 index 000000000..e77ea5fb2 --- /dev/null +++ b/include/sys/proc.h @@ -0,0 +1,9 @@ +#ifndef _SPL_PROC_H +#define _SPL_PROC_H + +#include + +int proc_init(void); +void proc_fini(void); + +#endif /* SPL_PROC_H */ diff --git a/include/sys/sysmacros.h b/include/sys/sysmacros.h index d5f8597c2..62a51aa38 100644 --- a/include/sys/sysmacros.h +++ b/include/sys/sysmacros.h @@ -6,6 +6,7 @@ extern "C" { #endif #include +#include #include #include #include @@ 
-73,26 +74,6 @@ extern "C" { #define bzero(ptr,size) memset(ptr,0,size) #define bcopy(src,dest,size) memcpy(dest,src,size) #define bcmp(src,dest,size) memcmp((src), (dest), (size_t)(size)) -#define ASSERT(x) BUG_ON(!(x)) -#define VERIFY(x) ASSERT(x) - -#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE, FMT, CAST) do { \ - const TYPE __left = (TYPE)(LEFT); \ - const TYPE __right = (TYPE)(RIGHT); \ - if (!(__left OP __right)) { \ - printk("spl: Error VERIFY3(" FMT " " #OP " " FMT ")\n", \ - CAST __left, CAST __right); \ - BUG(); \ - } \ -} while (0) - -#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t, "%ld", (long)) -#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t, "%lu", (unsigned long)) -#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t, "%p", (void *)) - -#define ASSERT3S(x, y, z) VERIFY3S(x, y, z) -#define ASSERT3U(x, y, z) VERIFY3U(x, y, z) -#define ASSERT3P(x, y, z) VERIFY3P(x, y, z) /* Dtrace probes do not exist in the linux kernel */ @@ -117,12 +98,13 @@ extern "C" { #define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) /* Missing globals */ +extern unsigned long spl_hostid; +extern char spl_hw_serial[11]; extern int p0; -extern char hw_serial[11]; /* Missing misc functions */ extern int highbit(unsigned long i); -extern int ddi_strtoul(const char *hw_serial, char **nptr, +extern int ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result); #define makedevice(maj,min) makedev(maj,min) diff --git a/modules/spl/Makefile.in b/modules/spl/Makefile.in index 7f16904ef..ff283dfd6 100644 --- a/modules/spl/Makefile.in +++ b/modules/spl/Makefile.in @@ -9,6 +9,8 @@ CPPFLAGS += @KERNELCPPFLAGS@ # Solaris porting layer module obj-m := spl.o +spl-objs += spl-debug.o +spl-objs += spl-proc.o spl-objs += spl-kmem.o spl-objs += spl-thread.o spl-objs += spl-taskq.o diff --git a/modules/spl/spl-debug.c b/modules/spl/spl-debug.c new file mode 100644 index 000000000..f36a1c39e --- /dev/null +++ b/modules/spl/spl-debug.c @@ -0,0 +1,1293 @@ 
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Zach Brown + * Author: Phil Schwan + * Author: Brian Behlendorf + * + * This file was originally part of Lustre, http://www.lustre.org. + * but has subsequently been adapted for use in the SPL in + * accordance with the GPL. + * + * SPL is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * SPL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with SPL; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "config.h" + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_DEBUG + +unsigned long spl_debug_subsys = ~0; +EXPORT_SYMBOL(spl_debug_subsys); +module_param(spl_debug_subsys, long, 0644); +MODULE_PARM_DESC(spl_debug_subsys, "Subsystem debugging level mask."); + +unsigned long spl_debug_mask = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE); +EXPORT_SYMBOL(spl_debug_mask); +module_param(spl_debug_mask, long, 0644); +MODULE_PARM_DESC(spl_debug_mask, "Debugging level mask."); + +unsigned long spl_debug_printk = D_CANTMASK; +EXPORT_SYMBOL(spl_debug_printk); +module_param(spl_debug_printk, long, 0644); +MODULE_PARM_DESC(spl_debug_printk, "Console printk level mask."); + +int spl_debug_mb = -1; +EXPORT_SYMBOL(spl_debug_mb); +module_param(spl_debug_mb, int, 0644); +MODULE_PARM_DESC(spl_debug_mb, "Total debug buffer size."); + +unsigned int spl_debug_binary = 1; +EXPORT_SYMBOL(spl_debug_binary); + +unsigned int spl_debug_catastrophe; +EXPORT_SYMBOL(spl_debug_catastrophe); + +unsigned int spl_debug_panic_on_bug = 1; +EXPORT_SYMBOL(spl_debug_panic_on_bug); +module_param(spl_debug_panic_on_bug, int, 0644); +MODULE_PARM_DESC(spl_debug_panic_on_bug, "Panic on BUG"); + +static char spl_debug_file_name[PATH_MAX]; +char spl_debug_file_path[PATH_MAX] = "/var/dumps/spl-log"; + +unsigned int spl_console_ratelimit = 1; +EXPORT_SYMBOL(spl_console_ratelimit); + +long spl_console_max_delay; +EXPORT_SYMBOL(spl_console_max_delay); + +long spl_console_min_delay; +EXPORT_SYMBOL(spl_console_min_delay); + +unsigned int spl_console_backoff = SPL_DEFAULT_BACKOFF; +EXPORT_SYMBOL(spl_console_backoff); + +unsigned int spl_debug_stack; +EXPORT_SYMBOL(spl_debug_stack); + +static int spl_panic_in_progress; + +union trace_data_union (*trace_data[TCD_TYPE_MAX])[NR_CPUS] __cacheline_aligned; +char 
*trace_console_buffers[NR_CPUS][3]; +struct rw_semaphore trace_sem; +atomic_t trace_tage_allocated = ATOMIC_INIT(0); + +static int panic_notifier(struct notifier_block *, unsigned long, void *); +static int spl_debug_dump_all_pages(char *); +static void trace_fini(void); + + +/* Memory percentage breakdown by type */ +static unsigned int pages_factor[TCD_TYPE_MAX] = { + 80, /* 80% pages for TCD_TYPE_PROC */ + 10, /* 10% pages for TCD_TYPE_SOFTIRQ */ + 10 /* 10% pages for TCD_TYPE_IRQ */ +}; + +static struct notifier_block spl_panic_notifier = { + notifier_call: panic_notifier, + next: NULL, + priority: 10000 +}; + +const char * +spl_debug_subsys2str(int subsys) +{ + switch (subsys) { + default: + return NULL; + case S_UNDEFINED: + return "undefined"; + case S_ATOMIC: + return "atomic"; + case S_KOBJ: + return "kobj"; + case S_VNODE: + return "vnode"; + case S_TIME: + return "time"; + case S_RWLOCK: + return "rwlock"; + case S_THREAD: + return "thread"; + case S_CONDVAR: + return "condvar"; + case S_MUTEX: + return "mutex"; + case S_RNG: + return "rng"; + case S_TASKQ: + return "taskq"; + case S_KMEM: + return "kmem"; + } +} + +const char * +spl_debug_dbg2str(int debug) +{ + switch (debug) { + default: + return NULL; + case D_TRACE: + return "trace"; + case D_INFO: + return "info"; + case D_WARNING: + return "warning"; + case D_ERROR: + return "error"; + case D_EMERG: + return "emerg"; + case D_CONSOLE: + return "console"; + case D_IOCTL: + return "ioctl"; + case D_DPRINTF: + return "dprintf"; + case D_OTHER: + return "other"; + } +} + +int +spl_debug_mask2str(char *str, int size, unsigned long mask, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? 
spl_debug_subsys2str : + spl_debug_dbg2str; + const char *token; + int i, bit, len = 0; + + if (mask == 0) { /* "0" */ + if (size > 0) + str[0] = '0'; + len = 1; + } else { /* space-separated tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + if ((mask & bit) == 0) + continue; + + token = fn(bit); + if (token == NULL) /* unused bit */ + continue; + + if (len > 0) { /* separator? */ + if (len < size) + str[len] = ' '; + len++; + } + + while (*token != 0) { + if (len < size) + str[len] = *token; + token++; + len++; + } + } + } + + /* terminate 'str' */ + if (len < size) + str[len] = 0; + else + str[size - 1] = 0; + + return len; +} + +static int +spl_debug_token2mask(int *mask, const char *str, int len, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str : + spl_debug_dbg2str; + const char *token; + int i, j, bit; + + /* match against known tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + token = fn(bit); + if (token == NULL) /* unused? */ + continue; + + /* strcasecmp */ + for (j = 0; ; j++) { + if (j == len) { /* end of token */ + if (token[j] == 0) { + *mask = bit; + return 0; + } + break; + } + + if (token[j] == 0) + break; + + if (str[j] == token[j]) + continue; + + if (str[j] < 'A' || 'Z' < str[j]) + break; + + if (str[j] - 'A' + 'a' != token[j]) + break; + } + } + + return -EINVAL; /* no match */ +} + +int +spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys) +{ + char op = 0; + int m = 0, matched, n, t; + + /* Allow a number for backwards compatibility */ + for (n = strlen(str); n > 0; n--) + if (!isspace(str[n-1])) + break; + matched = n; + + if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 && matched == n) { + *mask = m; + return 0; + } + + /* must be a list of debug tokens or numbers separated by + * whitespace and optionally an operator ('+' or '-'). If an operator + * appears first in , '*mask' is used as the starting point + * (relative), otherwise 0 is used (absolute). 
An operator applies to + * all following tokens up to the next operator. */ + matched = 0; + while (*str != 0) { + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) + break; + + if (*str == '+' || *str == '-') { + op = *str++; + + /* op on first token == relative */ + if (!matched) + m = *mask; + + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) /* trailing op */ + return -EINVAL; + } + + /* find token length */ + for (n = 0; str[n] != 0 && !isspace(str[n]); n++); + + /* match token */ + if (spl_debug_token2mask(&t, str, n, is_subsys) != 0) + return -EINVAL; + + matched = 1; + if (op == '-') + m &= ~t; + else + m |= t; + + str += n; + } + + if (!matched) + return -EINVAL; + + *mask = m; + return 0; +} + +typedef struct dumplog_priv { + wait_queue_head_t dp_waitq; + pid_t dp_pid; + atomic_t dp_flag; +} dumplog_priv_t; + +static void +spl_debug_dumplog_internal(dumplog_priv_t *dp) +{ + void *journal_info; + + journal_info = current->journal_info; + current->journal_info = NULL; + + snprintf(spl_debug_file_name, sizeof(spl_debug_file_path) - 1, + "%s.%ld.%ld", spl_debug_file_path, + get_seconds(), (long)dp->dp_pid); + printk(KERN_ALERT "SPL: dumping log to %s\n", spl_debug_file_name); + spl_debug_dump_all_pages(spl_debug_file_name); + + current->journal_info = journal_info; +} + +static int +spl_debug_dumplog_thread(void *arg) +{ + dumplog_priv_t *dp = (dumplog_priv_t *)arg; + + spl_debug_dumplog_internal(dp); + atomic_set(&dp->dp_flag, 1); + wake_up(&dp->dp_waitq); + do_exit(0); + + return 0; /* Unreachable */ +} + +int +spl_debug_dumplog(void) +{ + struct task_struct *tsk; + dumplog_priv_t dp; + ENTRY; + + init_waitqueue_head(&dp.dp_waitq); + dp.dp_pid = current->pid; + atomic_set(&dp.dp_flag, 0); + + tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug"); + if (tsk == NULL) + RETURN(-ENOMEM); + + wake_up_process(tsk); + wait_event(dp.dp_waitq, atomic_read(&dp.dp_flag)); + + RETURN(0); +} 
+EXPORT_SYMBOL(spl_debug_dumplog); + +static char * +trace_get_console_buffer(void) +{ + int cpu = get_cpu(); + int idx; + + if (in_irq()) { + idx = 0; + } else if (in_softirq()) { + idx = 1; + } else { + idx = 2; + } + + return trace_console_buffers[cpu][idx]; +} + +static void +trace_put_console_buffer(char *buffer) +{ + put_cpu(); +} + +static struct trace_cpu_data * +trace_get_tcd(void) +{ + int cpu; + + cpu = get_cpu(); + if (in_irq()) + return &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd; + else if (in_softirq()) + return &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd; + + return &(*trace_data[TCD_TYPE_PROC])[cpu].tcd; +} + +static void +trace_put_tcd (struct trace_cpu_data *tcd) +{ + put_cpu(); +} + +static int +trace_lock_tcd(struct trace_cpu_data *tcd) +{ + __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); + + if (tcd->tcd_type == TCD_TYPE_IRQ) + local_irq_disable(); + else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ) + local_bh_disable(); + + return 1; +} + +static void +trace_unlock_tcd(struct trace_cpu_data *tcd) +{ + __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); + + if (tcd->tcd_type == TCD_TYPE_IRQ) + local_irq_enable(); + else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ) + local_bh_enable(); +} + +static void +trace_set_debug_header(struct spl_debug_header *header, int subsys, + int mask, const int line, unsigned long stack) +{ + struct timeval tv; + + do_gettimeofday(&tv); + + header->ph_subsys = subsys; + header->ph_mask = mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_sec = (__u32)tv.tv_sec; + header->ph_usec = tv.tv_usec; + header->ph_stack = stack; + header->ph_pid = current->pid; + header->ph_line_num = line; + + return; +} + +static void +trace_print_to_console(struct spl_debug_header *hdr, int mask, const char *buf, + int len, const char *file, const char *fn) +{ + char *prefix = "SPL", *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + prefix = "SPLError"; + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = "SPLError"; + ptype = KERN_ERR; + } else 
if ((mask & D_WARNING) != 0) { + prefix = "SPL"; + ptype = KERN_WARNING; + } else if ((mask & (D_CONSOLE | spl_debug_printk)) != 0) { + prefix = "SPL"; + ptype = KERN_INFO; + } + + if ((mask & D_CONSOLE) != 0) { + printk("%s%s: %.*s", ptype, prefix, len, buf); + } else { + printk("%s%s: %d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, + file, hdr->ph_line_num, fn, len, buf); + } + + return; +} + +static int +trace_max_debug_mb(void) +{ + return MAX(512, ((num_physpages >> (20 - PAGE_SHIFT)) * 80) / 100); +} + +static void +trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) +{ + cpumask_t mask, cpus_allowed = current->cpus_allowed; + int cpu; + + for_each_online_cpu(cpu) { + cpus_clear(mask); + cpu_set(cpu, mask); + set_cpus_allowed(current, mask); + + fn(arg); + + set_cpus_allowed(current, cpus_allowed); + } +} + +static struct trace_page * +tage_alloc(int gfp) +{ + struct page *page; + struct trace_page *tage; + + page = alloc_pages(gfp | __GFP_NOWARN, 0); + if (page == NULL) + return NULL; + + tage = kmalloc(sizeof(*tage), gfp); + if (tage == NULL) { + __free_pages(page, 0); + return NULL; + } + + tage->page = page; + atomic_inc(&trace_tage_allocated); + + return tage; +} + +static void +tage_free(struct trace_page *tage) +{ + __ASSERT(tage != NULL); + __ASSERT(tage->page != NULL); + + __free_pages(tage->page, 0); + kfree(tage); + atomic_dec(&trace_tage_allocated); +} + +static struct trace_page * +tage_from_list(struct list_head *list) +{ + return list_entry(list, struct trace_page, linkage); +} + +static void +tage_to_tail(struct trace_page *tage, struct list_head *queue) +{ + __ASSERT(tage != NULL); + __ASSERT(queue != NULL); + + list_move_tail(&tage->linkage, queue); +} + +/* try to return a page that has 'len' bytes left at the end */ +static struct trace_page * +trace_get_tage_try(struct trace_cpu_data *tcd, unsigned long len) +{ + struct trace_page *tage; + + if (tcd->tcd_cur_pages > 0) { + __ASSERT(!list_empty(&tcd->tcd_pages)); + tage = 
tage_from_list(tcd->tcd_pages.prev); + if (tage->used + len <= PAGE_SIZE) + return tage; + } + + if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { + if (tcd->tcd_cur_stock_pages > 0) { + tage = tage_from_list(tcd->tcd_stock_pages.prev); + tcd->tcd_cur_stock_pages--; + list_del_init(&tage->linkage); + } else { + tage = tage_alloc(GFP_ATOMIC); + if (tage == NULL) { + printk(KERN_WARNING + "failure to allocate a tage (%ld)\n", + tcd->tcd_cur_pages); + return NULL; + } + } + + tage->used = 0; + tage->cpu = smp_processor_id(); + tage->type = tcd->tcd_type; + list_add_tail(&tage->linkage, &tcd->tcd_pages); + tcd->tcd_cur_pages++; + + return tage; + } + + return NULL; +} + +/* return a page that has 'len' bytes left at the end */ +static struct trace_page * +trace_get_tage(struct trace_cpu_data *tcd, unsigned long len) +{ + struct trace_page *tage; + + __ASSERT(len <= PAGE_SIZE); + + tage = trace_get_tage_try(tcd, len); + if (tage) + return tage; + + if (tcd->tcd_cur_pages > 0) { + tage = tage_from_list(tcd->tcd_pages.next); + tage->used = 0; + tage_to_tail(tage, &tcd->tcd_pages); + } + + return tage; +} + +int +spl_debug_vmsg(spl_debug_limit_state_t *cdls, int subsys, int mask, + const char *file, const char *fn, const int line, + const char *format1, va_list args, const char *format2, ...) 
+{ + struct trace_cpu_data *tcd = NULL; + struct spl_debug_header header; + struct trace_page *tage; + /* string_buf is used only if tcd != NULL, and is always set then */ + char *string_buf = NULL; + char *debug_buf; + int known_size; + int needed = 85; /* average message length */ + int max_nob; + va_list ap; + int i; + int remain; + + if (strchr(file, '/')) + file = strrchr(file, '/') + 1; + + trace_set_debug_header(&header, subsys, mask, line, CDEBUG_STACK()); + + tcd = trace_get_tcd(); + if (tcd == NULL) + goto console; + + if (tcd->tcd_shutting_down) { + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + known_size = strlen(file) + 1; + if (fn) + known_size += strlen(fn) + 1; + + if (spl_debug_binary) + known_size += sizeof(header); + + /* '2' used because vsnprintf returns real size required for output + * _without_ terminating NULL. */ + for (i = 0; i < 2; i++) { + tage = trace_get_tage(tcd, needed + known_size + 1); + if (tage == NULL) { + if (needed + known_size > PAGE_SIZE) + mask |= D_ERROR; + + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + string_buf = (char *)page_address(tage->page) + + tage->used + known_size; + + max_nob = PAGE_SIZE - tage->used - known_size; + if (max_nob <= 0) { + printk(KERN_EMERG "negative max_nob: %i\n", max_nob); + mask |= D_ERROR; + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, max_nob, format1, ap); + va_end(ap); + } + + if (format2) { + remain = max_nob - needed; + if (remain < 0) + remain = 0; + + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + + if (needed < max_nob) + break; + } + + if (unlikely(*(string_buf + needed - 1) != '\n')) + printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", + file, line, fn); + + header.ph_len = known_size + needed; + debug_buf = (char *)page_address(tage->page) + tage->used; + + if (spl_debug_binary) { + 
memcpy(debug_buf, &header, sizeof(header)); + tage->used += sizeof(header); + debug_buf += sizeof(header); + } + + strcpy(debug_buf, file); + tage->used += strlen(file) + 1; + debug_buf += strlen(file) + 1; + + if (fn) { + strcpy(debug_buf, fn); + tage->used += strlen(fn) + 1; + debug_buf += strlen(fn) + 1; + } + + __ASSERT(debug_buf == string_buf); + + tage->used += needed; + __ASSERT (tage->used <= PAGE_SIZE); + +console: + if ((mask & spl_debug_printk) == 0) { + /* no console output requested */ + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (cdls != NULL) { + if (spl_console_ratelimit && cdls->cdls_next != 0 && + !time_before(cdls->cdls_next, jiffies)) { + /* skipping a console message */ + cdls->cdls_count++; + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (time_before(cdls->cdls_next + spl_console_max_delay + + (10 * HZ), jiffies)) { + /* last timeout was a long time ago */ + cdls->cdls_delay /= spl_console_backoff * 4; + } else { + cdls->cdls_delay *= spl_console_backoff; + + if (cdls->cdls_delay < spl_console_min_delay) + cdls->cdls_delay = spl_console_min_delay; + else if (cdls->cdls_delay > spl_console_max_delay) + cdls->cdls_delay = spl_console_max_delay; + } + + /* ensure cdls_next is never zero after it's been seen */ + cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1; + } + + if (tcd != NULL) { + trace_print_to_console(&header, mask, string_buf, needed, file, fn); + trace_put_tcd(tcd); + } else { + string_buf = trace_get_console_buffer(); + + needed = 0; + if (format1 != NULL) { + va_copy(ap, args); + needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap); + va_end(ap); + } + if (format2 != NULL) { + remain = TRACE_CONSOLE_BUFFER_SIZE - needed; + if (remain > 0) { + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + } + trace_print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + } + + if (cdls 
!= NULL && cdls->cdls_count != 0) { + string_buf = trace_get_console_buffer(); + + needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, + "Skipped %d previous similar message%s\n", + cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); + + trace_print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + cdls->cdls_count = 0; + } + + return 0; +} +EXPORT_SYMBOL(spl_debug_vmsg); + +/* Do the collect_pages job on a single CPU: assumes that all other + * CPUs have been stopped during a panic. If this isn't true for + * some arch, this will have to be implemented separately in each arch. + */ +static void +panic_collect_pages(struct page_collection *pc) +{ + struct trace_cpu_data *tcd; + int i, j; + + tcd_for_each(tcd, i, j) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + } +} + +static void +collect_pages_on_cpu(void *info) +{ + struct trace_cpu_data *tcd; + struct page_collection *pc = info; + int i; + + spin_lock(&pc->pc_lock); + tcd_for_each_type_lock(tcd, i) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + } + spin_unlock(&pc->pc_lock); +} + +static void +collect_pages(struct page_collection *pc) +{ + INIT_LIST_HEAD(&pc->pc_pages); + + if (spl_panic_in_progress) + panic_collect_pages(pc); + else + trace_call_on_all_cpus(collect_pages_on_cpu, pc); +} + +static void +put_pages_back_on_cpu(void *info) +{ + struct page_collection *pc = info; + struct trace_cpu_data *tcd; + struct list_head *cur_head; + struct trace_page *tage; + struct trace_page *tmp; + int i; + + spin_lock(&pc->pc_lock); + tcd_for_each_type_lock(tcd, i) { + cur_head = tcd->tcd_pages.next; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + + __ASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != smp_processor_id() || tage->type != i) + continue; + + tage_to_tail(tage, cur_head); + tcd->tcd_cur_pages++; + } + } + spin_unlock(&pc->pc_lock); +} + +static void 
+put_pages_back(struct page_collection *pc) +{ + if (!spl_panic_in_progress) + trace_call_on_all_cpus(put_pages_back_on_cpu, pc); +} + +static struct file * +trace_filp_open (const char *name, int flags, int mode, int *err) +{ + struct file *filp = NULL; + int rc; + + filp = filp_open(name, flags, mode); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + printk(KERN_ERR "SPL: Can't open %s file: %d\n", name, rc); + if (err) + *err = rc; + filp = NULL; + } + return filp; +} + +#define trace_filp_write(fp, b, s, p) (fp)->f_op->write((fp), (b), (s), p) +#define trace_filp_fsync(fp) (fp)->f_op->fsync((fp),(fp)->f_dentry,1) +#define trace_filp_close(f) filp_close(f, NULL) +#define trace_filp_poff(f) (&(f)->f_pos) + +static int +spl_debug_dump_all_pages(char *filename) +{ + struct page_collection pc; + struct file *filp; + struct trace_page *tage; + struct trace_page *tmp; + mm_segment_t oldfs; + int rc = 0; + + down_write(&trace_sem); + + filp = trace_filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, + 0600, &rc); + if (filp == NULL) { + if (rc != -EEXIST) + printk(KERN_ERR "SPL: Can't open %s for dump: %d\n", + filename, rc); + goto out; + } + + spin_lock_init(&pc.pc_lock); + collect_pages(&pc); + if (list_empty(&pc.pc_pages)) { + rc = 0; + goto close; + } + + oldfs = get_fs(); + set_fs(get_ds()); + + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __ASSERT_TAGE_INVARIANT(tage); + + rc = trace_filp_write(filp, page_address(tage->page), + tage->used, trace_filp_poff(filp)); + if (rc != (int)tage->used) { + printk(KERN_WARNING "SPL: Wanted to write %u " + "but wrote %d\n", tage->used, rc); + put_pages_back(&pc); + __ASSERT(list_empty(&pc.pc_pages)); + break; + } + list_del(&tage->linkage); + tage_free(tage); + } + + set_fs(oldfs); + + rc = trace_filp_fsync(filp); + if (rc) + printk(KERN_ERR "SPL: Unable to sync: %d\n", rc); + close: + trace_filp_close(filp); + out: + up_write(&trace_sem); + + return rc; +} + +static void +spl_debug_flush_pages(void) +{ + 
struct page_collection pc; + struct trace_page *tage; + struct trace_page *tmp; + + spin_lock_init(&pc.pc_lock); + + collect_pages(&pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + __ASSERT_TAGE_INVARIANT(tage); + list_del(&tage->linkage); + tage_free(tage); + } +} + +unsigned long +spl_debug_set_mask(unsigned long mask) { + spl_debug_mask = mask; + return 0; +} +EXPORT_SYMBOL(spl_debug_set_mask); + +unsigned long +spl_debug_get_mask(void) { + return spl_debug_mask; +} +EXPORT_SYMBOL(spl_debug_get_mask); + +unsigned long +spl_debug_set_subsys(unsigned long subsys) { + spl_debug_subsys = subsys; + return 0; +} +EXPORT_SYMBOL(spl_debug_set_subsys); + +unsigned long +spl_debug_get_subsys(void) { + return spl_debug_subsys; +} +EXPORT_SYMBOL(spl_debug_get_subsys); + +int +spl_debug_set_mb(int mb) +{ + int i, j, pages; + int limit = trace_max_debug_mb(); + struct trace_cpu_data *tcd; + + if (mb < num_possible_cpus()) { + printk(KERN_ERR "SPL: Refusing to set debug buffer size to " + "%dMB - lower limit is %d\n", mb, num_possible_cpus()); + return -EINVAL; + } + + if (mb > limit) { + printk(KERN_ERR "SPL: Refusing to set debug buffer size to " + "%dMB - upper limit is %d\n", mb, limit); + return -EINVAL; + } + + mb /= num_possible_cpus(); + pages = mb << (20 - PAGE_SHIFT); + + down_write(&trace_sem); + + tcd_for_each(tcd, i, j) + tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; + + up_write(&trace_sem); + + return 0; +} +EXPORT_SYMBOL(spl_debug_set_mb); + +int +spl_debug_get_mb(void) +{ + int i, j; + struct trace_cpu_data *tcd; + int total_pages = 0; + + down_read(&trace_sem); + + tcd_for_each(tcd, i, j) + total_pages += tcd->tcd_max_pages; + + up_read(&trace_sem); + + return (total_pages >> (20 - PAGE_SHIFT)) + 1; +} +EXPORT_SYMBOL(spl_debug_get_mb); + +void spl_debug_dumpstack(struct task_struct *tsk) +{ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + + CWARN("showing stack for process %d\n", 
tsk->pid); + show_task(tsk); +} +EXPORT_SYMBOL(spl_debug_dumpstack); + +void spl_debug_bug(char *file, const char *func, const int line) +{ + spl_debug_catastrophe = 1; + spl_debug_msg(NULL, 0, D_EMERG, file, func, line, "SPL BUG\n"); + + if (in_interrupt()) { + panic("SPL BUG in interrupt.\n"); + /* not reached */ + } + + /* Ensure all debug pages and dumped by current cpu */ + if (spl_debug_panic_on_bug) + spl_panic_in_progress = 1; + + spl_debug_dumpstack(NULL); + spl_debug_dumplog(); + + if (spl_debug_panic_on_bug) + panic("SPL BUG"); + + set_task_state(current, TASK_UNINTERRUPTIBLE); + while (1) + schedule(); +} +EXPORT_SYMBOL(spl_debug_bug); + +int +spl_debug_clear_buffer(void) +{ + spl_debug_flush_pages(); + return 0; +} +EXPORT_SYMBOL(spl_debug_clear_buffer); + +int +spl_debug_mark_buffer(char *text) +{ + CDEBUG(D_WARNING, "*************************************\n"); + CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); + CDEBUG(D_WARNING, "*************************************\n"); + + return 0; +} +EXPORT_SYMBOL(spl_debug_mark_buffer); + +static int +panic_notifier(struct notifier_block *self, + unsigned long unused1, void *unused2) +{ + if (spl_panic_in_progress) + return 0; + + spl_panic_in_progress = 1; + mb(); + + if (!in_interrupt()) { + while (current->lock_depth >= 0) + unlock_kernel(); + + spl_debug_dumplog_internal((void *)(long)current->pid); + } + + return 0; +} + +static int +trace_init(int max_pages) +{ + struct trace_cpu_data *tcd; + int i, j; + + init_rwsem(&trace_sem); + + /* initialize trace_data */ + memset(trace_data, 0, sizeof(trace_data)); + for (i = 0; i < TCD_TYPE_MAX; i++) { + trace_data[i] = kmalloc(sizeof(union trace_data_union) * + NR_CPUS, GFP_KERNEL); + if (trace_data[i] == NULL) + goto out; + } + + tcd_for_each(tcd, i, j) { + tcd->tcd_pages_factor = pages_factor[i]; + tcd->tcd_type = i; + tcd->tcd_cpu = j; + INIT_LIST_HEAD(&tcd->tcd_pages); + INIT_LIST_HEAD(&tcd->tcd_stock_pages); + tcd->tcd_cur_pages = 0; + 
tcd->tcd_cur_stock_pages = 0; + tcd->tcd_max_pages = (max_pages * pages_factor[i]) / 100; + tcd->tcd_shutting_down = 0; + } + + for (i = 0; i < num_possible_cpus(); i++) { + for (j = 0; j < 3; j++) { + trace_console_buffers[i][j] = + kmalloc(TRACE_CONSOLE_BUFFER_SIZE, + GFP_KERNEL); + + if (trace_console_buffers[i][j] == NULL) + goto out; + } + } + + return 0; +out: + trace_fini(); + printk(KERN_ERR "SPL: Insufficient memory for debug logs\n"); + return -ENOMEM; +} + +/* Set the default console rate-limit delays, derive the per-CPU page budget + * from spl_debug_mb, initialize the trace buffers and register the panic + * notifier. Returns 0 on success or the error from trace_init(). */ +int +debug_init(void) +{ + int rc, max = spl_debug_mb; + + spl_console_max_delay = SPL_DEFAULT_MAX_DELAY; + spl_console_min_delay = SPL_DEFAULT_MIN_DELAY; + + /* If spl_debug_mb is set to an invalid value or uninitialized + * then just make the total buffers smp_num_cpus TCD_MAX_PAGES */ + if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || + max >= 512 || max < 0) { + max = TCD_MAX_PAGES; + } else { + max = (max / num_online_cpus()) << (20 - PAGE_SHIFT); + } + + rc = trace_init(max); + if (rc) + return rc; + + atomic_notifier_chain_register(&panic_notifier_list, + &spl_panic_notifier); + return rc; +} + +/* Per-CPU teardown callback: flag every tcd visited by + * tcd_for_each_type_lock() as shutting down and free all of its pages. */ +static void +trace_cleanup_on_cpu(void *info) +{ + struct trace_cpu_data *tcd; + struct trace_page *tage; + struct trace_page *tmp; + int i; + + tcd_for_each_type_lock(tcd, i) { + tcd->tcd_shutting_down = 1; + + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { + __ASSERT_TAGE_INVARIANT(tage); + + list_del(&tage->linkage); + tage_free(tage); + } + tcd->tcd_cur_pages = 0; + } +} + +/* Tear down the trace state: run trace_cleanup_on_cpu() on every online + * CPU, then free the console buffers and the trace_data arrays. The + * trace_data walk is capped at TCD_TYPE_MAX, the same bound trace_init() + * uses when filling the array, so it cannot read past the last slot when + * every slot is in use. It still stops at the first NULL slot, which is + * what a partially failed trace_init() leaves behind. */ +static void +trace_fini(void) +{ + int i, j; + + trace_call_on_all_cpus(trace_cleanup_on_cpu, NULL); + + for (i = 0; i < num_possible_cpus(); i++) { + for (j = 0; j < 3; j++) { + if (trace_console_buffers[i][j] != NULL) { + kfree(trace_console_buffers[i][j]); + trace_console_buffers[i][j] = NULL; + } + } + } + + for (i = 0; i < TCD_TYPE_MAX && trace_data[i] != NULL; i++) { + kfree(trace_data[i]); + trace_data[i] = NULL; + } +} + +void +debug_fini(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, +
&spl_panic_notifier); + trace_fini(); + + return; +} diff --git a/modules/spl/spl-generic.c b/modules/spl/spl-generic.c index 8cd217cf1..252e9788d 100644 --- a/modules/spl/spl-generic.c +++ b/modules/spl/spl-generic.c @@ -3,32 +3,21 @@ #include #include #include -#include +#include #include #include "config.h" -/* - * Generic support - */ -static char spl_debug_buffer1[1024]; -static char spl_debug_buffer2[1024]; -static spinlock_t spl_debug_lock = SPIN_LOCK_UNLOCKED; +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif -unsigned long spl_debug_mask = 0; -unsigned long spl_debug_subsys = 0xff; -unsigned long spl_hostid = 0; -char hw_serial[11] = ""; +#define DEBUG_SUBSYSTEM S_GENERIC -EXPORT_SYMBOL(spl_debug_mask); -EXPORT_SYMBOL(spl_debug_subsys); +unsigned long spl_hostid = 0; EXPORT_SYMBOL(spl_hostid); -EXPORT_SYMBOL(hw_serial); -static struct proc_dir_entry *spl_proc_root = NULL; -static struct proc_dir_entry *spl_proc_debug_mask = NULL; -static struct proc_dir_entry *spl_proc_debug_subsys = NULL; -static struct proc_dir_entry *spl_proc_hostid = NULL; -static struct proc_dir_entry *spl_proc_hw_serial = NULL; +char spl_hw_serial[11] = ""; +EXPORT_SYMBOL(spl_hw_serial); int p0 = 0; EXPORT_SYMBOL(p0); @@ -36,14 +25,14 @@ EXPORT_SYMBOL(p0); vmem_t *zio_alloc_arena = NULL; EXPORT_SYMBOL(zio_alloc_arena); - int highbit(unsigned long i) { register int h = 1; + ENTRY; if (i == 0) - return (0); + RETURN(0); #if BITS_PER_LONG == 64 if (i & 0xffffffff00000000ul) { h += 32; i >>= 32; @@ -64,7 +53,7 @@ highbit(unsigned long i) if (i & 0x2) { h += 1; } - return (h); + RETURN(h); } EXPORT_SYMBOL(highbit); @@ -76,324 +65,75 @@ ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result) } EXPORT_SYMBOL(ddi_strtoul); -/* XXX: Not the most efficient debug function ever. This should be re-done - * as an internal per-cpu in-memory debug log accessable via /proc/. Not as - * a shared global buffer everything gets serialize though. 
That said I'll - * worry about performance considerations once I've dealt with correctness. - */ -void -__dprintf(const char *file, const char *func, int line, const char *fmt, ...) -{ - char *sfp; - struct timeval tv; - unsigned long flags; - va_list ap; - - sfp = strrchr(file, '/'); - do_gettimeofday(&tv); - - /* XXX: This is particularly bad for performance, but we need to - * disable irqs here or two __dprintf()'s may deadlock on each - * other if one if called from an irq handler. This is yet another - * reason why we really, really, need an internal debug log. - */ - spin_lock_irqsave(&spl_debug_lock, flags); - memset(spl_debug_buffer1, 0, 1024); - memset(spl_debug_buffer2, 0, 1024); - - snprintf(spl_debug_buffer1, 1023, - "spl: %lu.%06lu:%d:%u:%s:%d:%s(): ", - tv.tv_sec, tv.tv_usec, current->pid, - smp_processor_id(), - sfp == NULL ? file : sfp + 1, - line, func); - - va_start(ap, fmt); - vsnprintf(spl_debug_buffer2, 1023, fmt, ap); - va_end(ap); - - printk("%s%s", spl_debug_buffer1, spl_debug_buffer2); - spin_unlock_irqrestore(&spl_debug_lock, flags); -} -EXPORT_SYMBOL(__dprintf); - -static int -spl_proc_rd_generic_ul(char *page, char **start, off_t off, - int count, int *eof, unsigned long val) -{ - *start = page; - *eof = 1; - - if (off || count > PAGE_SIZE) - return 0; - - return snprintf(page, PAGE_SIZE, "0x%lx\n", val & 0xffffffff); -} - -static int -spl_proc_rd_debug_mask(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int rc; - - spin_lock(&spl_debug_lock); - rc = spl_proc_rd_generic_ul(page, start, off, count, - eof, spl_debug_mask); - spin_unlock(&spl_debug_lock); - - return rc; -} - -static int -spl_proc_rd_debug_subsys(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int rc; - - spin_lock(&spl_debug_lock); - rc = spl_proc_rd_generic_ul(page, start, off, count, - eof, spl_debug_subsys); - spin_unlock(&spl_debug_lock); - - return rc; -} - -static int -spl_proc_rd_hostid(char *page, char 
**start, off_t off, - int count, int *eof, void *data) -{ - *start = page; - *eof = 1; - - if (off || count > PAGE_SIZE) - return 0; - - return snprintf(page, PAGE_SIZE, "%lx\n", spl_hostid & 0xffffffff); -} - -static int -spl_proc_rd_hw_serial(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - *start = page; - *eof = 1; - - if (off || count > PAGE_SIZE) - return 0; - - strncpy(page, hw_serial, 11); - return strlen(page); -} - -static int -spl_proc_wr_generic_ul(const char *ubuf, unsigned long count, - unsigned long *val, int base) -{ - char *end, kbuf[32]; - - if (count >= sizeof(kbuf)) - return -EOVERFLOW; - - if (copy_from_user(kbuf, ubuf, count)) - return -EFAULT; - - kbuf[count] = '\0'; - *val = (int)simple_strtoul(kbuf, &end, base); - if (kbuf == end) - return -EINVAL; - - return 0; -} - static int -spl_proc_wr_debug_mask(struct file *file, const char *ubuf, - unsigned long count, void *data, int mode) +set_hostid(void) { - unsigned long val; - int rc; - - rc = spl_proc_wr_generic_ul(ubuf, count, &val, 16); - if (rc) - return rc; - - spin_lock(&spl_debug_lock); - spl_debug_mask = val; - spin_unlock(&spl_debug_lock); - - return count; -} - -static int -spl_proc_wr_debug_subsys(struct file *file, const char *ubuf, - unsigned long count, void *data, int mode) -{ - unsigned long val; - int rc; - - rc = spl_proc_wr_generic_ul(ubuf, count, &val, 16); - if (rc) - return rc; - - spin_lock(&spl_debug_lock); - spl_debug_subsys = val; - spin_unlock(&spl_debug_lock); - - return count; -} - -static int -spl_proc_wr_hostid(struct file *file, const char *ubuf, - unsigned long count, void *data, int mode) -{ - unsigned long val; - int rc; - - rc = spl_proc_wr_generic_ul(ubuf, count, &val, 16); - if (rc) - return rc; - - spl_hostid = val; - sprintf(hw_serial, "%lu\n", ((long)val >= 0) ? 
val : -val); - - return count; -} - -static struct proc_dir_entry * -spl_register_proc_entry(const char *name, mode_t mode, - struct proc_dir_entry *parent, void *data, - void *read_proc, void *write_proc) -{ - struct proc_dir_entry *entry; - - entry = create_proc_entry(name, mode, parent); - if (!entry) - return ERR_PTR(-EINVAL); - - entry->data = data; - entry->read_proc = read_proc; - entry->write_proc = write_proc; - - return entry; -} /* register_proc_entry() */ - -void spl_set_debug_mask(unsigned long mask) { - spin_lock(&spl_debug_lock); - spl_debug_mask = mask; - spin_unlock(&spl_debug_lock); -} -EXPORT_SYMBOL(spl_set_debug_mask); - -void spl_set_debug_subsys(unsigned long mask) { - spin_lock(&spl_debug_lock); - spl_debug_subsys = mask; - spin_unlock(&spl_debug_lock); -} -EXPORT_SYMBOL(spl_set_debug_subsys); - -static int __init spl_init(void) -{ - int rc = 0; char sh_path[] = "/bin/sh"; char *argv[] = { sh_path, "-c", - "/usr/bin/hostid >/proc/spl/hostid", + "/usr/bin/hostid >/proc/sys/spl/hostid", NULL }; char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - spl_proc_root = proc_mkdir("spl", NULL); - if (!spl_proc_root) { - printk("spl: Error unable to create /proc/spl/ directory\n"); - return -EINVAL; - } - - spl_proc_debug_mask = spl_register_proc_entry("debug_mask", 0644, - spl_proc_root, NULL, - spl_proc_rd_debug_mask, - spl_proc_wr_debug_mask); - if (IS_ERR(spl_proc_debug_mask)) { - rc = PTR_ERR(spl_proc_debug_mask); - goto out; - } - - spl_proc_debug_subsys = spl_register_proc_entry("debug_subsys", 0644, - spl_proc_root, NULL, - spl_proc_rd_debug_subsys, - spl_proc_wr_debug_subsys); - if (IS_ERR(spl_proc_debug_subsys)) { - rc = PTR_ERR(spl_proc_debug_subsys); - goto out2; - } + /* Doing address resolution in the kernel is tricky and just + * not a good idea in general. 
So to set the proper 'spl_hw_serial' + * use the usermodehelper support to ask '/bin/sh' to run + * '/usr/bin/hostid' and redirect the result to /proc/sys/spl/hostid + * for us to use. It's a horific solution but it will do for now. + */ + return call_usermodehelper(sh_path, argv, envp, 1); +} - spl_proc_hostid = spl_register_proc_entry("hostid", 0644, - spl_proc_root, NULL, - spl_proc_rd_hostid, - spl_proc_wr_hostid); - if (IS_ERR(spl_proc_hostid)) { - rc = PTR_ERR(spl_proc_hostid); - goto out3; - } +static int __init spl_init(void) +{ + int rc = 0; + ENTRY; - spl_proc_hw_serial = spl_register_proc_entry("hw_serial", 0444, - spl_proc_root, NULL, - spl_proc_rd_hw_serial, - NULL); - if (IS_ERR(spl_proc_hw_serial)) { - rc = PTR_ERR(spl_proc_hw_serial); - goto out4; - } + if ((rc = debug_init())) + RETURN(rc); if ((rc = kmem_init())) - goto out4; + GOTO(out , rc); if ((rc = vn_init())) - goto out4; - - /* Doing address resolution in the kernel is tricky and just - * not a good idea in general. So to set the proper 'hw_serial' - * use the usermodehelper support to ask '/bin/sh' to run - * '/usr/bin/hostid' and redirect the result to /proc/spl/hostid - * for us to use. It's a horific solution but it will do. 
- */ - if ((rc = call_usermodehelper(sh_path, argv, envp, 1))) - goto out4; + GOTO(out2, rc); - printk("spl: Loaded Solaris Porting Layer v%s\n", VERSION); + if ((rc = proc_init())) + GOTO(out3, rc); - return 0; + if ((rc = set_hostid())) + GOTO(out4, rc = -EADDRNOTAVAIL); + CWARN("Loaded Solaris Porting Layer v%s\n", VERSION); + RETURN(rc); out4: - if (spl_proc_hw_serial) - remove_proc_entry("hw_serial", spl_proc_root); + proc_fini(); out3: - if (spl_proc_hostid) - remove_proc_entry("hostid", spl_proc_root); + vn_fini(); out2: - if (spl_proc_debug_mask) - remove_proc_entry("debug_mask", spl_proc_root); - - if (spl_proc_debug_subsys) - remove_proc_entry("debug_subsys", spl_proc_root); + kmem_fini(); out: - remove_proc_entry("spl", NULL); + debug_fini(); - return rc; + printk("SPL: Failed to Load Solaris Porting Layer v%s, " + "rc = %d\n", VERSION, rc); + RETURN(rc); } static void spl_fini(void) { + ENTRY; + + CWARN("Unloaded Solaris Porting Layer v%s\n", VERSION); + proc_fini(); vn_fini(); kmem_fini(); + debug_fini(); - remove_proc_entry("hw_serial", spl_proc_root); - remove_proc_entry("hostid", spl_proc_root); - remove_proc_entry("debug_subsys", spl_proc_root); - remove_proc_entry("debug_mask", spl_proc_root); - remove_proc_entry("spl", NULL); - - return; + EXIT; } module_init(spl_init); diff --git a/modules/spl/spl-proc.c b/modules/spl/spl-proc.c new file mode 100644 index 000000000..89ffc2baa --- /dev/null +++ b/modules/spl/spl-proc.c @@ -0,0 +1,574 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "config.h" + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM S_PROC + +static struct ctl_table_header *spl_header = NULL; +static unsigned long table_min = 0; +static unsigned long table_max = ~0; + +#define CTL_SPL 0x87 +enum { + CTL_DEBUG_SUBSYS = 1, /* Debug subsystem */ + CTL_DEBUG_MASK, /* Debug mask */ + CTL_DEBUG_PRINTK, /* Force all messages to console */ + CTL_DEBUG_MB, /* Debug 
buffer size */ + CTL_DEBUG_BINARY, /* Include binary data in buffer */ + CTL_DEBUG_CATASTROPHE, /* Set if we have BUG'd or panic'd */ + CTL_DEBUG_PANIC_ON_BUG, /* Set if we should panic on BUG */ + CTL_DEBUG_PATH, /* Dump log location */ + CTL_DEBUG_DUMP, /* Dump debug buffer to file */ + CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */ + CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */ + CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */ + CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */ + CTL_CONSOLE_BACKOFF, /* Delay increase factor */ + CTL_STACK_SIZE, /* Max observed stack size */ +#ifdef DEBUG_KMEM + CTL_KMEM_KMEMUSED, /* Currently alloc'd kmem bytes */ + CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */ + CTL_KMEM_VMEMUSED, /* Currently alloc'd vmem bytes */ + CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ +#endif + CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ + CTL_HW_SERIAL, /* Hardware serial number from hostid */ +}; + +/* Copy 'ubuffer_size' bytes from the user buffer into 'kbuffer', strip + * trailing whitespace and NUL-terminate what is left. + * + * Returns 0 on success, -EOVERFLOW when the input (plus its terminator) + * does not fit in 'kbuffer_size' bytes or the length is negative, -EFAULT + * when the user copy fails, and -EINVAL when nothing but whitespace (or + * nothing at all) was supplied. */ +static int +proc_copyin_string(char *kbuffer, int kbuffer_size, + const char *ubuffer, int ubuffer_size) +{ + int size; + + /* reject a negative length before it reaches copy_from_user() */ + if (ubuffer_size < 0 || ubuffer_size > kbuffer_size) + return -EOVERFLOW; + + if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size)) + return -EFAULT; + + /* strip trailing whitespace; 'size' is the trimmed length and the + * bound is tested before the index so kbuffer[-1] is never read */ + size = strnlen(kbuffer, ubuffer_size); + while (size > 0 && isspace(kbuffer[size - 1])) + size--; + + /* empty string */ + if (size == 0) + return -EINVAL; + + /* no space to terminate */ + if (size >= kbuffer_size) + return -EOVERFLOW; + + kbuffer[size] = 0; + return 0; +} + +static int +proc_copyout_string(char *ubuffer, int ubuffer_size, + const char *kbuffer, char *append) +{ + /* NB if 'append' != NULL, it's a single character to append to the + * copied out string - usually "\n", for /proc entries and + * (i.e.
a terminating zero byte) for sysctl entries + */ + int size = MIN(strlen(kbuffer), ubuffer_size); + + if (copy_to_user(ubuffer, kbuffer, size)) + return -EFAULT; + + if (append != NULL && size < ubuffer_size) { + if (copy_to_user(ubuffer + size, append, 1)) + return -EFAULT; + + size++; + } + + return size; +} + +/* sysctl handler shared by the debug_subsystem, debug_mask and debug_printk + * entries. A write parses the user string into *table->data through + * spl_debug_str2mask(); a read renders the mask with spl_debug_mask2str() + * and copies out the part at or after *ppos. The scratch buffer is + * released on every exit path. Returns a negative errno on failure. */ +static int +proc_dobitmasks(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + unsigned long *mask = table->data; + int is_subsys = (mask == &spl_debug_subsys) ? 1 : 0; + int is_printk = (mask == &spl_debug_printk) ? 1 : 0; + int size = 512, rc; + char *str; + ENTRY; + + str = kmem_alloc(size, KM_SLEEP); + if (str == NULL) + RETURN(-ENOMEM); + + if (write) { + rc = proc_copyin_string(str, size, buffer, *lenp); + if (rc < 0) { + /* do not leak the scratch buffer on bad input */ + kmem_free(str, size); + RETURN(rc); + } + + rc = spl_debug_str2mask(mask, str, is_subsys); + /* Always print BUG/ASSERT to console, so keep this mask */ + if (is_printk) + *mask |= D_EMERG; + + *ppos += *lenp; + } else { + rc = spl_debug_mask2str(str, size, *mask, is_subsys); + if (*ppos >= rc) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, + str + *ppos, "\n"); + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + kmem_free(str, size); + RETURN(rc); +} + +/* sysctl handler for debug_mb: a write passes the parsed number to + * spl_debug_set_mb(), a read prints the value of spl_debug_get_mb(). */ +static int +proc_debug_mb(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char str[32]; + int rc, len; + ENTRY; + + if (write) { + rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); + if (rc < 0) + RETURN(rc); + + rc = spl_debug_set_mb(simple_strtoul(str, NULL, 0)); + *ppos += *lenp; + } else { + len = snprintf(str, sizeof(str), "%d", spl_debug_get_mb()); + if (*ppos >= len) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n"); + + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + RETURN(rc); +} + +static int +proc_dump_kernel(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ +
ENTRY; + + if (write) { + spl_debug_dumplog(); + *ppos += *lenp; + } else { + *lenp = 0; + } + + RETURN(0); +} + +static int +proc_force_bug(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + ENTRY; + + if (write) { + CERROR("Crashing due to forced BUG\n"); + BUG(); + /* Unreachable */ + } else { + *lenp = 0; + } + + RETURN(0); +} + +static int +proc_console_max_delay_cs(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, max_delay_cs; + struct ctl_table dummy = *table; + long d; + ENTRY; + + dummy.data = &max_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (write) { + max_delay_cs = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (max_delay_cs <= 0) + RETURN(-EINVAL); + + d = (max_delay_cs * HZ) / 100; + if (d == 0 || d < spl_console_min_delay) + RETURN(-EINVAL); + + spl_console_max_delay = d; + } else { + max_delay_cs = (spl_console_max_delay * 100) / HZ; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +static int +proc_console_min_delay_cs(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, min_delay_cs; + struct ctl_table dummy = *table; + long d; + ENTRY; + + dummy.data = &min_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (write) { + min_delay_cs = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (min_delay_cs <= 0) + RETURN(-EINVAL); + + d = (min_delay_cs * HZ) / 100; + if (d == 0 || d > spl_console_max_delay) + RETURN(-EINVAL); + + spl_console_min_delay = d; + } else { + min_delay_cs = (spl_console_min_delay * 100) / HZ; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +static int +proc_console_backoff(struct ctl_table *table, int write, struct file *filp, + void __user 
*buffer, size_t *lenp, loff_t *ppos) +{ + int rc, backoff; + struct ctl_table dummy = *table; + ENTRY; + + dummy.data = &backoff; + dummy.proc_handler = &proc_dointvec; + + if (write) { + backoff = 0; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + RETURN(rc); + + if (backoff <= 0) + RETURN(-EINVAL); + + spl_console_backoff = backoff; + } else { + backoff = spl_console_backoff; + rc = proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + } + + RETURN(rc); +} + +/* Read-only sysctl handler for an atomic64_t counter stored in table->data. + * A write is accepted and ignored (only *ppos advances); a read snapshots + * the counter into a local and formats it with proc_doulongvec_minmax(). */ +static int +proc_doatomic64(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc = 0; + unsigned long min = 0, max = ~0, val; + struct ctl_table dummy = *table; + ENTRY; + + dummy.data = &val; + /* keep the recorded handler in step with the helper called below */ + dummy.proc_handler = &proc_doulongvec_minmax; + dummy.extra1 = &min; + dummy.extra2 = &max; + + if (write) { + *ppos += *lenp; + } else { + /* the counter is an atomic64_t, so use the 64-bit accessor */ + val = atomic64_read((atomic64_t *)table->data); + rc = proc_doulongvec_minmax(&dummy, write, filp, + buffer, lenp, ppos); + } + + RETURN(rc); +} + +/* sysctl handler for hostid. A write parses a hex string (no leading 0x), + * stores it in spl_hostid and rewrites spl_hw_serial as its decimal form; + * a read prints spl_hostid in hex. Returns a negative errno on failure. */ +static int +proc_dohostid(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int len, rc = 0; + unsigned long val; + char *end, str[32]; + ENTRY; + + if (write) { + /* We can't use proc_doulongvec_minmax() in the write + * case here because hostid while a hex value has no + * leading 0x which confuses the helper function. */ + rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); + if (rc < 0) + RETURN(rc); + + val = simple_strtoul(str, &end, 16); + if (str == end) + RETURN(-EINVAL); + + spl_hostid = val; + /* spl_hw_serial is defined as char[11]; bound the write so a + * value wider than 32 bits cannot run past the buffer */ + snprintf(spl_hw_serial, 11, "%lu", ((long)val >= 0) ?
val : -val); + *ppos += *lenp; + } else { + len = snprintf(str, sizeof(str), "%lx", spl_hostid); + if (*ppos >= len) + rc = 0; + else + rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n"); + + if (rc >= 0) { + *lenp = rc; + *ppos += rc; + } + } + + RETURN(rc); +} + +static struct ctl_table spl_table[] = { + /* NB No .strategy entries have been provided since + * sysctl(8) prefers to go via /proc for portability. + */ + { + .ctl_name = CTL_DEBUG_SUBSYS, + .procname = "debug_subsystem", + .data = &spl_debug_subsys, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_MASK, + .procname = "debug_mask", + .data = &spl_debug_mask, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_PRINTK, + .procname = "debug_printk", + .data = &spl_debug_printk, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = CTL_DEBUG_MB, + .procname = "debug_mb", + .mode = 0644, + .proc_handler = &proc_debug_mb, + }, + { + .ctl_name = CTL_DEBUG_BINARY, + .procname = "debug_binary", + .data = &spl_debug_binary, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_DEBUG_CATASTROPHE, + .procname = "catastrophe", + .data = &spl_debug_catastrophe, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_DEBUG_PANIC_ON_BUG, + .procname = "panic_on_bug", + .data = &spl_debug_panic_on_bug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_DEBUG_PATH, + .procname = "debug_path", + .data = spl_debug_file_path, + .maxlen = sizeof(spl_debug_file_path), + .mode = 0644, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = CTL_DEBUG_DUMP, + .procname = "debug_dump", + .mode = 0200, + .proc_handler = &proc_dump_kernel, + }, + { .ctl_name = CTL_DEBUG_FORCE_BUG, + .procname 
= "force_bug", + .mode = 0200, + .proc_handler = &proc_force_bug, + }, + { + .ctl_name = CTL_CONSOLE_RATELIMIT, + .procname = "console_ratelimit", + .data = &spl_console_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_CONSOLE_MAX_DELAY_CS, + .procname = "console_max_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_max_delay_cs, + }, + { + .ctl_name = CTL_CONSOLE_MIN_DELAY_CS, + .procname = "console_min_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_min_delay_cs, + }, + { + .ctl_name = CTL_CONSOLE_BACKOFF, + .procname = "console_backoff", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_backoff, + }, + { + .ctl_name = CTL_STACK_SIZE, + .procname = "stack_max", + .data = &spl_debug_stack, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, +#ifdef DEBUG_KMEM + { + .ctl_name = CTL_KMEM_KMEMUSED, + .procname = "kmem_used", + .data = &kmem_alloc_used, + .maxlen = sizeof(atomic64_t), + .mode = 0444, + .proc_handler = &proc_doatomic64, + }, + { + .ctl_name = CTL_KMEM_KMEMMAX, + .procname = "kmem_max", + .data = &kmem_alloc_max, + .maxlen = sizeof(unsigned long), + .extra1 = &table_min, + .extra2 = &table_max, + .mode = 0444, + .proc_handler = &proc_doulongvec_minmax, + }, + { + .ctl_name = CTL_KMEM_VMEMUSED, + .procname = "vmem_used", + .data = &vmem_alloc_used, + .maxlen = sizeof(atomic64_t), + .mode = 0444, + .proc_handler = &proc_doatomic64, + }, + { + .ctl_name = CTL_KMEM_VMEMMAX, + .procname = "vmem_max", + .data = &vmem_alloc_max, + .maxlen = sizeof(unsigned long), + .extra1 = &table_min, + .extra2 = &table_max, + .mode = 0444, + .proc_handler = &proc_doulongvec_minmax, + }, +#endif + { + .ctl_name = CTL_HOSTID, + .procname = "hostid", + .data = &spl_hostid, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dohostid, + }, + { + .ctl_name = 
CTL_HW_SERIAL, + .procname = "hw_serial", + .data = spl_hw_serial, + .maxlen = sizeof(spl_hw_serial), + .mode = 0444, + .proc_handler = &proc_dostring, + }, + { 0 }, +}; + +static struct ctl_table spl_dir_table[] = { + { + .ctl_name = CTL_SPL, + .procname = "spl", + .mode = 0555, + .child = spl_table, + }, + {0} +}; + +int +proc_init(void) +{ + ENTRY; + +#ifdef CONFIG_SYSCTL + spl_header = register_sysctl_table(spl_dir_table, 0); + if (spl_header == NULL) + RETURN(-EUNATCH); +#endif + RETURN(0); +} + +void +proc_fini(void) +{ + ENTRY; + +#ifdef CONFIG_SYSCTL + ASSERT(spl_header != NULL); + unregister_sysctl_table(spl_header); +#endif + EXIT; +} diff --git a/modules/spl/spl-vnode.c b/modules/spl/spl-vnode.c index 10b227761..2ac34237b 100644 --- a/modules/spl/spl-vnode.c +++ b/modules/spl/spl-vnode.c @@ -588,7 +588,7 @@ vn_fini(void) rc = kmem_cache_destroy(vn_file_cache); if (rc) - printk("spl: Warning leaked vn_file_cache objects\n"); + printk("spl: Warning leaked vn_file_cache objects, %d\n", rc); vn_file_cache = NULL; spin_unlock(&vn_file_lock); @@ -598,7 +598,7 @@ vn_fini(void) rc = kmem_cache_destroy(vn_cache); if (rc) - printk("spl: Warning leaked vn_cache objects\n"); + printk("spl: Warning leaked vn_cache objects, %d\n", rc); return; } /* vn_fini() */ -- 2.40.0