]> granicus.if.org Git - zfs/blob - module/spl/spl-generic.c
Prefix all SPL debug macros with 'S'
[zfs] / module / spl / spl-generic.c
1 /*****************************************************************************\
2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3  *  Copyright (C) 2007 The Regents of the University of California.
4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6  *  UCRL-CODE-235197
7  *
8  *  This file is part of the SPL, Solaris Porting Layer.
9  *  For details, see <http://github.com/behlendorf/spl/>.
10  *
11  *  The SPL is free software; you can redistribute it and/or modify it
12  *  under the terms of the GNU General Public License as published by the
13  *  Free Software Foundation; either version 2 of the License, or (at your
14  *  option) any later version.
15  *
16  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
17  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
19  *  for more details.
20  *
21  *  You should have received a copy of the GNU General Public License along
22  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
23  *****************************************************************************
24  *  Solaris Porting Layer (SPL) Generic Implementation.
25 \*****************************************************************************/
26
27 #include <sys/sysmacros.h>
28 #include <sys/systeminfo.h>
29 #include <sys/vmsystm.h>
30 #include <sys/vnode.h>
31 #include <sys/kmem.h>
32 #include <sys/mutex.h>
33 #include <sys/rwlock.h>
34 #include <sys/taskq.h>
35 #include <sys/debug.h>
36 #include <sys/proc.h>
37 #include <sys/kstat.h>
38 #include <sys/utsname.h>
39 #include <sys/file.h>
40 #include <linux/kmod.h>
41 #include <linux/proc_compat.h>
42 #include <spl-debug.h>
43
44 #ifdef SS_DEBUG_SUBSYS
45 #undef SS_DEBUG_SUBSYS
46 #endif
47
48 #define SS_DEBUG_SUBSYS SS_GENERIC
49
50 char spl_version[16] = "SPL v" SPL_META_VERSION;
51 EXPORT_SYMBOL(spl_version);
52
53 long spl_hostid = 0;
54 EXPORT_SYMBOL(spl_hostid);
55
56 char hw_serial[HW_HOSTID_LEN] = "<none>";
57 EXPORT_SYMBOL(hw_serial);
58
59 proc_t p0 = { 0 };
60 EXPORT_SYMBOL(p0);
61
62 #ifndef HAVE_KALLSYMS_LOOKUP_NAME
63 kallsyms_lookup_name_t spl_kallsyms_lookup_name_fn = SYMBOL_POISON;
64 #endif
65
66 int
67 highbit(unsigned long i)
68 {
69         register int h = 1;
70         SENTRY;
71
72         if (i == 0)
73                 SRETURN(0);
74 #if BITS_PER_LONG == 64
75         if (i & 0xffffffff00000000ul) {
76                 h += 32; i >>= 32;
77         }
78 #endif
79         if (i & 0xffff0000) {
80                 h += 16; i >>= 16;
81         }
82         if (i & 0xff00) {
83                 h += 8; i >>= 8;
84         }
85         if (i & 0xf0) {
86                 h += 4; i >>= 4;
87         }
88         if (i & 0xc) {
89                 h += 2; i >>= 2;
90         }
91         if (i & 0x2) {
92                 h += 1;
93         }
94         SRETURN(h);
95 }
96 EXPORT_SYMBOL(highbit);
97
98 #if BITS_PER_LONG == 32
99 /*
100  * Support 64/64 => 64 division on a 32-bit platform.  While the kernel
101  * provides a div64_u64() function for this we do not use it because the
102  * implementation is flawed.  There are cases which return incorrect
103  * results as late as linux-2.6.35.  Until this is fixed upstream the
104  * spl must provide its own implementation.
105  *
106  * This implementation is a slightly modified version of the algorithm
107  * proposed by the book 'Hacker's Delight'.  The original source can be
108  * found here and is available for use without restriction.
109  *
110  * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
111  */
112
/*
 * Count the leading zero bits of a 64-bit value.
 *
 * Returns 64 for x == 0, otherwise the number of zero bits above the
 * most significant set bit (0 when bit 63 is set, 63 when x == 1).
 */
static int
nlz64(uint64_t x)
{
	int zeros = 0;

	if (x == 0)
		return 64;

	/*
	 * Binary search: if the top half of the window is empty, account
	 * for its width and slide the value up so the next, narrower test
	 * again inspects the top bits.
	 */
	if (x <= 0x00000000FFFFFFFFULL) {
		zeros += 32;
		x <<= 32;
	}
	if (x <= 0x0000FFFFFFFFFFFFULL) {
		zeros += 16;
		x <<= 16;
	}
	if (x <= 0x00FFFFFFFFFFFFFFULL) {
		zeros += 8;
		x <<= 8;
	}
	if (x <= 0x0FFFFFFFFFFFFFFFULL) {
		zeros += 4;
		x <<= 4;
	}
	if (x <= 0x3FFFFFFFFFFFFFFFULL) {
		zeros += 2;
		x <<= 2;
	}
	if (x <= 0x7FFFFFFFFFFFFFFFULL)
		zeros += 1;

	return zeros;
}
132
/*
 * Divide a 64-bit value by a 32-bit value and return the quotient.
 *
 * Newer kernels ship a div_u64() helper, but a private wrapper around
 * do_div() is kept here to simplify portability between kernel
 * versions.  The value returned by do_div() itself is deliberately
 * discarded; only the updated first argument is used.
 */
static inline uint64_t
__div_u64(uint64_t u, uint32_t v)
{
	uint64_t quotient = u;

	(void) do_div(quotient, v);

	return quotient;
}
143
/*
 * 64-bit unsigned division (u / v) for 32-bit machines.
 *
 * Divisor fits in 32 bits:
 *   (1) If the upper half of the dividend is smaller than the divisor
 *       the quotient fits in 32 bits and a single __div_u64() is
 *       enough (overflow is not possible).
 *   (2) Otherwise two divisions are performed, grade school style:
 *       the upper half first, then its remainder joined with the
 *       lower half.
 *
 * Divisor needs more than 32 bits:
 *   The divisor is normalized so its top bit is set and only its upper
 *   32 bits are used; the dividend is halved so the estimate cannot
 *   overflow.  After undoing both scalings the estimate is lowered by
 *   one (unless it is already zero) and then raised by one if the
 *   remainder shows it is too small.
 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t hi, lo, rem, q_hi, q_lo, v_norm, quot;
	int shift;

	if ((v >> 32) == 0) {
		/* Divisor is below 2**32. */
		if ((u >> 32) < v)
			return __div_u64(u, v);

		/* Quotient would not fit in one step: split the dividend. */
		hi = u >> 32;
		lo = u & 0xFFFFFFFF;

		q_hi = __div_u64(hi, v);	/* First quotient digit. */
		rem = hi - q_hi * v;		/* First remainder, < v. */

		lo += (rem << 32);
		q_lo = __div_u64(lo, v);	/* Second quotient digit. */

		return (q_hi << 32) + q_lo;
	}

	/* Divisor is at least 2**32, hence 0 <= shift <= 31. */
	shift = nlz64(v);
	v_norm = (v << shift) >> 32;		/* Normalized divisor, MSB set. */

	quot = __div_u64(u >> 1, v_norm);	/* Halve u to avoid overflow. */
	quot = (quot << shift) >> 31;		/* Undo both scalings. */

	if (quot != 0)
		quot = quot - 1;		/* Correct, or too small by 1. */

	if ((u - quot * v) >= v)
		quot = quot + 1;		/* Now the quotient is correct. */

	return quot;
}
EXPORT_SYMBOL(__udivdi3);
187
/*
 * 64-bit signed division (u / v) for 32-bit machines.
 *
 * The magnitudes are divided with __udivdi3() and the sign is applied
 * afterwards without branching: 'sign' is the sign bit of (u ^ v)
 * shifted across the whole word, so (x ^ sign) - sign leaves x alone
 * when the operand signs agree and negates it when they differ.
 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	int64_t magnitude, sign;

	magnitude = __udivdi3(abs64(u), abs64(v));
	sign = (u ^ v) >> 63;

	return (magnitude ^ sign) - sign;
}
EXPORT_SYMBOL(__divdi3);
200
/*
 * 64-bit unsigned modulo for 32-bit machines.
 *
 * Derived from the division helper: the remainder is whatever is left
 * of the dividend after subtracting divisor * quotient.
 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	uint64_t quotient = __udivdi3(dividend, divisor);

	return (dividend - (divisor * quotient));
}
EXPORT_SYMBOL(__umoddi3);
210
211 #endif /* BITS_PER_LONG */
212
213 /* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
214  * ddi_strtol(9F) man page.  I have not verified the behavior of these
215  * functions against their Solaris counterparts.  It is possible that I
216  * may have misinterpreted the man page or the man page is incorrect.
217  */
218 int ddi_strtoul(const char *, char **, int, unsigned long *);
219 int ddi_strtol(const char *, char **, int, long *);
220 int ddi_strtoull(const char *, char **, int, unsigned long long *);
221 int ddi_strtoll(const char *, char **, int, long long *);
222
/*
 * define_ddi_strtoux(type, valtype) - emit ddi_strtou<type>().
 *
 * The generated function converts the leading digits of 'str' into an
 * unsigned 'valtype':
 *
 *   str     Text to parse.  An empty string yields EINVAL.
 *   endptr  Optional; on success receives the address of the first
 *           character that was not consumed.
 *   base    Radix of the number.  0 selects it from the prefix:
 *           "0x"/"0X" followed by a hex digit means 16, "0" followed
 *           by an octal digit means 8, any other leading '0' yields
 *           EINVAL, and everything else is decimal.
 *   result  Receives the converted value on success.
 *
 * Returns 0 on success, EINVAL as described above, or ERANGE when the
 * value does not fit in 'valtype' (in which case neither *result nor
 * *endptr is written).  When no digit is consumed the call still
 * succeeds with *result set to 0.
 *
 * 'valtype' must be an unsigned integer type.
 */
#define define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
		     int base, valtype *result)				\
{									\
	const valtype max_value = (valtype)-1;				\
	valtype value = 0;						\
	char *ptr = (char *)str;					\
	int digit;							\
									\
	if (strlen(ptr) == 0)						\
		return EINVAL;						\
									\
	/* Auto-detect base based on prefix */				\
	if (!base) {							\
		if (str[0] == '0') {					\
			if (tolower((unsigned char)str[1]) == 'x' &&	\
			    isxdigit((unsigned char)str[2])) {		\
				base = 16; /* hex */			\
				ptr += 2;				\
			} else if (str[1] >= '0' && str[1] < '8') {	\
				base = 8; /* octal */			\
				ptr += 1;				\
			} else {					\
				return EINVAL;				\
			}						\
		} else {						\
			base = 10; /* decimal */			\
		}							\
	}								\
									\
	while (1) {							\
		unsigned char c = (unsigned char)*ptr;			\
									\
		if (isdigit(c))						\
			digit = c - '0';				\
		else if (isalpha(c))					\
			digit = tolower(c) - 'a' + 10;			\
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		/* Reject before multiplying: a wrapped product can  */	\
		/* exceed the old value and would go unnoticed.      */	\
		if (value > (max_value - (valtype)digit) / (valtype)base) \
			return ERANGE;					\
									\
		value = value * base + digit;				\
		ptr++;							\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = ptr;						\
									\
	return 0;							\
}

/*
 * define_ddi_strtox(type, valtype) - emit ddi_strto<type>().
 *
 * Signed counterpart of ddi_strtou<type>(): an optional leading '-'
 * is accepted and the magnitude is parsed by the unsigned variant,
 * whose error codes are passed through unchanged.  If nothing could
 * be consumed after the '-', *endptr (when supplied) is reset to
 * 'str' and the result is 0.  'endptr' may be NULL.
 *
 * 'valtype' must be spelled with plain integer keywords ("long",
 * "long long") because the magnitude is held in 'unsigned valtype'.
 * No check is made that the magnitude fits the signed range.
 */
#define define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
		    int base, valtype *result)				\
{									\
	unsigned valtype magnitude = 0;					\
	char *end = (char *)str;					\
	int negative = (*str == '-');					\
	int rc;								\
									\
	rc = ddi_strtou##type(negative ? str + 1 : str, &end, base,	\
			      &magnitude);				\
	if (rc)								\
		return rc;						\
									\
	if (negative) {							\
		if (end == str + 1)					\
			end = (char *)str;	/* lone '-' prefix */	\
		else							\
			magnitude = 0 - magnitude;			\
	}								\
									\
	*result = (valtype)magnitude;					\
									\
	if (endptr)							\
		*endptr = end;						\
									\
	return 0;							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
305
306 EXPORT_SYMBOL(ddi_strtoul);
307 EXPORT_SYMBOL(ddi_strtol);
308 EXPORT_SYMBOL(ddi_strtoll);
309 EXPORT_SYMBOL(ddi_strtoull);
310
311 int
312 ddi_copyin(const void *from, void *to, size_t len, int flags)
313 {
314         /* Fake ioctl() issued by kernel, 'from' is a kernel address */
315         if (flags & FKIOCTL) {
316                 memcpy(to, from, len);
317                 return 0;
318         }
319
320         return copyin(from, to, len);
321 }
322 EXPORT_SYMBOL(ddi_copyin);
323
324 int
325 ddi_copyout(const void *from, void *to, size_t len, int flags)
326 {
327         /* Fake ioctl() issued by kernel, 'from' is a kernel address */
328         if (flags & FKIOCTL) {
329                 memcpy(to, from, len);
330                 return 0;
331         }
332
333         return copyout(from, to, len);
334 }
335 EXPORT_SYMBOL(ddi_copyout);
336
337 #ifndef HAVE_PUT_TASK_STRUCT
338 /*
339  * This is only a stub function which should never be used.  The SPL should
340  * never be putting away the last reference on a task structure so this will
341  * not be called.  However, we still need to define it so the module does not
342  * have undefined symbol at load time.  That all said if this impossible
343  * thing does somehow happen PANIC immediately so we know about it.
344  */
345 void
346 __put_task_struct(struct task_struct *t)
347 {
348         PANIC("Unexpectly put last reference on task %d\n", (int)t->pid);
349 }
350 EXPORT_SYMBOL(__put_task_struct);
351 #endif /* HAVE_PUT_TASK_STRUCT */
352
353 struct new_utsname *__utsname(void)
354 {
355 #ifdef HAVE_INIT_UTSNAME
356         return init_utsname();
357 #else
358         return &system_utsname;
359 #endif
360 }
361 EXPORT_SYMBOL(__utsname);
362
363 static int
364 set_hostid(void)
365 {
366         char sh_path[] = "/bin/sh";
367         char *argv[] = { sh_path,
368                          "-c",
369                          "/usr/bin/hostid >/proc/sys/kernel/spl/hostid",
370                          NULL };
371         char *envp[] = { "HOME=/",
372                          "TERM=linux",
373                          "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
374                          NULL };
375         int rc;
376
377         /* Doing address resolution in the kernel is tricky and just
378          * not a good idea in general.  So to set the proper 'hw_serial'
379          * use the usermodehelper support to ask '/bin/sh' to run
380          * '/usr/bin/hostid' and redirect the result to /proc/sys/spl/hostid
381          * for us to use.  It's a horrific solution but it will do for now.
382          */
383         rc = call_usermodehelper(sh_path, argv, envp, 1);
384         if (rc)
385                 printk("SPL: Failed user helper '%s %s %s', rc = %d\n",
386                        argv[0], argv[1], argv[2], rc);
387
388         return rc;
389 }
390
391 uint32_t
392 zone_get_hostid(void *zone)
393 {
394         unsigned long hostid;
395
396         /* Only the global zone is supported */
397         ASSERT(zone == NULL);
398
399         if (ddi_strtoul(hw_serial, NULL, HW_HOSTID_LEN-1, &hostid) != 0)
400                 return HW_INVALID_HOSTID;
401
402         return (uint32_t)hostid;
403 }
404 EXPORT_SYMBOL(zone_get_hostid);
405
406 #ifndef HAVE_KALLSYMS_LOOKUP_NAME
407 /*
408  * Because kallsyms_lookup_name() is no longer exported in the
409  * mainline kernel we are forced to resort to somewhat drastic
410  * measures.  This function replaces the functionality by performing
411  * an upcall to user space where /proc/kallsyms is consulted for
412  * the requested address.
413  */
414 #define GET_KALLSYMS_ADDR_CMD                                           \
415         "gawk '{ if ( $3 == \"kallsyms_lookup_name\") { print $1 } }' " \
416         "/proc/kallsyms >/proc/sys/kernel/spl/kallsyms_lookup_name"
417
418 static int
419 set_kallsyms_lookup_name(void)
420 {
421         char sh_path[] = "/bin/sh";
422         char *argv[] = { sh_path,
423                          "-c",
424                          GET_KALLSYMS_ADDR_CMD,
425                          NULL };
426         char *envp[] = { "HOME=/",
427                          "TERM=linux",
428                          "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
429                          NULL };
430         int rc;
431
432         rc = call_usermodehelper(sh_path, argv, envp, 1);
433         if (rc)
434                 printk("SPL: Failed user helper '%s %s %s', rc = %d\n",
435                        argv[0], argv[1], argv[2], rc);
436
437         return rc;
438 }
439 #endif
440
441 static int
442 __init spl_init(void)
443 {
444         int rc = 0;
445
446         if ((rc = debug_init()))
447                 return rc;
448
449         if ((rc = spl_kmem_init()))
450                 SGOTO(out1, rc);
451
452         if ((rc = spl_mutex_init()))
453                 SGOTO(out2, rc);
454
455         if ((rc = spl_rw_init()))
456                 SGOTO(out3, rc);
457
458         if ((rc = spl_taskq_init()))
459                 SGOTO(out4, rc);
460
461         if ((rc = vn_init()))
462                 SGOTO(out5, rc);
463
464         if ((rc = proc_init()))
465                 SGOTO(out6, rc);
466
467         if ((rc = kstat_init()))
468                 SGOTO(out7, rc);
469
470         if ((rc = set_hostid()))
471                 SGOTO(out8, rc = -EADDRNOTAVAIL);
472
473 #ifndef HAVE_KALLSYMS_LOOKUP_NAME
474         if ((rc = set_kallsyms_lookup_name()))
475                 SGOTO(out8, rc = -EADDRNOTAVAIL);
476 #endif /* HAVE_KALLSYMS_LOOKUP_NAME */
477
478         if ((rc = spl_kmem_init_kallsyms_lookup()))
479                 SGOTO(out8, rc);
480
481         printk("SPL: Loaded Solaris Porting Layer v%s\n", SPL_META_VERSION);
482         SRETURN(rc);
483 out8:
484         kstat_fini();
485 out7:
486         proc_fini();
487 out6:
488         vn_fini();
489 out5:
490         spl_taskq_fini();
491 out4:
492         spl_rw_fini();
493 out3:
494         spl_mutex_fini();
495 out2:
496         spl_kmem_fini();
497 out1:
498         debug_fini();
499
500         printk("SPL: Failed to Load Solaris Porting Layer v%s, "
501                "rc = %d\n", SPL_META_VERSION, rc);
502         return rc;
503 }
504
505 static void
506 spl_fini(void)
507 {
508         SENTRY;
509
510         printk("SPL: Unloaded Solaris Porting Layer v%s\n", SPL_META_VERSION);
511         kstat_fini();
512         proc_fini();
513         vn_fini();
514         spl_taskq_fini();
515         spl_rw_fini();
516         spl_mutex_fini();
517         spl_kmem_fini();
518         debug_fini();
519 }
520
/*
 * Called when a dependent module is loaded.
 *
 * At module load time the pwd is '/' on a Solaris system, whereas on
 * Linux it is whatever directory the caller was in when running
 * insmod/modprobe.  The pwd is therefore set to '/' here; a failure is
 * only logged as a warning.
 */
void
spl_setup(void)
{
	int error = vn_set_pwd("/");

	if (error != 0)
		printk("SPL: Warning unable to set pwd to '/': %d\n", error);
}
EXPORT_SYMBOL(spl_setup);
537
/*
 * Called when a dependent module is unloaded.
 *
 * Counterpart of spl_setup(); there is currently nothing to undo, so
 * the body is intentionally empty.
 */
void
spl_cleanup(void)
{
	/* Nothing to do. */
}
EXPORT_SYMBOL(spl_cleanup);
544
545 module_init(spl_init);
546 module_exit(spl_fini);
547
548 MODULE_AUTHOR("Lawrence Livermore National Labs");
549 MODULE_DESCRIPTION("Solaris Porting Layer");
550 MODULE_LICENSE("GPL");