]> granicus.if.org Git - spl/commitdiff
Additional Linux VM integration
author Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 5 Feb 2009 20:26:34 +0000 (12:26 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 5 Feb 2009 20:26:34 +0000 (12:26 -0800)
Added support for the Solaris swapfs_minfree and swapfs_reserve tunables.
In addition, availrmem is now available and returns a value which is
reasonably analogous to the Solaris meaning.  On Linux we return the
sum of free and inactive pages since these are all easily
reclaimable.

All tunables are available in /proc/sys/kernel/spl/vm/* and they may
need some adjustment once we observe the real behavior.  Some of
the defaults are mapped to similar Linux counterparts, others are
taken straight from the OpenSolaris defaults.

config/spl-build.m4
configure
configure.ac
include/sys/vmsystm.h
module/spl/spl-kmem.c
module/spl/spl-proc.c
spl_config.h.in

index b7aa024e436b1e5d48a075df520e75e4e319b9fc..5ee615bf8dbd50eaa53a29fb78accf02f7146b18 100644 (file)
@@ -707,7 +707,7 @@ AC_DEFUN([SPL_AC_3ARGS_ON_EACH_CPU], [
 ])
 
 dnl #
-dnl # Distro specific first_online_pgdat symbol export.
+dnl # Distro specific first_online_pgdat() symbol export.
 dnl #
 AC_DEFUN([SPL_AC_FIRST_ONLINE_PGDAT], [
        SPL_CHECK_SYMBOL_EXPORT(
@@ -719,7 +719,7 @@ AC_DEFUN([SPL_AC_FIRST_ONLINE_PGDAT], [
 ])
 
 dnl #
-dnl # Distro specific next_online_pgdat symbol export.
+dnl # Distro specific next_online_pgdat() symbol export.
 dnl #
 AC_DEFUN([SPL_AC_NEXT_ONLINE_PGDAT], [
        SPL_CHECK_SYMBOL_EXPORT(
@@ -731,7 +731,7 @@ AC_DEFUN([SPL_AC_NEXT_ONLINE_PGDAT], [
 ])
 
 dnl #
-dnl # Distro specific next_zone symbol export.
+dnl # Distro specific next_zone() symbol export.
 dnl #
 AC_DEFUN([SPL_AC_NEXT_ZONE], [
        SPL_CHECK_SYMBOL_EXPORT(
@@ -741,3 +741,15 @@ AC_DEFUN([SPL_AC_NEXT_ZONE], [
                [next_zone() is available])],
                [])
 ])
+
+dnl #
+dnl # Distro specific get_zone_counts() symbol export.
+dnl #
+AC_DEFUN([SPL_AC_GET_ZONE_COUNTS], [
+       SPL_CHECK_SYMBOL_EXPORT(
+               [get_zone_counts],
+               [],
+               [AC_DEFINE(HAVE_GET_ZONE_COUNTS, 1,
+               [get_zone_counts() is available])],
+               [])
+])
index 7e59d67fdd89e9435e8684daf779c92e49e2eddf..21e222de479e7b4f60a86aa1e0a5a2746f4d372a 100755 (executable)
--- a/configure
+++ b/configure
@@ -20598,6 +20598,45 @@ _ACEOF
 
 
 
+       echo "$as_me:$LINENO: checking whether symbol get_zone_counts is exported" >&5
+echo $ECHO_N "checking whether symbol get_zone_counts is exported... $ECHO_C" >&6
+       grep -q -E '[[:space:]]get_zone_counts[[:space:]]' $LINUX/Module.symvers 2>/dev/null
+       rc=$?
+       if test $rc -ne 0; then
+               export=0
+               for file in ; do
+                       grep -q -E "EXPORT_SYMBOL.*(get_zone_counts)" "$LINUX/$file" 2>/dev/null
+                       rc=$?
+                       if test $rc -eq 0; then
+                               export=1
+                               break;
+                       fi
+               done
+               if test $export -eq 0; then
+                       echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+
+               else
+                       echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GET_ZONE_COUNTS 1
+_ACEOF
+
+               fi
+       else
+               echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GET_ZONE_COUNTS 1
+_ACEOF
+
+       fi
+
+
+
 TOPDIR=`/bin/pwd`
 
 # Add "V=1" to KERNELMAKE_PARAMS to enable verbose module build
index bd4f699769457bbbb2afd95425facfeb6ca9895c..259c4e89264e4b34ad6488efbc690ac9bec53ef3 100644 (file)
@@ -72,6 +72,7 @@ SPL_AC_3ARGS_ON_EACH_CPU
 SPL_AC_FIRST_ONLINE_PGDAT
 SPL_AC_NEXT_ONLINE_PGDAT
 SPL_AC_NEXT_ZONE
+SPL_AC_GET_ZONE_COUNTS
 
 TOPDIR=`/bin/pwd`
 
index a6e9e7d7e4c373c24667f0f7fb354e3eb68ef830..75ae8a9914d788704864d5a83ab9a64c5fc03d29 100644 (file)
 
 #define physmem                                num_physpages
 #define freemem                                nr_free_pages()
+#define availrmem                      spl_kmem_availrmem()
 
 extern pgcnt_t minfree;                        /* Sum of zone->pages_min */
 extern pgcnt_t desfree;                        /* Sum of zone->pages_low */
 extern pgcnt_t lotsfree;               /* Sum of zone->pages_high */
-extern pgcnt_t needfree;               /* Always 0 */
-extern pgcnt_t swapfs_minfree;
-extern pgcnt_t swapfs_desfree;
-extern pgcnt_t swapfs_reserve;
-extern pgcnt_t availrmem;
+extern pgcnt_t needfree;               /* Always 0 unused in new Solaris */
+extern pgcnt_t swapfs_minfree;         /* Solaris default value */
+extern pgcnt_t swapfs_reserve;         /* Solaris default value */
 
 extern vmem_t *heap_arena;             /* primary kernel heap arena */
 extern vmem_t *zio_alloc_arena;                /* arena for zio caches */
@@ -62,15 +61,8 @@ extern vmem_t *zio_arena;            /* arena for allocating zio memory */
 #define VMEM_ALLOC                     0x01
 #define VMEM_FREE                      0x02
 
-static __inline__ size_t
-vmem_size(vmem_t *vmp, int typemask)
-{
-       /* Arena's unsupported */
-       ASSERT(vmp == NULL);
-       ASSERT(typemask & (VMEM_ALLOC | VMEM_FREE));
-
-       return 0;
-}
+extern pgcnt_t spl_kmem_availrmem(void);
+extern size_t vmem_size(vmem_t *vmp, int typemask);
 
 #define xcopyin(from, to, size)                copy_from_user(to, from, size)
 #define xcopyout(from, to, size)       copy_to_user(to, from, size)
index c39636e06bb934b9de6f08666e093bacfcd1ae41..b5cd9fb1216a53faf98681a9473d887160fff772 100644 (file)
@@ -64,18 +64,12 @@ EXPORT_SYMBOL(lotsfree);
 pgcnt_t needfree = 0;
 EXPORT_SYMBOL(needfree);
 
-pgcnt_t swapfs_desfree = 0;
-EXPORT_SYMBOL(swapfs_desfree);
-
 pgcnt_t swapfs_minfree = 0;
 EXPORT_SYMBOL(swapfs_minfree);
 
 pgcnt_t swapfs_reserve = 0;
 EXPORT_SYMBOL(swapfs_reserve);
 
-pgcnt_t availrmem = 0;
-EXPORT_SYMBOL(availrmem);
-
 vmem_t *heap_arena = NULL;
 EXPORT_SYMBOL(heap_arena);
 
@@ -86,14 +80,17 @@ vmem_t *zio_arena = NULL;
 EXPORT_SYMBOL(zio_arena);
 
 #ifndef HAVE_FIRST_ONLINE_PGDAT
-struct pglist_data *first_online_pgdat(void)
+struct pglist_data *
+first_online_pgdat(void)
 {
        return NODE_DATA(first_online_node);
 }
+EXPORT_SYMBOL(first_online_pgdat);
 #endif /* HAVE_FIRST_ONLINE_PGDAT */
 
 #ifndef HAVE_NEXT_ONLINE_PGDAT
-struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
+struct pglist_data *
+next_online_pgdat(struct pglist_data *pgdat)
 {
        int nid = next_online_node(pgdat->node_id);
 
@@ -102,10 +99,12 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 
        return NODE_DATA(nid);
 }
+EXPORT_SYMBOL(next_online_pgdat);
 #endif /* HAVE_NEXT_ONLINE_PGDAT */
 
 #ifndef HAVE_NEXT_ZONE
-struct zone *next_zone(struct zone *zone)
+struct zone *
+next_zone(struct zone *zone)
 {
        pg_data_t *pgdat = zone->zone_pgdat;
 
@@ -120,8 +119,73 @@ struct zone *next_zone(struct zone *zone)
        }
        return zone;
 }
+EXPORT_SYMBOL(next_zone);
 #endif /* HAVE_NEXT_ZONE */
 
+#ifndef HAVE_GET_ZONE_COUNTS
+void
+__get_zone_counts(unsigned long *active, unsigned long *inactive,
+                  unsigned long *free, struct pglist_data *pgdat)
+{
+       struct zone *zones = pgdat->node_zones;
+       int i;
+
+       *active = 0;
+       *inactive = 0;
+       *free = 0;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               *active += zones[i].nr_active;
+               *inactive += zones[i].nr_inactive;
+               *free += zones[i].free_pages;
+       }
+}
+
+void
+get_zone_counts(unsigned long *active, unsigned long *inactive,
+                unsigned long *free)
+{
+       struct pglist_data *pgdat;
+
+       *active = 0;
+       *inactive = 0;
+       *free = 0;
+       for_each_online_pgdat(pgdat) {
+               unsigned long l, m, n;
+               __get_zone_counts(&l, &m, &n, pgdat);
+               *active += l;
+               *inactive += m;
+               *free += n;
+       }
+}
+EXPORT_SYMBOL(get_zone_counts);
+#endif /* HAVE_GET_ZONE_COUNTS */
+
+pgcnt_t
+spl_kmem_availrmem(void)
+{
+       unsigned long active;
+       unsigned long inactive;
+       unsigned long free;
+
+       get_zone_counts(&active, &inactive, &free);
+
+       /* The amount of easily available memory */
+       return free + inactive;
+}
+EXPORT_SYMBOL(spl_kmem_availrmem);
+
+size_t
+vmem_size(vmem_t *vmp, int typemask)
+{
+       /* Arena's unsupported */
+       ASSERT(vmp == NULL);
+       ASSERT(typemask & (VMEM_ALLOC | VMEM_FREE));
+
+       return 0;
+}
+EXPORT_SYMBOL(vmem_size);
+
+
 /*
  * Memory allocation interfaces and debugging for basic kmem_*
  * and vmem_* style memory allocation.  When DEBUG_KMEM is enable
@@ -1707,6 +1771,10 @@ spl_kmem_init_globals(void)
                desfree += zone->pages_low;
                lotsfree += zone->pages_high;
        }
+
+       /* Solaris default values */
+       swapfs_minfree = MAX(2*1024*1024 / PAGE_SIZE, physmem / 8);
+       swapfs_reserve = MIN(4*1024*1024 / PAGE_SIZE, physmem / 16);
 }
 
 int
index 024118a9f9b7dca1c519b8c1500c2cdd60921b7c..1ae1c129ae51fd01306e33def504b64dfa7d3663 100644 (file)
@@ -91,9 +91,10 @@ struct proc_dir_entry *proc_spl_kstat = NULL;
 #define CTL_VM_LOTSFREE                CTL_UNNUMBERED /* Lots of free memory */
 #define CTL_VM_NEEDFREE                CTL_UNNUMBERED /* Need free memory */
 #define CTL_VM_SWAPFS_MINFREE  CTL_UNNUMBERED /* Minimum swapfs memory */
-#define CTL_VM_SWAPFS_DESFREE  CTL_UNNUMBERED /* Desired swapfs memory */
 #define CTL_VM_SWAPFS_RESERVE  CTL_UNNUMBERED /* Reserved swapfs memory */
-#define CTL_VM_AVAILRMEM       CTL_UNNUMBERED /* Available reserved memory */
+#define CTL_VM_AVAILRMEM       CTL_UNNUMBERED /* Easily available memory */
+#define CTL_VM_FREEMEM         CTL_UNNUMBERED /* Free memory */
+#define CTL_VM_PHYSMEM         CTL_UNNUMBERED /* Total physical memory */
 
 #ifdef DEBUG_KMEM
 #define CTL_KMEM_KMEMUSED      CTL_UNNUMBERED /* Alloc'd kmem bytes */
@@ -145,9 +146,10 @@ enum {
        CTL_VM_LOTSFREE,                /* Lots of free memory threshold */
        CTL_VM_NEEDFREE,                /* Need free memory deficit */
        CTL_VM_SWAPFS_MINFREE,          /* Minimum swapfs memory */
-       CTL_VM_SWAPFS_DESFREE,          /* Desired swapfs memory */
        CTL_VM_SWAPFS_RESERVE,          /* Reserved swapfs memory */
-       CTL_VM_AVAILRMEM,               /* Available reserved memory */
+       CTL_VM_AVAILRMEM,               /* Easily available memory */
+       CTL_VM_FREEMEM,                 /* Free memory */
+       CTL_VM_PHYSMEM,                 /* Total physical memory */
 
 #ifdef DEBUG_KMEM
        CTL_KMEM_KMEMUSED,              /* Alloc'd kmem bytes */
@@ -486,6 +488,58 @@ proc_dohostid(struct ctl_table *table, int write, struct file *filp,
         RETURN(rc);
 }
 
+static int
+proc_doavailrmem(struct ctl_table *table, int write, struct file *filp,
+                 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+        int len, rc = 0;
+       char str[32];
+       ENTRY;
+
+        if (write) {
+                *ppos += *lenp;
+        } else {
+               len = snprintf(str, sizeof(str), "%lu", (unsigned long)availrmem);
+               if (*ppos >= len)
+                       rc = 0;
+               else
+                       rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n");
+
+               if (rc >= 0) {
+                       *lenp = rc;
+                       *ppos += rc;
+               }
+        }
+
+        RETURN(rc);
+}
+
+static int
+proc_dofreemem(struct ctl_table *table, int write, struct file *filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+        int len, rc = 0;
+       char str[32];
+       ENTRY;
+
+        if (write) {
+                *ppos += *lenp;
+        } else {
+               len = snprintf(str, sizeof(str), "%lu", (unsigned long)freemem);
+               if (*ppos >= len)
+                       rc = 0;
+               else
+                       rc = proc_copyout_string(buffer, *lenp, str + *ppos, "\n");
+
+               if (rc >= 0) {
+                       *lenp = rc;
+                       *ppos += rc;
+               }
+        }
+
+        RETURN(rc);
+}
+
 #ifdef DEBUG_MUTEX
 static void
 mutex_seq_show_headers(struct seq_file *f)
@@ -832,14 +886,6 @@ static struct ctl_table spl_vm_table[] = {
                 .mode     = 0644,
                 .proc_handler = &proc_dointvec,
         },
-        {
-                .ctl_name = CTL_VM_SWAPFS_DESFREE,
-                .procname = "swapfs_desfree",
-                .data     = &swapfs_desfree,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-        },
         {
                 .ctl_name = CTL_VM_SWAPFS_RESERVE,
                 .procname = "swapfs_reserve",
@@ -851,7 +897,21 @@ static struct ctl_table spl_vm_table[] = {
         {
                 .ctl_name = CTL_VM_AVAILRMEM,
                 .procname = "availrmem",
-                .data     = &availrmem,
+                .mode     = 0444,
+                .proc_handler = &proc_doavailrmem,
+        },
+        {
+                .ctl_name = CTL_VM_FREEMEM,
+                .procname = "freemem",
+                .data     = (void *)2,
+                .maxlen   = sizeof(int),
+                .mode     = 0444,
+                .proc_handler = &proc_dofreemem,
+        },
+        {
+                .ctl_name = CTL_VM_PHYSMEM,
+                .procname = "physmem",
+                .data     = &physmem,
                 .maxlen   = sizeof(int),
                 .mode     = 0444,
                 .proc_handler = &proc_dointvec,
index 2bfb5c48733147154a82ca911bd46f9b828c1ee0..1b7c8abab2e457c1c14fcc89619857ef4e05d6ae 100644 (file)
@@ -51,6 +51,9 @@
 /* fls64() is available */
 #undef HAVE_FLS64
 
+/* get_zone_counts() is available */
+#undef HAVE_GET_ZONE_COUNTS
+
 /* init_utsname() is available */
 #undef HAVE_INIT_UTSNAME