OpenZFS 9337 - zfs get all is slow due to uncached metadata

author Matthew Ahrens <mahrens@delphix.com>

Tue, 10 Jul 2018 17:49:50 +0000 (13:49 -0400)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Thu, 12 Jul 2018 17:49:27 +0000 (10:49 -0700)
author Matthew Ahrens <mahrens@delphix.com>
Tue, 10 Jul 2018 17:49:50 +0000 (13:49 -0400)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 12 Jul 2018 17:49:27 +0000 (10:49 -0700)
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h

index 16dc61e7f2c7016407cc4187e581003a675808dc..557d2af6675ae96112b939ada7fdba8e82da1ada 100644 (file)
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -84,6 +84,13 @@ typedef enum dbuf_states {
         DB_EVICTING
  } dbuf_states_t;
  
+typedef enum dbuf_cached_state {
+       DB_NO_CACHE = -1,
+       DB_DBUF_CACHE,
+       DB_DBUF_METADATA_CACHE,
+       DB_CACHE_MAX
+} dbuf_cached_state_t;
+
  struct dnode;
  struct dmu_tx;
  
@@ -240,11 +247,12 @@ typedef struct dmu_buf_impl {
          */
         avl_node_t db_link;
  
-       /*
-        * Link in dbuf_cache.
-        */
+       /* Link in dbuf_cache or dbuf_metadata_cache */
         multilist_node_t db_cache_link;
  
+       /* Tells us which dbuf cache this dbuf is in, if any */
+       dbuf_cached_state_t db_caching_status;
+
         /* Data which is unique to data (leaf) blocks: */
  
         /* User callback information. */
@@ -305,7 +313,7 @@ boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj,
  uint64_t dbuf_refcount(dmu_buf_impl_t *db);
  
  void dbuf_rele(dmu_buf_impl_t *db, void *tag);
-void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting);
+void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
  
  dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
      uint64_t blkid);
diff --git a/include/sys/dmu.h b/include/sys/dmu.h

index d95c09bb931e09f2bada1691f7269964e0af8492..28756e6f7f815eb7baeb736bd849589957e58eb5 100644 (file)
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -107,7 +107,8 @@ typedef enum dmu_object_byteswap {
  /*
   * Defines a uint8_t object type. Object types specify if the data
   * in the object is metadata (boolean) and how to byteswap the data
- * (dmu_object_byteswap_t).
+ * (dmu_object_byteswap_t). All of the types created by this method
+ * are cached in the dbuf metadata cache.
   */
  #define        DMU_OT(byteswap, metadata, encrypted) \
         (DMU_OT_NEWTYPE | \
@@ -119,6 +120,9 @@ typedef enum dmu_object_byteswap {
         ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
         (ot) < DMU_OT_NUMTYPES)
  
+#define        DMU_OT_IS_METADATA_CACHED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+       B_TRUE : dmu_ot[(ot)].ot_dbuf_metadata_cache)
+
  /*
   * MDB doesn't have dmu_ot; it defines these macros itself.
   */
@@ -883,6 +887,7 @@ typedef void (*const arc_byteswap_func_t)(void *buf, size_t size);
  typedef struct dmu_object_type_info {
         dmu_object_byteswap_t   ot_byteswap;
         boolean_t               ot_metadata;
+       boolean_t               ot_dbuf_metadata_cache;
         boolean_t               ot_encrypt;
         char                    *ot_name;
  } dmu_object_type_info_t;
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h

index fa032ba2b2cdc8fbd4fc4bbcb09fb1fbcbfa133b..fd5afc0bc2b5fcdc695171e3e94a1e4cb777fd50 100644 (file)
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -38,6 +38,7 @@
  #include <sys/zio.h>
  #include <sys/zil.h>
  #include <sys/sa.h>
+#include <sys/zfs_ioctl.h>
  
  #ifdef __cplusplus
  extern "C" {
@@ -90,6 +91,7 @@ typedef struct objset_phys {
  
  typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);
  
+#define        OBJSET_PROP_UNINITIALIZED       ((uint64_t)-1)
  struct objset {
         /* Immutable: */
         struct dsl_dataset *os_dsl_dataset;
@@ -125,6 +127,16 @@ struct objset {
         zfs_sync_type_t os_sync;
         zfs_redundant_metadata_type_t os_redundant_metadata;
         int os_recordsize;
+       /*
+        * The next four values are used as a cache of whatever's on disk, and
+        * are initialized the first time these properties are queried. Before
+        * being initialized with their real values, their values are
+        * OBJSET_PROP_UNINITIALIZED.
+        */
+       uint64_t os_version;
+       uint64_t os_normalization;
+       uint64_t os_utf8only;
+       uint64_t os_casesensitivity;
  
         /*
          * Pointer is constant; the blkptr it points to is protected by
diff --git a/include/sys/dnode.h b/include/sys/dnode.h

index 0774e663f1b63d3f186eee03fedc8edaa693aa12..6fdde5067b0657b153bd325bd88ca9235ef14869 100644 (file)
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -408,7 +408,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots,
      void *ref, dnode_t **dnp);
  boolean_t dnode_add_ref(dnode_t *dn, void *ref);
  void dnode_rele(dnode_t *dn, void *ref);
-void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
+void dnode_rele_and_unlock(dnode_t *dn, void *tag);
  void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
  void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
  void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h

index 317622f7c733e948f7b19efc8f535e94d3bc8cbe..b63ceffac57d890d50fe3bba9d1292e5cc55240f 100644 (file)
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -488,7 +488,6 @@ extern int zfs_secpolicy_rename_perms(const char *, const char *, cred_t *);
  extern int zfs_secpolicy_destroy_perms(const char *, cred_t *);
  extern void zfs_unmount_snap(const char *);
  extern void zfs_destroy_unmount_origin(const char *);
-extern boolean_t dataset_name_hidden(const char *);
  extern int getzfsvfs_impl(struct objset *, struct zfsvfs **);
  extern int getzfsvfs(const char *, struct zfsvfs **);
  
diff --git a/include/zfs_comutil.h b/include/zfs_comutil.h

index 8cc098ada21742fe1e792f4dae2b84f772c0f4dc..1360d6e1c17131829a9ff1cdb197b82f62434a3f 100644 (file)
--- a/include/zfs_comutil.h
+++ b/include/zfs_comutil.h
@@ -38,6 +38,9 @@ extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *);
  
  extern int zfs_zpl_version_map(int spa_version);
  extern int zfs_spa_version_map(int zpl_version);
+
+extern boolean_t zfs_dataset_name_hidden(const char *);
+
  #define        ZFS_NUM_LEGACY_HISTORY_EVENTS 41
  extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS];
  
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5

index a2c32db8d0da4b1f2a0f8da8e0862196eaa77b5f..eae8dc42870f5581c66ce180f613d3a63ec849a9 100644 (file)
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -41,6 +41,21 @@ kstat.
  Default value: \fB0\fR.
  .RE
  
+.sp
+.ne 2
+.na
+\fBdbuf_metadata_cache_max_bytes\fR (ulong)
+.ad
+.RS 12n
+Maximum size in bytes of the metadata dbuf cache.  When \fB0\fR this value will
+default to \fB1/2^dbuf_metadata_cache_shift\fR (1/64) of the target ARC size,
+otherwise the provided value in bytes will be used.  The behavior of the
+metadata dbuf cache and its associated settings can be observed via the
+\fB/proc/spl/kstat/zfs/dbufstats\fR kstat.
+.sp
+Default value: \fB0\fR.
+.RE
+
  .sp
  .ne 2
  .na
@@ -77,6 +92,18 @@ of the target arc size.
  Default value: \fB5\fR.
  .RE
  
+.sp
+.ne 2
+.na
+\fBdbuf_metadata_cache_shift\fR (int)
+.ad
+.RS 12n
+Set the size of the dbuf metadata cache, \fBdbuf_metadata_cache_max_bytes\fR,
+to a log2 fraction of the target arc size.
+.sp
+Default value: \fB6\fR.
+.RE
+
  .sp
  .ne 2
  .na
diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c

index 1f74095cce284939623057b9d15705e83c95b73d..5daa6907c5d0ada506f53e0697018f84ca1dad2d 100644 (file)
--- a/module/zcommon/zfs_comutil.c
+++ b/module/zcommon/zfs_comutil.c
@@ -204,10 +204,28 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
         "pool split",
  };
  
+boolean_t
+zfs_dataset_name_hidden(const char *name)
+{
+       /*
+        * Skip over datasets that are not visible in this zone,
+        * internal datasets (which have a $ in their name), and
+        * temporary datasets (which have a % in their name).
+        */
+       if (strchr(name, '$') != NULL)
+               return (B_TRUE);
+       if (strchr(name, '%') != NULL)
+               return (B_TRUE);
+       if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
+               return (B_TRUE);
+       return (B_FALSE);
+}
+
  #if defined(_KERNEL)
  EXPORT_SYMBOL(zfs_allocatable_devs);
  EXPORT_SYMBOL(zpool_get_load_policy);
  EXPORT_SYMBOL(zfs_zpl_version_map);
  EXPORT_SYMBOL(zfs_spa_version_map);
  EXPORT_SYMBOL(zfs_history_event_names);
+EXPORT_SYMBOL(zfs_dataset_name_hidden);
  #endif
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c

index 49e23e1d78de2f3e6a22b263a9b470ae85f2d9cb..dad090bf9fe26d6057746a6837947d90f0f44712 100644 (file)
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -49,6 +49,7 @@
  #include <sys/abd.h>
  #include <sys/vdev.h>
  #include <sys/cityhash.h>
+#include <sys/spa_impl.h>
  
  kstat_t *dbuf_ksp;
  
@@ -94,6 +95,18 @@ typedef struct dbuf_stats {
          * already created and in the dbuf hash table.
          */
         kstat_named_t hash_insert_race;
+       /*
+        * Statistics about the size of the metadata dbuf cache.
+        */
+       kstat_named_t metadata_cache_count;
+       kstat_named_t metadata_cache_size_bytes;
+       kstat_named_t metadata_cache_size_bytes_max;
+       /*
+        * For diagnostic purposes, this is incremented whenever we can't add
+        * something to the metadata cache because it's full, and instead put
+        * the data in the regular dbuf cache.
+        */
+       kstat_named_t metadata_cache_overflow;
  } dbuf_stats_t;
  
  dbuf_stats_t dbuf_stats = {
@@ -113,7 +126,11 @@ dbuf_stats_t dbuf_stats = {
         { "hash_elements_max",                  KSTAT_DATA_UINT64 },
         { "hash_chains",                        KSTAT_DATA_UINT64 },
         { "hash_chain_max",                     KSTAT_DATA_UINT64 },
-       { "hash_insert_race",                   KSTAT_DATA_UINT64 }
+       { "hash_insert_race",                   KSTAT_DATA_UINT64 },
+       { "metadata_cache_count",               KSTAT_DATA_UINT64 },
+       { "metadata_cache_size_bytes",          KSTAT_DATA_UINT64 },
+       { "metadata_cache_size_bytes_max",      KSTAT_DATA_UINT64 },
+       { "metadata_cache_overflow",            KSTAT_DATA_UINT64 }
  };
  
  #define        DBUF_STAT_INCR(stat, val)       \
@@ -175,24 +192,51 @@ static kcondvar_t dbuf_evict_cv;
  static boolean_t dbuf_evict_thread_exit;
  
  /*
- * LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
- * are not currently held but have been recently released. These dbufs
- * are not eligible for arc eviction until they are aged out of the cache.
- * Dbufs are added to the dbuf cache once the last hold is released. If a
- * dbuf is later accessed and still exists in the dbuf cache, then it will
- * be removed from the cache and later re-added to the head of the cache.
- * Dbufs that are aged out of the cache will be immediately destroyed and
- * become eligible for arc eviction.
+ * There are two dbuf caches; each dbuf can only be in one of them at a time.
+ *
+ * 1. Cache of metadata dbufs, to help make read-heavy administrative commands
+ *    from /sbin/zfs run faster. The "metadata cache" specifically stores dbufs
+ *    that represent the metadata that describes filesystems/snapshots/
+ *    bookmarks/properties/etc. We only evict from this cache when we export a
+ *    pool, to short-circuit as much I/O as possible for all administrative
+ *    commands that need the metadata. There is no eviction policy for this
+ *    cache, because we try to only include types in it which would occupy a
+ *    very small amount of space per object but create a large impact on the
+ *    performance of these commands. Instead, after it reaches a maximum size
+ *    (which should only happen on very small memory systems with a very large
+ *    number of filesystem objects), we stop taking new dbufs into the
+ *    metadata cache, instead putting them in the normal dbuf cache.
+ *
+ * 2. LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
+ *    are not currently held but have been recently released. These dbufs
+ *    are not eligible for arc eviction until they are aged out of the cache.
+ *    Dbufs that are aged out of the cache will be immediately destroyed and
+ *    become eligible for arc eviction.
+ *
+ * Dbufs are added to these caches once the last hold is released. If a dbuf is
+ * later accessed and still exists in the dbuf cache, then it will be removed
+ * from the cache and later re-added to the head of the cache.
+ *
+ * If a given dbuf meets the requirements for the metadata cache, it will go
+ * there, otherwise it will be considered for the generic LRU dbuf cache. The
+ * caches and the refcounts tracking their sizes are stored in an array indexed
+ * by those caches' matching enum values (from dbuf_cached_state_t).
   */
-static multilist_t *dbuf_cache;
-static refcount_t dbuf_cache_size;
-unsigned long dbuf_cache_max_bytes = 0;
+typedef struct dbuf_cache {
+       multilist_t *cache;
+       refcount_t size;
+} dbuf_cache_t;
+dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
  
-/* Set the default size of the dbuf cache to log2 fraction of arc size. */
+/* Size limits for the caches */
+unsigned long dbuf_cache_max_bytes = 0;
+unsigned long dbuf_metadata_cache_max_bytes = 0;
+/* Set the default sizes of the caches to log2 fraction of arc size */
  int dbuf_cache_shift = 5;
+int dbuf_metadata_cache_shift = 6;
  
  /*
- * The dbuf cache uses a three-stage eviction policy:
+ * The LRU dbuf cache uses a three-stage eviction policy:
   *     - A low water marker designates when the dbuf eviction thread
   *     should stop evicting from the dbuf cache.
   *     - When we reach the maximum size (aka mid water mark), we
@@ -381,6 +425,39 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
         return (NULL);
  }
  
+/*
+ * This returns whether this dbuf should be stored in the metadata cache, which
+ * is based on whether it's from one of the dnode types that store data related
+ * to traversing dataset hierarchies.
+ */
+static boolean_t
+dbuf_include_in_metadata_cache(dmu_buf_impl_t *db)
+{
+       DB_DNODE_ENTER(db);
+       dmu_object_type_t type = DB_DNODE(db)->dn_type;
+       DB_DNODE_EXIT(db);
+
+       /* Check if this dbuf is one of the types we care about */
+       if (DMU_OT_IS_METADATA_CACHED(type)) {
+               /* If we hit this, then we set something up wrong in dmu_ot */
+               ASSERT(DMU_OT_IS_METADATA(type));
+
+               /*
+                * Sanity check for small-memory systems: don't allocate too
+                * much memory for this purpose.
+                */
+               if (refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size) >
+                   dbuf_metadata_cache_max_bytes) {
+                       DBUF_STAT_BUMP(metadata_cache_overflow);
+                       return (B_FALSE);
+               }
+
+               return (B_TRUE);
+       }
+
+       return (B_FALSE);
+}
+
  /*
   * Remove an entry from the hash table.  It must be in the EVICTING state.
   */
@@ -574,13 +651,15 @@ dbuf_cache_lowater_bytes(void)
  static inline boolean_t
  dbuf_cache_above_hiwater(void)
  {
-       return (refcount_count(&dbuf_cache_size) > dbuf_cache_hiwater_bytes());
+       return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+           dbuf_cache_hiwater_bytes());
  }
  
  static inline boolean_t
  dbuf_cache_above_lowater(void)
  {
-       return (refcount_count(&dbuf_cache_size) > dbuf_cache_lowater_bytes());
+       return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+           dbuf_cache_lowater_bytes());
  }
  
  /*
@@ -589,8 +668,9 @@ dbuf_cache_above_lowater(void)
  static void
  dbuf_evict_one(void)
  {
-       int idx = multilist_get_random_index(dbuf_cache);
-       multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
+       int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache);
+       multilist_sublist_t *mls = multilist_sublist_lock(
+           dbuf_caches[DB_DBUF_CACHE].cache, idx);
  
         ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
  
@@ -605,15 +685,17 @@ dbuf_evict_one(void)
         if (db != NULL) {
                 multilist_sublist_remove(mls, db);
                 multilist_sublist_unlock(mls);
-               (void) refcount_remove_many(&dbuf_cache_size,
+               (void) refcount_remove_many(&dbuf_caches[DB_DBUF_CACHE].size,
                     db->db.db_size, db);
                 DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
                 DBUF_STAT_BUMPDOWN(cache_count);
                 DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
                     db->db.db_size);
+               ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
+               db->db_caching_status = DB_NO_CACHE;
                 dbuf_destroy(db);
                 DBUF_STAT_MAX(cache_size_bytes_max,
-                   refcount_count(&dbuf_cache_size));
+                   refcount_count(&dbuf_caches[DB_DBUF_CACHE].size));
                 DBUF_STAT_BUMP(cache_total_evicts);
         } else {
                 multilist_sublist_unlock(mls);
@@ -676,7 +758,8 @@ dbuf_evict_notify(void)
          * because it's OK to occasionally make the wrong decision here,
          * and grabbing the lock results in massive lock contention.
          */
-       if (refcount_count(&dbuf_cache_size) > dbuf_cache_target_bytes()) {
+       if (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+           dbuf_cache_target_bytes()) {
                 if (dbuf_cache_above_hiwater())
                         dbuf_evict_one();
                 cv_signal(&dbuf_evict_cv);
@@ -691,8 +774,10 @@ dbuf_kstat_update(kstat_t *ksp, int rw)
         if (rw == KSTAT_WRITE) {
                 return (SET_ERROR(EACCES));
         } else {
+               ds->metadata_cache_size_bytes.value.ui64 =
+                   refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
                 ds->cache_size_bytes.value.ui64 =
-                   refcount_count(&dbuf_cache_size);
+                   refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
                 ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
                 ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
                 ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
@@ -746,15 +831,21 @@ retry:
         dbuf_stats_init(h);
  
         /*
-        * Setup the parameters for the dbuf cache. We set the size of the
-        * dbuf cache to 1/32nd (default) of the target size of the ARC. If
-        * the value has been specified as a module option and it's not
-        * greater than the target size of the ARC, then we honor that value.
+        * Setup the parameters for the dbuf caches. We set the sizes of the
+        * dbuf cache and the metadata cache to 1/32nd and 1/64th (default)
+        * of the target size of the ARC. If the values have been specified as
+        * module options and they're smaller than the target size of the
+        * ARC, then we honor those values.
          */
         if (dbuf_cache_max_bytes == 0 ||
             dbuf_cache_max_bytes >= arc_target_bytes()) {
                 dbuf_cache_max_bytes = arc_target_bytes() >> dbuf_cache_shift;
         }
+       if (dbuf_metadata_cache_max_bytes == 0 ||
+           dbuf_metadata_cache_max_bytes >= arc_target_bytes()) {
+               dbuf_metadata_cache_max_bytes =
+                   arc_target_bytes() >> dbuf_metadata_cache_shift;
+       }
  
         /*
          * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
@@ -762,10 +853,13 @@ retry:
          */
         dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
  
-       dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
-           offsetof(dmu_buf_impl_t, db_cache_link),
-           dbuf_cache_multilist_index_func);
-       refcount_create(&dbuf_cache_size);
+       for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+               dbuf_caches[dcs].cache =
+                   multilist_create(sizeof (dmu_buf_impl_t),
+                   offsetof(dmu_buf_impl_t, db_cache_link),
+                   dbuf_cache_multilist_index_func);
+               refcount_create(&dbuf_caches[dcs].size);
+       }
  
         dbuf_evict_thread_exit = B_FALSE;
         mutex_init(&dbuf_evict_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -827,8 +921,10 @@ dbuf_fini(void)
         mutex_destroy(&dbuf_evict_lock);
         cv_destroy(&dbuf_evict_cv);
  
-       refcount_destroy(&dbuf_cache_size);
-       multilist_destroy(dbuf_cache);
+       for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+               refcount_destroy(&dbuf_caches[dcs].size);
+               multilist_destroy(dbuf_caches[dcs].cache);
+       }
  
         if (dbuf_ksp != NULL) {
                 kstat_delete(dbuf_ksp);
@@ -1116,7 +1212,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
                 db->db_state = DB_UNCACHED;
         }
         cv_broadcast(&db->db_changed);
-       dbuf_rele_and_unlock(db, NULL, B_FALSE);
+       dbuf_rele_and_unlock(db, NULL);
  }
  
  
@@ -2430,13 +2526,23 @@ dbuf_destroy(dmu_buf_impl_t *db)
         dbuf_clear_data(db);
  
         if (multilist_link_active(&db->db_cache_link)) {
-               multilist_remove(dbuf_cache, db);
-               (void) refcount_remove_many(&dbuf_cache_size,
+               ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
+                   db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+               multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
+               (void) refcount_remove_many(
+                   &dbuf_caches[db->db_caching_status].size,
                     db->db.db_size, db);
-               DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
-               DBUF_STAT_BUMPDOWN(cache_count);
-               DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
-                   db->db.db_size);
+
+               if (db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+                       DBUF_STAT_BUMPDOWN(metadata_cache_count);
+               } else {
+                       DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+                       DBUF_STAT_BUMPDOWN(cache_count);
+                       DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+                           db->db.db_size);
+               }
+               db->db_caching_status = DB_NO_CACHE;
         }
  
         ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
@@ -2474,7 +2580,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
                  * release any lock.
                  */
                 mutex_enter(&dn->dn_mtx);
-               dnode_rele_and_unlock(dn, db, B_TRUE);
+               dnode_rele_and_unlock(dn, db);
                 db->db_dnode_handle = NULL;
  
                 dbuf_hash_remove(db);
@@ -2491,6 +2597,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
         ASSERT(db->db_hash_next == NULL);
         ASSERT(db->db_blkptr == NULL);
         ASSERT(db->db_data_pending == NULL);
+       ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
         ASSERT(!multilist_link_active(&db->db_cache_link));
  
         kmem_cache_free(dbuf_kmem_cache, db);
@@ -2502,7 +2609,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
          */
         if (parent && parent != dndb) {
                 mutex_enter(&parent->db_mtx);
-               dbuf_rele_and_unlock(parent, db, B_TRUE);
+               dbuf_rele_and_unlock(parent, db);
         }
  }
  
@@ -2640,6 +2747,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
                 ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
                 db->db.db_offset = DMU_BONUS_BLKID;
                 db->db_state = DB_UNCACHED;
+               db->db_caching_status = DB_NO_CACHE;
                 /* the bonus dbuf is not placed in the hash table */
                 arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
                 return (db);
@@ -2673,6 +2781,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
         avl_add(&dn->dn_dbufs, db);
  
         db->db_state = DB_UNCACHED;
+       db->db_caching_status = DB_NO_CACHE;
         mutex_exit(&dn->dn_dbufs_mtx);
         arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
  
@@ -3059,13 +3168,25 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
  
         if (multilist_link_active(&dh->dh_db->db_cache_link)) {
                 ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
-               multilist_remove(dbuf_cache, dh->dh_db);
-               (void) refcount_remove_many(&dbuf_cache_size,
+               ASSERT(dh->dh_db->db_caching_status == DB_DBUF_CACHE ||
+                   dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+               multilist_remove(
+                   dbuf_caches[dh->dh_db->db_caching_status].cache,
+                   dh->dh_db);
+               (void) refcount_remove_many(
+                   &dbuf_caches[dh->dh_db->db_caching_status].size,
                     dh->dh_db->db.db_size, dh->dh_db);
-               DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
-               DBUF_STAT_BUMPDOWN(cache_count);
-               DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
-                   dh->dh_db->db.db_size);
+
+               if (dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+                       DBUF_STAT_BUMPDOWN(metadata_cache_count);
+               } else {
+                       DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
+                       DBUF_STAT_BUMPDOWN(cache_count);
+                       DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
+                           dh->dh_db->db.db_size);
+               }
+               dh->dh_db->db_caching_status = DB_NO_CACHE;
         }
         (void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
         DBUF_VERIFY(dh->dh_db);
@@ -3230,7 +3351,7 @@ void
  dbuf_rele(dmu_buf_impl_t *db, void *tag)
  {
         mutex_enter(&db->db_mtx);
-       dbuf_rele_and_unlock(db, tag, B_FALSE);
+       dbuf_rele_and_unlock(db, tag);
  }
  
  void
@@ -3253,7 +3374,7 @@ dmu_buf_rele(dmu_buf_t *db, void *tag)
   *
   */
  void
-dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
+dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
  {
         int64_t holds;
  
@@ -3343,19 +3464,40 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
                             db->db_pending_evict) {
                                 dbuf_destroy(db);
                         } else if (!multilist_link_active(&db->db_cache_link)) {
-                               multilist_insert(dbuf_cache, db);
-                               (void) refcount_add_many(&dbuf_cache_size,
+                               ASSERT3U(db->db_caching_status, ==,
+                                   DB_NO_CACHE);
+
+                               dbuf_cached_state_t dcs =
+                                   dbuf_include_in_metadata_cache(db) ?
+                                   DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
+                               db->db_caching_status = dcs;
+
+                               multilist_insert(dbuf_caches[dcs].cache, db);
+                               (void) refcount_add_many(&dbuf_caches[dcs].size,
                                     db->db.db_size, db);
-                               DBUF_STAT_BUMP(cache_levels[db->db_level]);
-                               DBUF_STAT_BUMP(cache_count);
-                               DBUF_STAT_INCR(cache_levels_bytes[db->db_level],
-                                   db->db.db_size);
-                               DBUF_STAT_MAX(cache_size_bytes_max,
-                                   refcount_count(&dbuf_cache_size));
+
+                               if (dcs == DB_DBUF_METADATA_CACHE) {
+                                       DBUF_STAT_BUMP(metadata_cache_count);
+                                       DBUF_STAT_MAX(
+                                           metadata_cache_size_bytes_max,
+                                           refcount_count(
+                                           &dbuf_caches[dcs].size));
+                               } else {
+                                       DBUF_STAT_BUMP(
+                                           cache_levels[db->db_level]);
+                                       DBUF_STAT_BUMP(cache_count);
+                                       DBUF_STAT_INCR(
+                                           cache_levels_bytes[db->db_level],
+                                           db->db.db_size);
+                                       DBUF_STAT_MAX(cache_size_bytes_max,
+                                           refcount_count(
+                                           &dbuf_caches[dcs].size));
+                               }
                                 mutex_exit(&db->db_mtx);
  
-                               if (!evicting)
+                               /* db may be evicted after unlock; use dcs */
+                               if (dcs == DB_DBUF_CACHE)
                                         dbuf_evict_notify();
                         }
  
                         if (do_arc_evict)
@@ -3706,7 +3848,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
                 kmem_free(dr, sizeof (dbuf_dirty_record_t));
                 ASSERT(db->db_dirtycnt > 0);
                 db->db_dirtycnt -= 1;
-               dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+               dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
                 return;
         }
  
@@ -4081,7 +4223,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
         ASSERT(db->db_dirtycnt > 0);
         db->db_dirtycnt -= 1;
         db->db_data_pending = NULL;
-       dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
+       dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
  }
  
  static void
@@ -4445,8 +4587,17 @@ MODULE_PARM_DESC(dbuf_cache_lowater_pct,
         "Percentage below dbuf_cache_max_bytes when the evict thread stops "
         "evicting dbufs.");
  
+module_param(dbuf_metadata_cache_max_bytes, ulong, 0644);
+MODULE_PARM_DESC(dbuf_metadata_cache_max_bytes,
+       "Maximum size in bytes of the dbuf metadata cache.");
+
  module_param(dbuf_cache_shift, int, 0644);
  MODULE_PARM_DESC(dbuf_cache_shift,
         "Set the size of the dbuf cache to a log2 fraction of arc size.");
+
+module_param(dbuf_metadata_cache_shift, int, 0644);
+MODULE_PARM_DESC(dbuf_metadata_cache_shift,
+       "Set the size of the dbuf metadata cache to a log2 fraction of "
+       "arc size.");
  /* END CSTYLED */
  #endif
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c

index 1cb967641e70a2f021df1889e2fc647fcad6403e..0d2f03e22c4b8c9ff723af214349cb755d866dfe 100644 (file)
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -81,60 +81,60 @@ int zfs_dmu_offset_next_sync = 0;
  int zfs_object_remap_one_indirect_delay_ticks = 0;
  
  const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
-       { DMU_BSWAP_UINT8,      TRUE,   FALSE,  "unallocated"           },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "object directory"      },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "object array"          },
-       { DMU_BSWAP_UINT8,      TRUE,   FALSE,  "packed nvlist"         },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "packed nvlist size"    },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "bpobj"                 },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "bpobj header"          },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "SPA space map header"  },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "SPA space map"         },
-       { DMU_BSWAP_UINT64,     TRUE,   TRUE,   "ZIL intent log"        },
-       { DMU_BSWAP_DNODE,      TRUE,   TRUE,   "DMU dnode"             },
-       { DMU_BSWAP_OBJSET,     TRUE,   FALSE,  "DMU objset"            },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "DSL directory"         },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DSL directory child map"},
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DSL dataset snap map"  },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DSL props"             },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "DSL dataset"           },
-       { DMU_BSWAP_ZNODE,      TRUE,   FALSE,  "ZFS znode"             },
-       { DMU_BSWAP_OLDACL,     TRUE,   TRUE,   "ZFS V0 ACL"            },
-       { DMU_BSWAP_UINT8,      FALSE,  TRUE,   "ZFS plain file"        },
-       { DMU_BSWAP_ZAP,        TRUE,   TRUE,   "ZFS directory"         },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "ZFS master node"       },
-       { DMU_BSWAP_ZAP,        TRUE,   TRUE,   "ZFS delete queue"      },
-       { DMU_BSWAP_UINT8,      FALSE,  TRUE,   "zvol object"           },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "zvol prop"             },
-       { DMU_BSWAP_UINT8,      FALSE,  TRUE,   "other uint8[]"         },
-       { DMU_BSWAP_UINT64,     FALSE,  TRUE,   "other uint64[]"        },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "other ZAP"             },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "persistent error log"  },
-       { DMU_BSWAP_UINT8,      TRUE,   FALSE,  "SPA history"           },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "SPA history offsets"   },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "Pool properties"       },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DSL permissions"       },
-       { DMU_BSWAP_ACL,        TRUE,   TRUE,   "ZFS ACL"               },
-       { DMU_BSWAP_UINT8,      TRUE,   TRUE,   "ZFS SYSACL"            },
-       { DMU_BSWAP_UINT8,      TRUE,   TRUE,   "FUID table"            },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "FUID table size"       },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DSL dataset next clones"},
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "scan work queue"       },
-       { DMU_BSWAP_ZAP,        TRUE,   TRUE,   "ZFS user/group/project used" },
-       { DMU_BSWAP_ZAP,        TRUE,   TRUE,   "ZFS user/group/project quota"},
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "snapshot refcount tags"},
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DDT ZAP algorithm"     },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DDT statistics"        },
-       { DMU_BSWAP_UINT8,      TRUE,   TRUE,   "System attributes"     },
-       { DMU_BSWAP_ZAP,        TRUE,   TRUE,   "SA master node"        },
-       { DMU_BSWAP_ZAP,        TRUE,   TRUE,   "SA attr registration"  },
-       { DMU_BSWAP_ZAP,        TRUE,   TRUE,   "SA attr layouts"       },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "scan translations"     },
-       { DMU_BSWAP_UINT8,      FALSE,  TRUE,   "deduplicated block"    },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DSL deadlist map"      },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "DSL deadlist map hdr"  },
-       { DMU_BSWAP_ZAP,        TRUE,   FALSE,  "DSL dir clones"        },
-       { DMU_BSWAP_UINT64,     TRUE,   FALSE,  "bpobj subobj"          }
+       {DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "unallocated"           },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "object directory"      },
+       {DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "object array"          },
+       {DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "packed nvlist"         },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "packed nvlist size"    },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj"                 },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj header"          },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map header"  },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map"         },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, TRUE,  "ZIL intent log"        },
+       {DMU_BSWAP_DNODE,  TRUE,  FALSE, TRUE,  "DMU dnode"             },
+       {DMU_BSWAP_OBJSET, TRUE,  TRUE,  FALSE, "DMU objset"            },
+       {DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL directory"         },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL directory child map"},
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset snap map"  },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL props"             },
+       {DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL dataset"           },
+       {DMU_BSWAP_ZNODE,  TRUE,  FALSE, FALSE, "ZFS znode"             },
+       {DMU_BSWAP_OLDACL, TRUE,  FALSE, TRUE,  "ZFS V0 ACL"            },
+       {DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "ZFS plain file"        },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS directory"         },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "ZFS master node"       },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS delete queue"      },
+       {DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "zvol object"           },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "zvol prop"             },
+       {DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "other uint8[]"         },
+       {DMU_BSWAP_UINT64, FALSE, FALSE, TRUE,  "other uint64[]"        },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "other ZAP"             },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "persistent error log"  },
+       {DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "SPA history"           },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA history offsets"   },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "Pool properties"       },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL permissions"       },
+       {DMU_BSWAP_ACL,    TRUE,  FALSE, TRUE,  "ZFS ACL"               },
+       {DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "ZFS SYSACL"            },
+       {DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "FUID table"            },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "FUID table size"       },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset next clones"},
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan work queue"       },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/project used" },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/project quota"},
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "snapshot refcount tags"},
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT ZAP algorithm"     },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT statistics"        },
+       {DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "System attributes"     },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA master node"        },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr registration"  },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr layouts"       },
+       {DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan translations"     },
+       {DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "deduplicated block"    },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL deadlist map"      },
+       {DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL deadlist map hdr"  },
+       {DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dir clones"        },
+       {DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj subobj"          }
  };
  
  const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c

index 07b00ffdf94472cde80b8a37a0c95eb6f80b60a2..5b18ed5cc6b21ea013955cd884fe133ea81703ba 100644 (file)
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -471,6 +471,14 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
                 os->os_phys = os->os_phys_buf->b_data;
                 bzero(os->os_phys, size);
         }
+       /*
+        * These properties will be filled in by the logic in zfs_get_zplprop()
+        * when they are queried for the first time.
+        */
+       os->os_version = OBJSET_PROP_UNINITIALIZED;
+       os->os_normalization = OBJSET_PROP_UNINITIALIZED;
+       os->os_utf8only = OBJSET_PROP_UNINITIALIZED;
+       os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED;
  
         /*
          * Note: the changed_cb will be called once before the register
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c

index fddad607d09d6e06b73a9ba5d290e5e77e886b60..7672a62fa49c41dadf0c08bc25f750b92341ab8d 100644 (file)
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1574,11 +1574,11 @@ void
  dnode_rele(dnode_t *dn, void *tag)
  {
         mutex_enter(&dn->dn_mtx);
-       dnode_rele_and_unlock(dn, tag, B_FALSE);
+       dnode_rele_and_unlock(dn, tag);
  }
  
  void
-dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
+dnode_rele_and_unlock(dnode_t *dn, void *tag)
  {
         uint64_t refs;
         /* Get while the hold prevents the dnode from moving. */
@@ -1610,7 +1610,7 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
                  * asserted anyway when the handle gets destroyed.
                  */
                 mutex_enter(&db->db_mtx);
-               dbuf_rele_and_unlock(db, dnh, evicting);
+               dbuf_rele_and_unlock(db, dnh);
         }
  }
  
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c

index 830da26f89b2d22c3ce396ac5389dd3eb1ebcc6c..22b401ab5b988ab78b0385a9cdb11386d2c5439f 100644 (file)
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -438,7 +438,7 @@ dnode_evict_dbufs(dnode_t *dn)
                          * flow would look like:
                          *
                          * dbuf_destroy():
-                        *   dnode_rele_and_unlock(parent_dbuf, evicting=TRUE):
+                        *   dnode_rele_and_unlock(parent_dbuf):
                          *      if (!cacheable || pending_evict)
                          *        dbuf_destroy()
                          */
@@ -502,7 +502,7 @@ dnode_undirty_dbufs(list_t *list)
                         list_destroy(&dr->dt.di.dr_children);
                 }
                 kmem_free(dr, sizeof (dbuf_dirty_record_t));
-               dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+               dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
         }
  }
  
diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c

index d37172c8872d2c779f7a8ba478980224c21ef379..f264455207115d635922b17a5a830db12f83ebe0 100644 (file)
--- a/module/zfs/zcp_iter.c
+++ b/module/zfs/zcp_iter.c
@@ -33,6 +33,8 @@
  
  #include <sys/zcp.h>
  
+#include "zfs_comutil.h"
+
  typedef int (zcp_list_func_t)(lua_State *);
  typedef struct zcp_list_info {
         const char *name;
@@ -232,20 +234,6 @@ zcp_snapshots_list(lua_State *state)
         return (1);
  }
  
-/*
- * Note: channel programs only run in the global zone, so all datasets
- * are visible to this zone.
- */
-static boolean_t
-dataset_name_hidden(const char *name)
-{
-       if (strchr(name, '$') != NULL)
-               return (B_TRUE);
-       if (strchr(name, '%') != NULL)
-               return (B_TRUE);
-       return (B_FALSE);
-}
-
  static int
  zcp_children_iter(lua_State *state)
  {
@@ -275,7 +263,7 @@ zcp_children_iter(lua_State *state)
         do {
                 err = dmu_dir_list_next(os,
                     sizeof (childname) - (p - childname), p, NULL, &cursor);
-       } while (err == 0 && dataset_name_hidden(childname));
+       } while (err == 0 && zfs_dataset_name_hidden(childname));
         dsl_dataset_rele(ds, FTAG);
  
         if (err == ENOENT) {
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c

index e70207aa50ea4a871b4a5a2e3001b0ae9f77cdc3..911bf884a88b3f7c235ec3e804ef284541ba517b 100644 (file)
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -2252,23 +2252,6 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
         return (err);
  }
  
-boolean_t
-dataset_name_hidden(const char *name)
-{
-       /*
-        * Skip over datasets that are not visible in this zone,
-        * internal datasets (which have a $ in their name), and
-        * temporary datasets (which have a % in their name).
-        */
-       if (strchr(name, '$') != NULL)
-               return (B_TRUE);
-       if (strchr(name, '%') != NULL)
-               return (B_TRUE);
-       if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
-               return (B_TRUE);
-       return (B_FALSE);
-}
-
  /*
   * inputs:
   * zc_name             name of filesystem
@@ -2308,7 +2291,7 @@ top:
                     NULL, &zc->zc_cookie);
                 if (error == ENOENT)
                         error = SET_ERROR(ESRCH);
-       } while (error == 0 && dataset_name_hidden(zc->zc_name));
+       } while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
         dmu_objset_rele(os, FTAG);
  
         /*
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c

index b890bbaf942e23ec48819d027b39a042258e27e3..a477c8669b543e0ed0cd8363958fb1110d616b33 100644 (file)
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -2234,6 +2234,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
         dmu_tx_commit(tx);
  
         zfsvfs->z_version = newvers;
+       os->os_version = newvers;
  
         zfs_set_fuid_feature(zfsvfs);
  
@@ -2246,13 +2247,42 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
  int
  zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
  {
-       const char *pname;
-       int error = SET_ERROR(ENOENT);
+       uint64_t *cached_copy = NULL;
+
+       /*
+        * Figure out where in the objset_t the cached copy would live, if it
+        * is available for the requested property.
+        */
+       if (os != NULL) {
+               switch (prop) {
+               case ZFS_PROP_VERSION:
+                       cached_copy = &os->os_version;
+                       break;
+               case ZFS_PROP_NORMALIZE:
+                       cached_copy = &os->os_normalization;
+                       break;
+               case ZFS_PROP_UTF8ONLY:
+                       cached_copy = &os->os_utf8only;
+                       break;
+               case ZFS_PROP_CASE:
+                       cached_copy = &os->os_casesensitivity;
+                       break;
+               default:
+                       break;
+               }
+       }
+       if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+               *value = *cached_copy;
+               return (0);
+       }
  
         /*
-        * Look up the file system's value for the property.  For the
-        * version property, we look up a slightly different string.
+        * If the property wasn't cached, look up the file system's value for
+        * the property. For the version property, we look up a slightly
+        * different string.
          */
+       const char *pname;
+       int error = ENOENT;
         if (prop == ZFS_PROP_VERSION)
                 pname = ZPL_VERSION_STR;
         else
@@ -2284,6 +2314,15 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
                 }
                 error = 0;
         }
+
+       /*
+        * If one of the methods for getting the property value above worked,
+        * copy it into the objset_t's cache.
+        */
+       if (error == 0 && cached_copy != NULL) {
+               *cached_copy = *value;
+       }
+
         return (error);
  }
  
diff --git a/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh b/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh

index 7813d263df529c6c8cd23b54d47673e05964fe4b..5ceff962d4ccf0c755d15b7a0d0ff13364a03f27 100755 (executable)
--- a/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh
@@ -58,7 +58,7 @@ function testdbufstat # stat_name dbufstat_filter
         from_dbufstat=$(grep -w "$name" "$DBUFSTATS_FILE" | awk '{ print $3 }')
         from_dbufs=$(dbufstat.py -bxn -i "$DBUFS_FILE" "$filter" | wc -l)
  
-       within_tolerance $from_dbufstat $from_dbufs 5 \
+       within_tolerance $from_dbufstat $from_dbufs 9 \
             || log_fail "Stat $name exceeded tolerance"
  }
author	Matthew Ahrens <mahrens@delphix.com>
	Tue, 10 Jul 2018 17:49:50 +0000 (13:49 -0400)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Thu, 12 Jul 2018 17:49:27 +0000 (10:49 -0700)
include/sys/dbuf.h		patch \| blob \| history
include/sys/dmu.h		patch \| blob \| history
include/sys/dmu_objset.h		patch \| blob \| history
include/sys/dnode.h		patch \| blob \| history
include/sys/zfs_ioctl.h		patch \| blob \| history
include/zfs_comutil.h		patch \| blob \| history
man/man5/zfs-module-parameters.5		patch \| blob \| history
module/zcommon/zfs_comutil.c		patch \| blob \| history
module/zfs/dbuf.c		patch \| blob \| history
module/zfs/dmu.c		patch \| blob \| history
module/zfs/dmu_objset.c		patch \| blob \| history
module/zfs/dnode.c		patch \| blob \| history
module/zfs/dnode_sync.c		patch \| blob \| history
module/zfs/zcp_iter.c		patch \| blob \| history
module/zfs/zfs_ioctl.c		patch \| blob \| history
module/zfs/zfs_vfsops.c		patch \| blob \| history
tests/zfs-tests/tests/functional/arc/dbufstats_001_pos.ksh		patch \| blob \| history