granicus.if.org Git - zfs/commitdiff
Mark IO pipeline with PF_FSTRANS
author: Brian Behlendorf <behlendorf1@llnl.gov>
Sun, 13 Jul 2014 18:35:19 +0000 (14:35 -0400)
committer: Brian Behlendorf <behlendorf1@llnl.gov>
Fri, 16 Jan 2015 22:28:05 +0000 (14:28 -0800)
In order to avoid deadlocking in the IO pipeline it is critical that
pageout be avoided during direct memory reclaim.  This ensures that
the pipeline threads can always make forward progress and never end
up blocking on a DMU transaction.  For this very reason Linux now
provides the PF_FSTRANS flag which may be set in the process context.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
include/sys/zfs_context.h
lib/libzpool/kernel.c
module/zfs/txg.c
module/zfs/vdev_file.c
module/zfs/zio.c
module/zfs/zpl_file.c
module/zfs/zvol.c

index d4c6fb810b5bd6af5b78d9bd427d05f8b0fd1d45..1487a99f4a6447a2d9a090a8cb095b4f407e9d83 100644 (file)
@@ -733,6 +733,11 @@ void ksiddomain_rele(ksiddomain_t *);
                (void) nanosleep(&ts, NULL);                            \
        } while (0)
 
-#endif /* _KERNEL */
+typedef int fstrans_cookie_t;
+
+extern fstrans_cookie_t spl_fstrans_mark(void);
+extern void spl_fstrans_unmark(fstrans_cookie_t);
+extern int spl_fstrans_check(void);
 
+#endif /* _KERNEL */
 #endif /* _SYS_ZFS_CONTEXT_H */
index 995f61d05de2b0c82377b281da97a7255d8d8a4e..db50352c5191a04dd9cfaf77788b29ffa35c96a9 100644 (file)
@@ -1275,3 +1275,20 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
 {
        return (0);
 }
+
+fstrans_cookie_t
+spl_fstrans_mark(void)
+{
+       return ((fstrans_cookie_t) 0);
+}
+
+void
+spl_fstrans_unmark(fstrans_cookie_t cookie)
+{
+}
+
+int
+spl_fstrans_check(void)
+{
+       return (0);
+}
index 4693762b87192b60a868816f3b85135a56bf2842..81afeb373a490cad7108b396e353ac6f8fc394aa 100644 (file)
@@ -483,15 +483,7 @@ txg_sync_thread(dsl_pool_t *dp)
        vdev_stat_t *vs1, *vs2;
        clock_t start, delta;
 
-#ifdef _KERNEL
-       /*
-        * Annotate this process with a flag that indicates that it is
-        * unsafe to use KM_SLEEP during memory allocations due to the
-        * potential for a deadlock.  KM_PUSHPAGE should be used instead.
-        */
-       current->flags |= PF_NOFS;
-#endif /* _KERNEL */
-
+       (void) spl_fstrans_mark();
        txg_thread_enter(tx, &cpr);
 
        vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE);
index 8059cdea4400b57d2da8ece842046d30c4b63fa3..8573a3a6631bba3687c85d8a3dc08afbd4a4aef8 100644 (file)
@@ -161,6 +161,17 @@ vdev_file_io_strategy(void *arg)
        zio_interrupt(zio);
 }
 
+static void
+vdev_file_io_fsync(void *arg)
+{
+       zio_t *zio = (zio_t *)arg;
+       vdev_file_t *vf = zio->io_vd->vdev_tsd;
+
+       zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL);
+
+       zio_interrupt(zio);
+}
+
 static int
 vdev_file_io_start(zio_t *zio)
 {
@@ -180,6 +191,19 @@ vdev_file_io_start(zio_t *zio)
                        if (zfs_nocacheflush)
                                break;
 
+                       /*
+                        * We cannot safely call vfs_fsync() when PF_FSTRANS
+                        * is set in the current context.  Filesystems like
+                        * XFS include sanity checks to verify it is not
+                        * already set, see xfs_vm_writepage().  Therefore
+                        * the sync must be dispatched to a different context.
+                        */
+                       if (spl_fstrans_check()) {
+                               VERIFY3U(taskq_dispatch(vdev_file_taskq,
+                                   vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0);
+                               return (ZIO_PIPELINE_STOP);
+                       }
+
                        zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
                            kcred, NULL);
                        break;
index 9d70b3e5969d7f957fd91aae9c61e0cfc47018b2..7c0e6bf7e8eec0370af24ae4a2c9a79aeb8b9d39 100644 (file)
@@ -1361,7 +1361,11 @@ static zio_pipe_stage_t *zio_pipeline[];
 void
 zio_execute(zio_t *zio)
 {
+       fstrans_cookie_t cookie;
+
+       cookie = spl_fstrans_mark();
        __zio_execute(zio);
+       spl_fstrans_unmark(cookie);
 }
 
 __attribute__((always_inline))
index cabe9bf15b2dc122ff9d50087e2c01ac6caa7a24..61005dcd4ff74685ccae233d94446f977f082d33 100644 (file)
@@ -481,19 +481,14 @@ int
 zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
        struct address_space *mapping = data;
+       fstrans_cookie_t cookie;
 
        ASSERT(PageLocked(pp));
        ASSERT(!PageWriteback(pp));
-       ASSERT(!(current->flags & PF_NOFS));
 
-       /*
-        * Annotate this call path with a flag that indicates that it is
-        * unsafe to use KM_SLEEP during memory allocations due to the
-        * potential for a deadlock.  KM_PUSHPAGE should be used instead.
-        */
-       current->flags |= PF_NOFS;
+       cookie = spl_fstrans_mark();
        (void) zfs_putpage(mapping->host, pp, wbc);
-       current->flags &= ~PF_NOFS;
+       spl_fstrans_unmark(cookie);
 
        return (0);
 }
index fa5c7eb4e2c88a8cd0e484ad63db0c31005a15dc..ddaf520a2c772110c312de1fba783eba681e7722 100644 (file)
@@ -577,20 +577,13 @@ zvol_write(void *arg)
        struct request *req = (struct request *)arg;
        struct request_queue *q = req->q;
        zvol_state_t *zv = q->queuedata;
+       fstrans_cookie_t cookie = spl_fstrans_mark();
        uint64_t offset = blk_rq_pos(req) << 9;
        uint64_t size = blk_rq_bytes(req);
        int error = 0;
        dmu_tx_t *tx;
        rl_t *rl;
 
-       /*
-        * Annotate this call path with a flag that indicates that it is
-        * unsafe to use KM_SLEEP during memory allocations due to the
-        * potential for a deadlock.  KM_PUSHPAGE should be used instead.
-        */
-       ASSERT(!(current->flags & PF_NOFS));
-       current->flags |= PF_NOFS;
-
        if (req->cmd_flags & VDEV_REQ_FLUSH)
                zil_commit(zv->zv_zilog, ZVOL_OBJ);
 
@@ -598,7 +591,7 @@ zvol_write(void *arg)
         * Some requests are just for flush and nothing else.
         */
        if (size == 0) {
-               blk_end_request(req, 0, size);
+               error = 0;
                goto out;
        }
 
@@ -612,7 +605,6 @@ zvol_write(void *arg)
        if (error) {
                dmu_tx_abort(tx);
                zfs_range_unlock(rl);
-               blk_end_request(req, -error, size);
                goto out;
        }
 
@@ -628,9 +620,9 @@ zvol_write(void *arg)
            zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
                zil_commit(zv->zv_zilog, ZVOL_OBJ);
 
-       blk_end_request(req, -error, size);
 out:
-       current->flags &= ~PF_NOFS;
+       blk_end_request(req, -error, size);
+       spl_fstrans_unmark(cookie);
 }
 
 #ifdef HAVE_BLK_QUEUE_DISCARD
@@ -640,21 +632,14 @@ zvol_discard(void *arg)
        struct request *req = (struct request *)arg;
        struct request_queue *q = req->q;
        zvol_state_t *zv = q->queuedata;
+       fstrans_cookie_t cookie = spl_fstrans_mark();
        uint64_t start = blk_rq_pos(req) << 9;
        uint64_t end = start + blk_rq_bytes(req);
        int error;
        rl_t *rl;
 
-       /*
-        * Annotate this call path with a flag that indicates that it is
-        * unsafe to use KM_SLEEP during memory allocations due to the
-        * potential for a deadlock.  KM_PUSHPAGE should be used instead.
-        */
-       ASSERT(!(current->flags & PF_NOFS));
-       current->flags |= PF_NOFS;
-
        if (end > zv->zv_volsize) {
-               blk_end_request(req, -EIO, blk_rq_bytes(req));
+               error = EIO;
                goto out;
        }
 
@@ -668,7 +653,7 @@ zvol_discard(void *arg)
        end = P2ALIGN(end, zv->zv_volblocksize);
 
        if (start >= end) {
-               blk_end_request(req, 0, blk_rq_bytes(req));
+               error = 0;
                goto out;
        }
 
@@ -681,10 +666,9 @@ zvol_discard(void *arg)
         */
 
        zfs_range_unlock(rl);
-
-       blk_end_request(req, -error, blk_rq_bytes(req));
 out:
-       current->flags &= ~PF_NOFS;
+       blk_end_request(req, -error, blk_rq_bytes(req));
+       spl_fstrans_unmark(cookie);
 }
 #endif /* HAVE_BLK_QUEUE_DISCARD */
 
@@ -700,14 +684,15 @@ zvol_read(void *arg)
        struct request *req = (struct request *)arg;
        struct request_queue *q = req->q;
        zvol_state_t *zv = q->queuedata;
+       fstrans_cookie_t cookie = spl_fstrans_mark();
        uint64_t offset = blk_rq_pos(req) << 9;
        uint64_t size = blk_rq_bytes(req);
        int error;
        rl_t *rl;
 
        if (size == 0) {
-               blk_end_request(req, 0, size);
-               return;
+               error = 0;
+               goto out;
        }
 
        rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
@@ -720,7 +705,9 @@ zvol_read(void *arg)
        if (error == ECKSUM)
                error = SET_ERROR(EIO);
 
+out:
        blk_end_request(req, -error, size);
+       spl_fstrans_unmark(cookie);
 }
 
 /*