iomap: Allow filesystem to call iomap_dio_complete without i_rwsem
authorChristoph Hellwig <hch@lst.de>
Mon, 28 Sep 2020 15:51:08 +0000 (08:51 -0700)
committerDarrick J. Wong <darrick.wong@oracle.com>
Mon, 28 Sep 2020 15:51:08 +0000 (08:51 -0700)
This is to avoid the deadlock caused in btrfs because of O_DIRECT |
O_DSYNC.

Filesystems such as btrfs require i_rwsem while performing sync on a
file. iomap_dio_rw() is called under i_rw_sem. This leads to a
deadlock because of:

iomap_dio_complete()
  generic_write_sync()
    btrfs_sync_file()

Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems
can call iomap_dio_complete() after unlocking i_rwsem.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/iomap/direct-io.c
include/linux/iomap.h

index 024d4bb..b005ef5 100644 (file)
@@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
                dio->submit.cookie = submit_bio(bio);
 }
 
-static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
        const struct iomap_dio_ops *dops = dio->dops;
        struct kiocb *iocb = dio->iocb;
@@ -130,6 +130,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(iomap_dio_complete);
 
 static void iomap_dio_complete_work(struct work_struct *work)
 {
@@ -416,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  * Returns -ENOTBLK In case of a page invalidation invalidation failure for
  * writes.  The callers needs to fall back to buffered I/O in this case.
  */
-ssize_t
-iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+struct iomap_dio *
+__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
                bool wait_for_completion)
 {
@@ -431,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        struct iomap_dio *dio;
 
        if (!count)
-               return 0;
+               return NULL;
 
        if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
-               return -EIO;
+               return ERR_PTR(-EIO);
 
        dio = kmalloc(sizeof(*dio), GFP_KERNEL);
        if (!dio)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        dio->iocb = iocb;
        atomic_set(&dio->ref, 1);
@@ -568,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        dio->wait_for_completion = wait_for_completion;
        if (!atomic_dec_and_test(&dio->ref)) {
                if (!wait_for_completion)
-                       return -EIOCBQUEUED;
+                       return ERR_PTR(-EIOCBQUEUED);
 
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
@@ -584,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                __set_current_state(TASK_RUNNING);
        }
 
-       return iomap_dio_complete(dio);
+       return dio;
 
 out_free_dio:
        kfree(dio);
-       return ret;
+       if (ret)
+               return ERR_PTR(ret);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(__iomap_dio_rw);
+
+ssize_t
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+               const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+               bool wait_for_completion)
+{
+       struct iomap_dio *dio;
+
+       dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
+       if (IS_ERR_OR_NULL(dio))
+               return PTR_ERR_OR_ZERO(dio);
+       return iomap_dio_complete(dio);
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
index 4d1d3c3..172b339 100644 (file)
@@ -13,6 +13,7 @@
 struct address_space;
 struct fiemap_extent_info;
 struct inode;
+struct iomap_dio;
 struct iomap_writepage_ctx;
 struct iov_iter;
 struct kiocb;
@@ -258,6 +259,10 @@ struct iomap_dio_ops {
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
                bool wait_for_completion);
+struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+               const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+               bool wait_for_completion);
+ssize_t iomap_dio_complete(struct iomap_dio *dio);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP