ceph: use copy-from2 op in copy_file_range
authorLuis Henriques <lhenriques@suse.com>
Wed, 8 Jan 2020 10:03:53 +0000 (10:03 +0000)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 27 Jan 2020 15:53:40 +0000 (16:53 +0100)
Instead of using the copy-from operation, switch copy_file_range to the
new copy-from2 operation, which allows to send the truncate_seq and
truncate_size parameters.

If an OSD does not support the copy-from2 operation it will return
-EOPNOTSUPP.  In that case, the kernel client will stop trying to do
remote object copies for this fs client and will always use the generic
VFS copy_file_range.

Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/file.c
fs/ceph/super.c
fs/ceph/super.h
include/linux/ceph/osd_client.h
include/linux/ceph/rados.h
net/ceph/osd_client.c

index 11929d2..c3b8e8e 100644 (file)
@@ -1974,6 +1974,9 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
        if (ceph_test_mount_opt(src_fsc, NOCOPYFROM))
                return -EOPNOTSUPP;
 
+       if (!src_fsc->have_copy_from2)
+               return -EOPNOTSUPP;
+
        /*
         * Striped file layouts require that we copy partial objects, but the
         * OSD copy-from operation only supports full-object copies.  Limit
@@ -2101,8 +2104,14 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
                        CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
                        &dst_oid, &dst_oloc,
                        CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-                       CEPH_OSD_OP_FLAG_FADVISE_DONTNEED, 0);
+                       CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
+                       dst_ci->i_truncate_seq, dst_ci->i_truncate_size,
+                       CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
                if (err) {
+                       if (err == -EOPNOTSUPP) {
+                               src_fsc->have_copy_from2 = false;
+                               pr_notice("OSDs don't support copy-from2; disabling copy offload\n");
+                       }
                        dout("ceph_osdc_copy_from returned %d\n", err);
                        if (!ret)
                                ret = err;
index 112927d..bfb8aea 100644 (file)
@@ -718,6 +718,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
        fsc->sb = NULL;
        fsc->mount_state = CEPH_MOUNT_MOUNTING;
        fsc->filp_gen = 1;
+       fsc->have_copy_from2 = true;
 
        atomic_long_set(&fsc->writeback_count, 0);
 
index 3bf1a01..1e456a9 100644 (file)
@@ -106,6 +106,8 @@ struct ceph_fs_client {
        unsigned long last_auto_reconnect;
        bool blacklisted;
 
+       bool have_copy_from2;
+
        u32 filp_gen;
        loff_t max_file_size;
 
index eaffbdd..5a62dbd 100644 (file)
@@ -534,6 +534,7 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
                        struct ceph_object_id *dst_oid,
                        struct ceph_object_locator *dst_oloc,
                        u32 dst_fadvise_flags,
+                       u32 truncate_seq, u64 truncate_size,
                        u8 copy_from_flags);
 
 /* watch/notify */
index 3eb0e55..59bdfd4 100644 (file)
@@ -256,6 +256,7 @@ extern const char *ceph_osd_state_name(int s);
                                                                            \
        /* tiering */                                                       \
        f(COPY_FROM,    __CEPH_OSD_OP(WR, DATA, 26),    "copy-from")        \
+       f(COPY_FROM2,   __CEPH_OSD_OP(WR, DATA, 45),    "copy-from2")       \
        f(COPY_GET_CLASSIC, __CEPH_OSD_OP(RD, DATA, 27), "copy-get-classic") \
        f(UNDIRTY,      __CEPH_OSD_OP(WR, DATA, 28),    "undirty")          \
        f(ISDIRTY,      __CEPH_OSD_OP(RD, DATA, 29),    "isdirty")          \
@@ -446,6 +447,7 @@ enum {
        CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to
                                                     * cloneid */
        CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16,     /* order with write */
+       CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ = 32,  /* send truncate_{seq,size} */
 };
 
 enum {
index ba45b07..b68b376 100644 (file)
@@ -402,7 +402,7 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
        case CEPH_OSD_OP_LIST_WATCHERS:
                ceph_osd_data_release(&op->list_watchers.response_data);
                break;
-       case CEPH_OSD_OP_COPY_FROM:
+       case CEPH_OSD_OP_COPY_FROM2:
                ceph_osd_data_release(&op->copy_from.osd_data);
                break;
        default:
@@ -697,7 +697,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
                case CEPH_OSD_OP_SETXATTR:
                case CEPH_OSD_OP_CMPXATTR:
                case CEPH_OSD_OP_NOTIFY_ACK:
-               case CEPH_OSD_OP_COPY_FROM:
+               case CEPH_OSD_OP_COPY_FROM2:
                        *num_request_data_items += 1;
                        break;
 
@@ -1029,7 +1029,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
        case CEPH_OSD_OP_CREATE:
        case CEPH_OSD_OP_DELETE:
                break;
-       case CEPH_OSD_OP_COPY_FROM:
+       case CEPH_OSD_OP_COPY_FROM2:
                dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid);
                dst->copy_from.src_version =
                        cpu_to_le64(src->copy_from.src_version);
@@ -1966,7 +1966,7 @@ static void setup_request_data(struct ceph_osd_request *req)
                        ceph_osdc_msg_data_add(request_msg,
                                               &op->notify_ack.request_data);
                        break;
-               case CEPH_OSD_OP_COPY_FROM:
+               case CEPH_OSD_OP_COPY_FROM2:
                        ceph_osdc_msg_data_add(request_msg,
                                               &op->copy_from.osd_data);
                        break;
@@ -5315,6 +5315,7 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
                                     struct ceph_object_locator *src_oloc,
                                     u32 src_fadvise_flags,
                                     u32 dst_fadvise_flags,
+                                    u32 truncate_seq, u64 truncate_size,
                                     u8 copy_from_flags)
 {
        struct ceph_osd_req_op *op;
@@ -5325,7 +5326,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
        if (IS_ERR(pages))
                return PTR_ERR(pages);
 
-       op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags);
+       op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2,
+                             dst_fadvise_flags);
        op->copy_from.snapid = src_snapid;
        op->copy_from.src_version = src_version;
        op->copy_from.flags = copy_from_flags;
@@ -5335,6 +5337,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
        end = p + PAGE_SIZE;
        ceph_encode_string(&p, end, src_oid->name, src_oid->name_len);
        encode_oloc(&p, end, src_oloc);
+       ceph_encode_32(&p, truncate_seq);
+       ceph_encode_64(&p, truncate_size);
        op->indata_len = PAGE_SIZE - (end - p);
 
        ceph_osd_data_pages_init(&op->copy_from.osd_data, pages,
@@ -5350,6 +5354,7 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
                        struct ceph_object_id *dst_oid,
                        struct ceph_object_locator *dst_oloc,
                        u32 dst_fadvise_flags,
+                       u32 truncate_seq, u64 truncate_size,
                        u8 copy_from_flags)
 {
        struct ceph_osd_request *req;
@@ -5366,7 +5371,8 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
 
        ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
                                        src_oloc, src_fadvise_flags,
-                                       dst_fadvise_flags, copy_from_flags);
+                                       dst_fadvise_flags, truncate_seq,
+                                       truncate_size, copy_from_flags);
        if (ret)
                goto out;