/* splice.c revision f011531e61ae750cdf82074e0dea1379b07fa239 */
1/* 2 * splice engine 3 * 4 * IO engine that transfers data by doing splices to/from pipes and 5 * the files. 6 * 7 */ 8#include <stdio.h> 9#include <stdlib.h> 10#include <unistd.h> 11#include <errno.h> 12#include <assert.h> 13#include <sys/poll.h> 14#include <sys/mman.h> 15 16#include "../fio.h" 17 18#ifdef FIO_HAVE_SPLICE 19 20struct spliceio_data { 21 int pipe[2]; 22 int vmsplice_to_user; 23 int vmsplice_to_user_map; 24}; 25 26/* 27 * vmsplice didn't use to support splicing to user space, this is the old 28 * variant of getting that job done. Doesn't make a lot of sense, but it 29 * uses splices to move data from the source into a pipe. 30 */ 31static int fio_splice_read_old(struct thread_data *td, struct io_u *io_u) 32{ 33 struct spliceio_data *sd = td->io_ops->data; 34 struct fio_file *f = io_u->file; 35 int ret, ret2, buflen; 36 off_t offset; 37 void *p; 38 39 offset = io_u->offset; 40 buflen = io_u->xfer_buflen; 41 p = io_u->xfer_buf; 42 while (buflen) { 43 int this_len = buflen; 44 45 if (this_len > SPLICE_DEF_SIZE) 46 this_len = SPLICE_DEF_SIZE; 47 48 ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE); 49 if (ret < 0) { 50 if (errno == ENODATA || errno == EAGAIN) 51 continue; 52 53 return -errno; 54 } 55 56 buflen -= ret; 57 58 while (ret) { 59 ret2 = read(sd->pipe[0], p, ret); 60 if (ret2 < 0) 61 return -errno; 62 63 ret -= ret2; 64 p += ret2; 65 } 66 } 67 68 return io_u->xfer_buflen; 69} 70 71/* 72 * We can now vmsplice into userspace, so do the transfer by splicing into 73 * a pipe and vmsplicing that into userspace. 
74 */ 75static int fio_splice_read(struct thread_data *td, struct io_u *io_u) 76{ 77 struct spliceio_data *sd = td->io_ops->data; 78 struct fio_file *f = io_u->file; 79 struct iovec iov; 80 int ret , buflen, mmap_len; 81 off_t offset; 82 void *p, *map; 83 84 ret = 0; 85 offset = io_u->offset; 86 mmap_len = buflen = io_u->xfer_buflen; 87 88 if (sd->vmsplice_to_user_map) { 89 map = mmap(io_u->xfer_buf, buflen, PROT_READ, MAP_PRIVATE|OS_MAP_ANON, 0, 0); 90 if (map == MAP_FAILED) { 91 td_verror(td, errno, "mmap io_u"); 92 return -1; 93 } 94 95 p = map; 96 } else { 97 map = NULL; 98 p = io_u->xfer_buf; 99 } 100 101 while (buflen) { 102 int this_len = buflen; 103 int flags = 0; 104 105 if (this_len > SPLICE_DEF_SIZE) { 106 this_len = SPLICE_DEF_SIZE; 107 flags = SPLICE_F_MORE; 108 } 109 110 ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len,flags); 111 if (ret < 0) { 112 if (errno == ENODATA || errno == EAGAIN) 113 continue; 114 115 td_verror(td, errno, "splice-from-fd"); 116 break; 117 } 118 119 buflen -= ret; 120 iov.iov_base = p; 121 iov.iov_len = ret; 122 123 while (iov.iov_len) { 124 ret = vmsplice(sd->pipe[0], &iov, 1, SPLICE_F_MOVE); 125 if (ret < 0) { 126 if (errno == EFAULT && 127 sd->vmsplice_to_user_map) { 128 sd->vmsplice_to_user_map = 0; 129 munmap(map, mmap_len); 130 map = NULL; 131 p = io_u->xfer_buf; 132 iov.iov_base = p; 133 continue; 134 } 135 if (errno == EBADF) { 136 ret = -EBADF; 137 break; 138 } 139 td_verror(td, errno, "vmsplice"); 140 break; 141 } else if (!ret) { 142 td_verror(td, ENODATA, "vmsplice"); 143 ret = -1; 144 break; 145 } 146 147 iov.iov_len -= ret; 148 iov.iov_base += ret; 149 p += ret; 150 } 151 if (ret < 0) 152 break; 153 } 154 155 if (sd->vmsplice_to_user_map && munmap(map, mmap_len) < 0) { 156 td_verror(td, errno, "munnap io_u"); 157 return -1; 158 } 159 if (ret < 0) 160 return ret; 161 162 return io_u->xfer_buflen; 163} 164 165/* 166 * For splice writing, we can vmsplice our data buffer directly into a 167 * pipe and then 
splice that to a file. 168 */ 169static int fio_splice_write(struct thread_data *td, struct io_u *io_u) 170{ 171 struct spliceio_data *sd = td->io_ops->data; 172 struct iovec iov = { 173 .iov_base = io_u->xfer_buf, 174 .iov_len = io_u->xfer_buflen, 175 }; 176 struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; 177 struct fio_file *f = io_u->file; 178 off_t off = io_u->offset; 179 int ret, ret2; 180 181 while (iov.iov_len) { 182 if (poll(&pfd, 1, -1) < 0) 183 return errno; 184 185 ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK); 186 if (ret < 0) 187 return -errno; 188 189 iov.iov_len -= ret; 190 iov.iov_base += ret; 191 192 while (ret) { 193 ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0); 194 if (ret2 < 0) 195 return -errno; 196 197 ret -= ret2; 198 } 199 } 200 201 return io_u->xfer_buflen; 202} 203 204static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) 205{ 206 struct spliceio_data *sd = td->io_ops->data; 207 int uninitialized_var(ret); 208 209 fio_ro_check(td, io_u); 210 211 if (io_u->ddir == DDIR_READ) { 212 if (sd->vmsplice_to_user) { 213 ret = fio_splice_read(td, io_u); 214 /* 215 * This kernel doesn't support vmsplice to user 216 * space. Reset the vmsplice_to_user flag, so that 217 * we retry below and don't hit this path again. 
218 */ 219 if (ret == -EBADF) 220 sd->vmsplice_to_user = 0; 221 } 222 if (!sd->vmsplice_to_user) 223 ret = fio_splice_read_old(td, io_u); 224 } else if (io_u->ddir == DDIR_WRITE) 225 ret = fio_splice_write(td, io_u); 226 else 227 ret = do_io_u_sync(td, io_u); 228 229 if (ret != (int) io_u->xfer_buflen) { 230 if (ret >= 0) { 231 io_u->resid = io_u->xfer_buflen - ret; 232 io_u->error = 0; 233 return FIO_Q_COMPLETED; 234 } else 235 io_u->error = errno; 236 } 237 238 if (io_u->error) { 239 td_verror(td, io_u->error, "xfer"); 240 if (io_u->error == EINVAL) 241 log_err("fio: looks like splice doesn't work on this" 242 " file system\n"); 243 } 244 245 return FIO_Q_COMPLETED; 246} 247 248static void fio_spliceio_cleanup(struct thread_data *td) 249{ 250 struct spliceio_data *sd = td->io_ops->data; 251 252 if (sd) { 253 close(sd->pipe[0]); 254 close(sd->pipe[1]); 255 free(sd); 256 } 257} 258 259static int fio_spliceio_init(struct thread_data *td) 260{ 261 struct spliceio_data *sd = malloc(sizeof(*sd)); 262 263 if (pipe(sd->pipe) < 0) { 264 td_verror(td, errno, "pipe"); 265 free(sd); 266 return 1; 267 } 268 269 /* 270 * Assume this work, we'll reset this if it doesn't 271 */ 272 sd->vmsplice_to_user = 1; 273 274 /* 275 * Works with "real" vmsplice to user, eg mapping pages directly. 276 * Reset if we fail. 277 */ 278 sd->vmsplice_to_user_map = 1; 279 280 /* 281 * And if vmsplice_to_user works, we definitely need aligned 282 * buffers. Just set ->odirect to force that. 
283 */ 284 if (td_read(td)) 285 td->o.odirect = 1; 286 287 td->io_ops->data = sd; 288 return 0; 289} 290 291static struct ioengine_ops ioengine = { 292 .name = "splice", 293 .version = FIO_IOOPS_VERSION, 294 .init = fio_spliceio_init, 295 .queue = fio_spliceio_queue, 296 .cleanup = fio_spliceio_cleanup, 297 .open_file = generic_open_file, 298 .close_file = generic_close_file, 299 .get_file_size = generic_get_file_size, 300 .flags = FIO_SYNCIO | FIO_PIPEIO, 301}; 302 303#else /* FIO_HAVE_SPLICE */ 304 305/* 306 * When we have a proper configure system in place, we simply wont build 307 * and install this io engine. For now install a crippled version that 308 * just complains and fails to load. 309 */ 310static int fio_spliceio_init(struct thread_data fio_unused *td) 311{ 312 fprintf(stderr, "fio: splice not available\n"); 313 return 1; 314} 315 316static struct ioengine_ops ioengine = { 317 .name = "splice", 318 .version = FIO_IOOPS_VERSION, 319 .init = fio_spliceio_init, 320}; 321 322#endif 323 324static void fio_init fio_spliceio_register(void) 325{ 326 register_ioengine(&ioengine); 327} 328 329static void fio_exit fio_spliceio_unregister(void) 330{ 331 unregister_ioengine(&ioengine); 332} 333