/* splice.c — revision 0aa417a9515f9dc17523c1870f6409370e94ca19 */
/*
 * splice engine
 *
 * IO engine that transfers data by doing splices to/from pipes and
 * the files.
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <sys/poll.h>
#include <sys/mman.h>

#include "../fio.h"

#ifdef FIO_HAVE_SPLICE

/*
 * Per-thread engine state.
 */
struct spliceio_data {
	int pipe[2];			/* transit pipe: [0] read end, [1] write end */
	int vmsplice_to_user;		/* kernel supports vmsplice() to user space */
	int vmsplice_to_user_map;	/* vmsplice can map pipe pages directly */
};

/*
 * vmsplice didn't use to support splicing to user space, this is the old
 * variant of getting that job done. Doesn't make a lot of sense, but it
 * uses splices to move data from the source into a pipe.
 *
 * Returns io_u->xfer_buflen on full transfer, -errno on failure.
 */
static int fio_splice_read_old(struct thread_data *td, struct io_u *io_u)
{
	struct spliceio_data *sd = td->io_ops->data;
	struct fio_file *f = io_u->file;
	int ret, ret2, buflen;
	off_t offset;
	void *p;

	offset = io_u->offset;
	buflen = io_u->xfer_buflen;
	p = io_u->xfer_buf;
	while (buflen) {
		int this_len = buflen;

		/* move at most SPLICE_DEF_SIZE per splice call */
		if (this_len > SPLICE_DEF_SIZE)
			this_len = SPLICE_DEF_SIZE;

		/* file -> pipe */
		ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
		if (ret < 0) {
			/* NOTE(review): retries ENODATA/EAGAIN with no backoff */
			if (errno == ENODATA || errno == EAGAIN)
				continue;

			return -errno;
		}

		buflen -= ret;

		/* drain the spliced bytes out of the pipe into the user buffer */
		while (ret) {
			ret2 = read(sd->pipe[0], p, ret);
			if (ret2 < 0)
				return -errno;

			ret -= ret2;
			p += ret2;
		}
	}

	return io_u->xfer_buflen;
}

/*
 * We can now vmsplice into userspace, so do the transfer by splicing into
 * a pipe and vmsplicing that into userspace.
74 */ 75static int fio_splice_read(struct thread_data *td, struct io_u *io_u) 76{ 77 struct spliceio_data *sd = td->io_ops->data; 78 struct fio_file *f = io_u->file; 79 struct iovec iov; 80 int ret , buflen, mmap_len; 81 off_t offset; 82 void *p, *map; 83 84restart: 85 ret = 0; 86 offset = io_u->offset; 87 mmap_len = buflen = io_u->xfer_buflen; 88 89 if (sd->vmsplice_to_user_map) { 90 map = mmap(io_u->xfer_buf, buflen, PROT_READ, MAP_PRIVATE|OS_MAP_ANON, 0, 0); 91 if (map == MAP_FAILED) { 92 td_verror(td, errno, "mmap io_u"); 93 return -1; 94 } 95 96 p = map; 97 } else { 98 map = NULL; 99 p = io_u->xfer_buf; 100 } 101 102 while (buflen) { 103 int this_len = buflen; 104 int flags = 0; 105 106 if (this_len > SPLICE_DEF_SIZE) { 107 this_len = SPLICE_DEF_SIZE; 108 flags = SPLICE_F_MORE; 109 } 110 111 ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len,flags); 112 if (ret < 0) { 113 if (errno == ENODATA || errno == EAGAIN) 114 continue; 115 116 td_verror(td, errno, "splice-from-fd"); 117 break; 118 } 119 120 buflen -= ret; 121 iov.iov_base = p; 122 iov.iov_len = ret; 123 p += ret; 124 125 while (iov.iov_len) { 126 ret = vmsplice(sd->pipe[0], &iov, 1, SPLICE_F_MOVE); 127 if (ret < 0) { 128 if (errno == EFAULT && sd->vmsplice_to_user_map) { 129 sd->vmsplice_to_user_map = 0; 130 munmap(map, mmap_len); 131 goto restart; 132 } 133 if (errno == EBADF) { 134 ret = -EBADF; 135 break; 136 } 137 td_verror(td, errno, "vmsplice"); 138 break; 139 } else if (!ret) { 140 td_verror(td, ENODATA, "vmsplice"); 141 ret = -1; 142 break; 143 } 144 145 iov.iov_len -= ret; 146 iov.iov_base += ret; 147 } 148 if (ret < 0) 149 break; 150 } 151 152 if (sd->vmsplice_to_user_map && munmap(map, mmap_len) < 0) { 153 td_verror(td, errno, "munnap io_u"); 154 return -1; 155 } 156 if (ret < 0) 157 return ret; 158 159 return io_u->xfer_buflen; 160} 161 162/* 163 * For splice writing, we can vmsplice our data buffer directly into a 164 * pipe and then splice that to a file. 
165 */ 166static int fio_splice_write(struct thread_data *td, struct io_u *io_u) 167{ 168 struct spliceio_data *sd = td->io_ops->data; 169 struct iovec iov = { 170 .iov_base = io_u->xfer_buf, 171 .iov_len = io_u->xfer_buflen, 172 }; 173 struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; 174 struct fio_file *f = io_u->file; 175 off_t off = io_u->offset; 176 int ret, ret2; 177 178 while (iov.iov_len) { 179 if (poll(&pfd, 1, -1) < 0) 180 return errno; 181 182 ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK); 183 if (ret < 0) 184 return -errno; 185 186 iov.iov_len -= ret; 187 iov.iov_base += ret; 188 189 while (ret) { 190 ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0); 191 if (ret2 < 0) 192 return -errno; 193 194 ret -= ret2; 195 } 196 } 197 198 return io_u->xfer_buflen; 199} 200 201static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) 202{ 203 struct spliceio_data *sd = td->io_ops->data; 204 int ret; 205 206 fio_ro_check(td, io_u); 207 208 if (io_u->ddir == DDIR_READ) { 209 if (sd->vmsplice_to_user) { 210 ret = fio_splice_read(td, io_u); 211 /* 212 * This kernel doesn't support vmsplice to user 213 * space. Reset the vmsplice_to_user flag, so that 214 * we retry below and don't hit this path again. 
215 */ 216 if (ret == -EBADF) 217 sd->vmsplice_to_user = 0; 218 } 219 if (!sd->vmsplice_to_user) 220 ret = fio_splice_read_old(td, io_u); 221 } else if (io_u->ddir == DDIR_WRITE) 222 ret = fio_splice_write(td, io_u); 223 else 224 ret = fsync(io_u->file->fd); 225 226 if (ret != (int) io_u->xfer_buflen) { 227 if (ret >= 0) { 228 io_u->resid = io_u->xfer_buflen - ret; 229 io_u->error = 0; 230 return FIO_Q_COMPLETED; 231 } else 232 io_u->error = errno; 233 } 234 235 if (io_u->error) { 236 td_verror(td, io_u->error, "xfer"); 237 if (io_u->error == EINVAL) 238 log_err("fio: looks like splice doesn't work on this" 239 " file system\n"); 240 } 241 242 return FIO_Q_COMPLETED; 243} 244 245static void fio_spliceio_cleanup(struct thread_data *td) 246{ 247 struct spliceio_data *sd = td->io_ops->data; 248 249 if (sd) { 250 close(sd->pipe[0]); 251 close(sd->pipe[1]); 252 free(sd); 253 } 254} 255 256static int fio_spliceio_init(struct thread_data *td) 257{ 258 struct spliceio_data *sd = malloc(sizeof(*sd)); 259 260 if (pipe(sd->pipe) < 0) { 261 td_verror(td, errno, "pipe"); 262 free(sd); 263 return 1; 264 } 265 266 /* 267 * Assume this work, we'll reset this if it doesn't 268 */ 269 sd->vmsplice_to_user = 1; 270 271 /* 272 * Works with "real" vmsplice to user, eg mapping pages directly. 273 * Reset if we fail. 274 */ 275 sd->vmsplice_to_user_map = 1; 276 277 /* 278 * And if vmsplice_to_user works, we definitely need aligned 279 * buffers. Just set ->odirect to force that. 
280 */ 281 if (td_read(td)) 282 td->o.odirect = 1; 283 284 td->io_ops->data = sd; 285 return 0; 286} 287 288static struct ioengine_ops ioengine = { 289 .name = "splice", 290 .version = FIO_IOOPS_VERSION, 291 .init = fio_spliceio_init, 292 .queue = fio_spliceio_queue, 293 .cleanup = fio_spliceio_cleanup, 294 .open_file = generic_open_file, 295 .close_file = generic_close_file, 296 .flags = FIO_SYNCIO, 297}; 298 299#else /* FIO_HAVE_SPLICE */ 300 301/* 302 * When we have a proper configure system in place, we simply wont build 303 * and install this io engine. For now install a crippled version that 304 * just complains and fails to load. 305 */ 306static int fio_spliceio_init(struct thread_data fio_unused *td) 307{ 308 fprintf(stderr, "fio: splice not available\n"); 309 return 1; 310} 311 312static struct ioengine_ops ioengine = { 313 .name = "splice", 314 .version = FIO_IOOPS_VERSION, 315 .init = fio_spliceio_init, 316}; 317 318#endif 319 320static void fio_init fio_spliceio_register(void) 321{ 322 register_ioengine(&ioengine); 323} 324 325static void fio_exit fio_spliceio_unregister(void) 326{ 327 unregister_ioengine(&ioengine); 328} 329