dm-io.c revision 891ce207011d3d9219f79fd5114c8594bbacc653
/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm-io.h"

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>

static struct bio_set *_bios;

struct dm_io_client {
	mempool_t *pool;
	struct bio_set *bios;
};

/* FIXME: can we shrink this ? */
struct io {
	unsigned long error;
	atomic_t count;
	struct task_struct *sleeper;
	struct dm_io_client *client;
	io_notify_fn callback;
	void *context;
};

/*
 * io contexts are only dynamically allocated for asynchronous
 * io.  Since async io is likely to be the majority of io we'll
 * have the same number of io contexts as bios! (FIXME: must reduce this).
 */
static unsigned _num_ios;
static mempool_t *_io_pool;

/*
 * Temporary functions to allow old and new interfaces to co-exist.
 */
static struct bio_set *bios(struct dm_io_client *client)
{
	return client ? client->bios : _bios;
}

static mempool_t *io_pool(struct dm_io_client *client)
{
	return client ? client->pool : _io_pool;
}

static unsigned int pages_to_ios(unsigned int pages)
{
	return 4 * pages;	/* too many ? */
}

static int resize_pool(unsigned int new_ios)
{
	int r = 0;

	if (_io_pool) {
		if (new_ios == 0) {
			/* free off the pool */
			mempool_destroy(_io_pool);
			_io_pool = NULL;
			bioset_free(_bios);

		} else {
			/* resize the pool */
			r = mempool_resize(_io_pool, new_ios, GFP_KERNEL);
		}

	} else {
		/* create new pool */
		_io_pool = mempool_create_kmalloc_pool(new_ios,
						       sizeof(struct io));
		if (!_io_pool)
			return -ENOMEM;

		_bios = bioset_create(16, 16);
		if (!_bios) {
			mempool_destroy(_io_pool);
			_io_pool = NULL;
			return -ENOMEM;
		}
	}

	if (!r)
		_num_ios = new_ios;

	return r;
}

int dm_io_get(unsigned int num_pages)
{
	return resize_pool(_num_ios + pages_to_ios(num_pages));
}

void dm_io_put(unsigned int num_pages)
{
	resize_pool(_num_ios - pages_to_ios(num_pages));
}
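/*
 * Usage sketch for the old interface (hypothetical caller, not part
 * of this file): clients reserve io contexts up front and release
 * them when finished.  pages_to_ios() above turns the page count
 * into an io-context count.
 *
 *	r = dm_io_get(nr_pages);	(grows the pool by 4 * nr_pages ios)
 *	if (r)
 *		return r;
 *	... issue io with the dm_io_sync / dm_io_async family ...
 *	dm_io_put(nr_pages);		(releases the reservation again)
 */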
/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * In order to save a memory allocation we store this in the last
 * bvec which we know is unused (blech).
 * XXX This is ugly and can OOPS with some configs... find another way.
 *---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region)
{
	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
}

static inline unsigned bio_get_region(struct bio *bio)
{
	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
}

/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
	if (error)
		set_bit(region, &io->error);

	if (atomic_dec_and_test(&io->count)) {
		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			int r = io->error;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, io_pool(io->client));
			fn(r, context);
		}
	}
}

static int endio(struct bio *bio, unsigned int done, int error)
{
	struct io *io;
	unsigned region;

	/* keep going until we've finished */
	if (bio->bi_size)
		return 1;

	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	io = bio->bi_private;
	region = bio_get_region(bio);

	bio->bi_max_vecs++;
	bio_put(bio);

	dec_count(io, region, error);

	return 0;
}

/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	void (*next_page)(struct dpages *dp);

	unsigned context_u;
	void *context_ptr;
};

/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	unsigned o = dp->context_u;
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	*p = pl->page;
	*len = PAGE_SIZE - o;
	*offset = o;
}

static void list_next_page(struct dpages *dp)
{
	struct page_list *pl = (struct page_list *) dp->context_ptr;
	dp->context_ptr = pl->next;
	dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
	dp->get_page = list_get_page;
	dp->next_page = list_next_page;
	dp->context_u = offset;
	dp->context_ptr = pl;
}
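/*
 * Example with illustrative values: after list_dp_init(&dp, pl, 512),
 * the first get_page() yields pl->page with offset 512 and
 * len == PAGE_SIZE - 512; each next_page() advances to pl->next and
 * resets the offset to 0, so every following page is used in full.
 */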
/*
 * Functions for getting the pages from a bvec.
 */
static void bvec_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	*p = bvec->bv_page;
	*len = bvec->bv_len;
	*offset = bvec->bv_offset;
}

static void bvec_next_page(struct dpages *dp)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	dp->context_ptr = bvec + 1;
}

static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
	dp->get_page = bvec_get_page;
	dp->next_page = bvec_next_page;
	dp->context_ptr = bvec;
}

/*
 * Functions for getting the pages from a vmalloc'd buffer.
 */
static void vm_get_page(struct dpages *dp,
			struct page **p, unsigned long *len, unsigned *offset)
{
	*p = vmalloc_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = vm_get_page;
	dp->next_page = vm_next_page;
	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
	dp->context_ptr = data;
}

static void dm_bio_destructor(struct bio *bio)
{
	struct io *io = bio->bi_private;

	bio_free(bio, bios(io->client));
}

/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
static void do_region(int rw, unsigned int region, struct io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;

	while (remaining) {
		/*
		 * Allocate a suitably sized bio: we add an extra
		 * bvec for bio_get/set_region() and decrement bi_max_vecs
		 * to hide it from bio_add_page().
		 */
		num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2;
		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, bios(io->client));
		bio->bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		bio->bi_private = io;
		bio->bi_destructor = dm_bio_destructor;
		bio->bi_max_vecs--;
		bio_set_region(bio, region);

		/*
		 * Try and add as many pages as possible.
		 */
		while (remaining) {
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(rw, bio);
	}
}
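/*
 * Worked example for the bvec sizing in do_region() (assuming 4KiB
 * pages, so PAGE_SIZE >> SECTOR_SHIFT == 8): a region of 17 sectors
 * gives num_bvecs = 17/8 + 2 = 4, i.e. up to three data pages plus
 * the hidden spare bvec that bio_set_region() writes to.
 */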
static void dispatch_io(int rw, unsigned int num_regions,
			struct io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	if (sync)
		rw |= (1 << BIO_RW_SYNC);

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count)
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}

static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	struct io io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

	io.error = 0;
	atomic_set(&io.count, 1); /* see dispatch_io() */
	io.sleeper = current;
	io.client = client;

	dispatch_io(rw, num_regions, where, dp, &io, 1);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io.count) || signal_pending(current))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	if (atomic_read(&io.count))
		return -EINTR;

	if (error_bits)
		*error_bits = io.error;

	return io.error ? -EIO : 0;
}

static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(io_pool(client), GFP_NOIO);
	io->error = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;
	io->client = client;
	io->callback = fn;
	io->context = context;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}

int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
	       struct page_list *pl, unsigned int offset,
	       unsigned long *error_bits)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
		    struct bio_vec *bvec, unsigned long *error_bits)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
		  void *data, unsigned long *error_bits)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}

int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
		struct page_list *pl, unsigned int offset,
		io_notify_fn fn, void *context)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
		     struct bio_vec *bvec, io_notify_fn fn, void *context)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
		   void *data, io_notify_fn fn, void *context)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}

EXPORT_SYMBOL(dm_io_get);
EXPORT_SYMBOL(dm_io_put);
EXPORT_SYMBOL(dm_io_sync);
EXPORT_SYMBOL(dm_io_async);
EXPORT_SYMBOL(dm_io_sync_bvec);
EXPORT_SYMBOL(dm_io_async_bvec);
EXPORT_SYMBOL(dm_io_sync_vm);
EXPORT_SYMBOL(dm_io_async_vm);
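/*
 * Usage sketch (hypothetical caller, not part of this file): read the
 * first eight sectors of a device into a vmalloc'd buffer and wait for
 * completion.  bdev and buf are assumed to be set up elsewhere.
 *
 *	struct io_region where = {
 *		.bdev   = bdev,
 *		.sector = 0,
 *		.count  = 8,
 *	};
 *	unsigned long error_bits;
 *	int r = dm_io_sync_vm(1, &where, READ, buf, &error_bits);
 *
 * Each set bit in error_bits identifies a region that failed; the
 * async variants take an io_notify_fn and return without sleeping.
 */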