dm-stripe.c revision 1a66a08ae82b16eb40705ad112ff28873981af92
1/* 2 * Copyright (C) 2001-2003 Sistina Software (UK) Limited. 3 * 4 * This file is released under the GPL. 5 */ 6 7#include <linux/device-mapper.h> 8 9#include <linux/module.h> 10#include <linux/init.h> 11#include <linux/blkdev.h> 12#include <linux/bio.h> 13#include <linux/slab.h> 14#include <linux/log2.h> 15 16#define DM_MSG_PREFIX "striped" 17#define DM_IO_ERROR_THRESHOLD 15 18 19struct stripe { 20 struct dm_dev *dev; 21 sector_t physical_start; 22 23 atomic_t error_count; 24}; 25 26struct stripe_c { 27 uint32_t stripes; 28 int stripes_shift; 29 sector_t stripes_mask; 30 31 /* The size of this target / num. stripes */ 32 sector_t stripe_width; 33 34 /* stripe chunk size */ 35 uint32_t chunk_shift; 36 sector_t chunk_mask; 37 38 /* Needed for handling events */ 39 struct dm_target *ti; 40 41 /* Work struct used for triggering events*/ 42 struct work_struct trigger_event; 43 44 struct stripe stripe[0]; 45}; 46 47/* 48 * An event is triggered whenever a drive 49 * drops out of a stripe volume. 50 */ 51static void trigger_event(struct work_struct *work) 52{ 53 struct stripe_c *sc = container_of(work, struct stripe_c, 54 trigger_event); 55 dm_table_event(sc->ti->table); 56} 57 58static inline struct stripe_c *alloc_context(unsigned int stripes) 59{ 60 size_t len; 61 62 if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), 63 stripes)) 64 return NULL; 65 66 len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); 67 68 return kmalloc(len, GFP_KERNEL); 69} 70 71/* 72 * Parse a single <dev> <sector> pair 73 */ 74static int get_stripe(struct dm_target *ti, struct stripe_c *sc, 75 unsigned int stripe, char **argv) 76{ 77 unsigned long long start; 78 char dummy; 79 80 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1) 81 return -EINVAL; 82 83 if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), 84 &sc->stripe[stripe].dev)) 85 return -ENXIO; 86 87 sc->stripe[stripe].physical_start = start; 88 89 return 0; 90} 91 92/* 93 * Construct a striped mapping. 94 * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+ 95 */ 96static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) 97{ 98 struct stripe_c *sc; 99 sector_t width; 100 uint32_t stripes; 101 uint32_t chunk_size; 102 int r; 103 unsigned int i; 104 105 if (argc < 2) { 106 ti->error = "Not enough arguments"; 107 return -EINVAL; 108 } 109 110 if (kstrtouint(argv[0], 10, &stripes) || !stripes) { 111 ti->error = "Invalid stripe count"; 112 return -EINVAL; 113 } 114 115 if (kstrtouint(argv[1], 10, &chunk_size)) { 116 ti->error = "Invalid chunk_size"; 117 return -EINVAL; 118 } 119 120 /* 121 * chunk_size is a power of two 122 */ 123 if (!is_power_of_2(chunk_size) || 124 (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { 125 ti->error = "Invalid chunk size"; 126 return -EINVAL; 127 } 128 129 if (ti->len & (chunk_size - 1)) { 130 ti->error = "Target length not divisible by " 131 "chunk size"; 132 return -EINVAL; 133 } 134 135 width = ti->len; 136 if (sector_div(width, stripes)) { 137 ti->error = "Target length not divisible by " 138 "number of stripes"; 139 return -EINVAL; 140 } 141 142 /* 143 * Do we have enough arguments for that many stripes ? 144 */ 145 if (argc != (2 + 2 * stripes)) { 146 ti->error = "Not enough destinations " 147 "specified"; 148 return -EINVAL; 149 } 150 151 sc = alloc_context(stripes); 152 if (!sc) { 153 ti->error = "Memory allocation for striped context " 154 "failed"; 155 return -ENOMEM; 156 } 157 158 INIT_WORK(&sc->trigger_event, trigger_event); 159 160 /* Set pointer to dm target; used in trigger_event */ 161 sc->ti = ti; 162 sc->stripes = stripes; 163 sc->stripe_width = width; 164 165 if (stripes & (stripes - 1)) 166 sc->stripes_shift = -1; 167 else { 168 sc->stripes_shift = ffs(stripes) - 1; 169 sc->stripes_mask = ((sector_t) stripes) - 1; 170 } 171 172 ti->split_io = chunk_size; 173 ti->num_flush_requests = stripes; 174 ti->num_discard_requests = stripes; 175 176 sc->chunk_shift = ffs(chunk_size) - 1; 177 sc->chunk_mask = ((sector_t) chunk_size) - 1; 178 179 /* 180 * Get the stripe destinations. 181 */ 182 for (i = 0; i < stripes; i++) { 183 argv += 2; 184 185 r = get_stripe(ti, sc, i, argv); 186 if (r < 0) { 187 ti->error = "Couldn't parse stripe destination"; 188 while (i--) 189 dm_put_device(ti, sc->stripe[i].dev); 190 kfree(sc); 191 return r; 192 } 193 atomic_set(&(sc->stripe[i].error_count), 0); 194 } 195 196 ti->private = sc; 197 198 return 0; 199} 200 201static void stripe_dtr(struct dm_target *ti) 202{ 203 unsigned int i; 204 struct stripe_c *sc = (struct stripe_c *) ti->private; 205 206 for (i = 0; i < sc->stripes; i++) 207 dm_put_device(ti, sc->stripe[i].dev); 208 209 flush_work_sync(&sc->trigger_event); 210 kfree(sc); 211} 212 213static void stripe_map_sector(struct stripe_c *sc, sector_t sector, 214 uint32_t *stripe, sector_t *result) 215{ 216 sector_t offset = dm_target_offset(sc->ti, sector); 217 sector_t chunk = offset >> sc->chunk_shift; 218 219 if (sc->stripes_shift < 0) 220 *stripe = sector_div(chunk, sc->stripes); 221 else { 222 *stripe = chunk & sc->stripes_mask; 223 chunk >>= sc->stripes_shift; 224 } 225 226 *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask); 227} 228 229static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, 230 uint32_t target_stripe, sector_t *result) 231{ 232 uint32_t stripe; 233 234 stripe_map_sector(sc, sector, &stripe, result); 235 if (stripe == target_stripe) 236 return; 237 *result &= ~sc->chunk_mask; /* round down */ 238 if (target_stripe < stripe) 239 *result += sc->chunk_mask + 1; /* next chunk */ 240} 241 242static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, 243 uint32_t target_stripe) 244{ 245 sector_t begin, end; 246 247 stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin); 248 stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio), 249 target_stripe, &end); 250 if (begin < end) { 251 bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; 252 bio->bi_sector = begin + sc->stripe[target_stripe].physical_start; 253 bio->bi_size = to_bytes(end - begin); 254 return DM_MAPIO_REMAPPED; 255 } else { 256 /* The range doesn't map to the target stripe */ 257 bio_endio(bio, 0); 258 return DM_MAPIO_SUBMITTED; 259 } 260} 261 262static int stripe_map(struct dm_target *ti, struct bio *bio, 263 union map_info *map_context) 264{ 265 struct stripe_c *sc = ti->private; 266 uint32_t stripe; 267 unsigned target_request_nr; 268 269 if (bio->bi_rw & REQ_FLUSH) { 270 target_request_nr = map_context->target_request_nr; 271 BUG_ON(target_request_nr >= sc->stripes); 272 bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; 273 return DM_MAPIO_REMAPPED; 274 } 275 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 276 target_request_nr = map_context->target_request_nr; 277 BUG_ON(target_request_nr >= sc->stripes); 278 return stripe_map_discard(sc, bio, target_request_nr); 279 } 280 281 stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); 282 283 bio->bi_sector += sc->stripe[stripe].physical_start; 284 bio->bi_bdev = sc->stripe[stripe].dev->bdev; 285 286 return DM_MAPIO_REMAPPED; 287} 288 289/* 290 * Stripe status: 291 * 292 * INFO 293 * #stripes [stripe_name <stripe_name>] [group word count] 294 * [error count 'A|D' <error count 'A|D'>] 295 * 296 * TABLE 297 * #stripes [stripe chunk size] 298 * [stripe_name physical_start <stripe_name physical_start>] 299 * 300 */ 301 302static int stripe_status(struct dm_target *ti, 303 status_type_t type, char *result, unsigned int maxlen) 304{ 305 struct stripe_c *sc = (struct stripe_c *) ti->private; 306 char buffer[sc->stripes + 1]; 307 unsigned int sz = 0; 308 unsigned int i; 309 310 switch (type) { 311 case STATUSTYPE_INFO: 312 DMEMIT("%d ", sc->stripes); 313 for (i = 0; i < sc->stripes; i++) { 314 DMEMIT("%s ", sc->stripe[i].dev->name); 315 buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ? 316 'D' : 'A'; 317 } 318 buffer[i] = '\0'; 319 DMEMIT("1 %s", buffer); 320 break; 321 322 case STATUSTYPE_TABLE: 323 DMEMIT("%d %llu", sc->stripes, 324 (unsigned long long)sc->chunk_mask + 1); 325 for (i = 0; i < sc->stripes; i++) 326 DMEMIT(" %s %llu", sc->stripe[i].dev->name, 327 (unsigned long long)sc->stripe[i].physical_start); 328 break; 329 } 330 return 0; 331} 332 333static int stripe_end_io(struct dm_target *ti, struct bio *bio, 334 int error, union map_info *map_context) 335{ 336 unsigned i; 337 char major_minor[16]; 338 struct stripe_c *sc = ti->private; 339 340 if (!error) 341 return 0; /* I/O complete */ 342 343 if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) 344 return error; 345 346 if (error == -EOPNOTSUPP) 347 return error; 348 349 memset(major_minor, 0, sizeof(major_minor)); 350 sprintf(major_minor, "%d:%d", 351 MAJOR(disk_devt(bio->bi_bdev->bd_disk)), 352 MINOR(disk_devt(bio->bi_bdev->bd_disk))); 353 354 /* 355 * Test to see which stripe drive triggered the event 356 * and increment error count for all stripes on that device. 357 * If the error count for a given device exceeds the threshold 358 * value we will no longer trigger any further events. 359 */ 360 for (i = 0; i < sc->stripes; i++) 361 if (!strcmp(sc->stripe[i].dev->name, major_minor)) { 362 atomic_inc(&(sc->stripe[i].error_count)); 363 if (atomic_read(&(sc->stripe[i].error_count)) < 364 DM_IO_ERROR_THRESHOLD) 365 schedule_work(&sc->trigger_event); 366 } 367 368 return error; 369} 370 371static int stripe_iterate_devices(struct dm_target *ti, 372 iterate_devices_callout_fn fn, void *data) 373{ 374 struct stripe_c *sc = ti->private; 375 int ret = 0; 376 unsigned i = 0; 377 378 do { 379 ret = fn(ti, sc->stripe[i].dev, 380 sc->stripe[i].physical_start, 381 sc->stripe_width, data); 382 } while (!ret && ++i < sc->stripes); 383 384 return ret; 385} 386 387static void stripe_io_hints(struct dm_target *ti, 388 struct queue_limits *limits) 389{ 390 struct stripe_c *sc = ti->private; 391 unsigned chunk_size = (sc->chunk_mask + 1) << 9; 392 393 blk_limits_io_min(limits, chunk_size); 394 blk_limits_io_opt(limits, chunk_size * sc->stripes); 395} 396 397static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 398 struct bio_vec *biovec, int max_size) 399{ 400 struct stripe_c *sc = ti->private; 401 sector_t bvm_sector = bvm->bi_sector; 402 uint32_t stripe; 403 struct request_queue *q; 404 405 stripe_map_sector(sc, bvm_sector, &stripe, &bvm_sector); 406 407 q = bdev_get_queue(sc->stripe[stripe].dev->bdev); 408 if (!q->merge_bvec_fn) 409 return max_size; 410 411 bvm->bi_bdev = sc->stripe[stripe].dev->bdev; 412 bvm->bi_sector = sc->stripe[stripe].physical_start + bvm_sector; 413 414 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 415} 416 417static struct target_type stripe_target = { 418 .name = "striped", 419 .version = {1, 4, 0}, 420 .module = THIS_MODULE, 421 .ctr = stripe_ctr, 422 .dtr = stripe_dtr, 423 .map = stripe_map, 424 .end_io = stripe_end_io, 425 .status = stripe_status, 426 .iterate_devices = stripe_iterate_devices, 427 .io_hints = stripe_io_hints, 428 .merge = stripe_merge, 429}; 430 431int __init dm_stripe_init(void) 432{ 433 int r; 434 435 r = dm_register_target(&stripe_target); 436 if (r < 0) { 437 DMWARN("target registration failed"); 438 return r; 439 } 440 441 return r; 442} 443 444void dm_stripe_exit(void) 445{ 446 dm_unregister_target(&stripe_target); 447} 448