/*
 * Code extracted from drivers/block/genhd.c
 * Copyright (C) 1991-1998 Linus Torvalds
 * Re-organised Feb 1998 Russell King
 *
 * We now have independent partition support from the
 * block drivers, which allows all the partition code to
 * be grouped in one location, and it to be mostly self
 * contained.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/blktrace_api.h>

#include "partitions/check.h"

#ifdef CONFIG_BLK_DEV_MD
extern void md_autodetect_dev(dev_t dev);
#endif

/*
 * disk_name() is used by partition check code and the genhd driver.
 * It formats the devicename of the indicated disk into
 * the supplied buffer (of size at least 32), and returns
 * a pointer to that same buffer (for convenience).
 */

char *disk_name(struct gendisk *hd, int partno, char *buf)
{
	if (!partno)
		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
	else
		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);

	return buf;
}

const char *bdevname(struct block_device *bdev, char *buf)
{
	return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
}

EXPORT_SYMBOL(bdevname);

/*
 * There's very little reason to use this, you should really
 * have a struct block_device just about everywhere and use
 * bdevname() instead.
 */
const char *__bdevname(dev_t dev, char *buffer)
{
	scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
		  MAJOR(dev), MINOR(dev));
	return buffer;
}

EXPORT_SYMBOL(__bdevname);
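/*
 * The helpers above follow the usual block device naming convention:
 * partition 1 of a disk named "sda" is formatted as "sda1", while a disk
 * whose name ends in a digit, such as "mmcblk0", gets a 'p' separator,
 * giving "mmcblk0p1".  An illustrative caller (the variable names here
 * are only an example) supplies a BDEVNAME_SIZE buffer:
 *
 *	char b[BDEVNAME_SIZE];
 *
 *	pr_warn("I/O error on %s\n", bdevname(bdev, b));
 */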
static ssize_t part_partition_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%d\n", p->partno);
}

static ssize_t part_start_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
}

ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
}

static ssize_t part_ro_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%d\n", p->policy ? 1 : 0);
}

static ssize_t part_alignment_offset_show(struct device *dev,
					  struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
}

static ssize_t part_discard_alignment_show(struct device *dev,
					   struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%u\n", p->discard_alignment);
}

ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	int cpu;

	cpu = part_stat_lock();
	part_round_stats(cpu, p);
	part_stat_unlock();
	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u"
		"\n",
		part_stat_read(p, ios[READ]),
		part_stat_read(p, merges[READ]),
		(unsigned long long)part_stat_read(p, sectors[READ]),
		jiffies_to_msecs(part_stat_read(p, ticks[READ])),
		part_stat_read(p, ios[WRITE]),
		part_stat_read(p, merges[WRITE]),
		(unsigned long long)part_stat_read(p, sectors[WRITE]),
		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
		part_in_flight(p),
		jiffies_to_msecs(part_stat_read(p, io_ticks)),
		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}

ssize_t part_inflight_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
		atomic_read(&p->in_flight[1]));
}

#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%d\n", p->make_it_fail);
}

ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	struct hd_struct *p = dev_to_part(dev);
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		p->make_it_fail = (i == 0) ? 0 : 1;

	return count;
}
#endif
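/*
 * The make-it-fail attribute defined below exposes p->make_it_fail to
 * user space for the fail_make_request fault-injection capability: when
 * CONFIG_FAIL_MAKE_REQUEST is enabled, writing a non-zero value to
 * /sys/block/<disk>/<partition>/make-it-fail lets the block layer's
 * should_fail_request() inject I/O failures on that partition, usually
 * together with the knobs under /sys/kernel/debug/fail_make_request/.
 */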
static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
		   NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
#endif

static struct attribute *part_attrs[] = {
	&dev_attr_partition.attr,
	&dev_attr_start.attr,
	&dev_attr_size.attr,
	&dev_attr_ro.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
	NULL
};

static struct attribute_group part_attr_group = {
	.attrs = part_attrs,
};

static const struct attribute_group *part_attr_groups[] = {
	&part_attr_group,
#ifdef CONFIG_BLK_DEV_IO_TRACE
	&blk_trace_attr_group,
#endif
	NULL
};

static void part_release(struct device *dev)
{
	struct hd_struct *p = dev_to_part(dev);
	blk_free_devt(dev->devt);
	free_part_stats(p);
	free_part_info(p);
	kfree(p);
}

static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	struct hd_struct *part = dev_to_part(dev);

	add_uevent_var(env, "PARTN=%u", part->partno);
	if (part->info && part->info->volname[0])
		add_uevent_var(env, "PARTNAME=%s", part->info->volname);
	return 0;
}

struct device_type part_type = {
	.name		= "partition",
	.groups		= part_attr_groups,
	.release	= part_release,
	.uevent		= part_uevent,
};
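/*
 * part_type attaches the attribute groups above to every partition
 * device, so a partition such as sda1 appears in sysfs roughly as
 * follows (an illustrative layout, not an exhaustive list):
 *
 *	/sys/block/sda/sda1/partition		partition number
 *	/sys/block/sda/sda1/start		offset on the disk, in 512-byte sectors
 *	/sys/block/sda/sda1/size		length, in 512-byte sectors
 *	/sys/block/sda/sda1/ro			1 if the partition is read-only
 *	/sys/block/sda/sda1/alignment_offset	bytes
 *	/sys/block/sda/sda1/discard_alignment	bytes
 *	/sys/block/sda/sda1/stat		the same 11 I/O counters as /proc/diskstats
 *	/sys/block/sda/sda1/inflight		reads and writes currently in flight
 */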
static void delete_partition_rcu_cb(struct rcu_head *head)
{
	struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);

	part->start_sect = 0;
	part->nr_sects = 0;
	part_stat_set_all(part, 0);
	put_device(part_to_dev(part));
}

void __delete_partition(struct hd_struct *part)
{
	call_rcu(&part->rcu_head, delete_partition_rcu_cb);
}

void delete_partition(struct gendisk *disk, int partno)
{
	struct disk_part_tbl *ptbl = disk->part_tbl;
	struct hd_struct *part;

	if (partno >= ptbl->len)
		return;

	part = ptbl->part[partno];
	if (!part)
		return;

	rcu_assign_pointer(ptbl->part[partno], NULL);
	rcu_assign_pointer(ptbl->last_lookup, NULL);
	kobject_put(part->holder_dir);
	device_del(part_to_dev(part));

	hd_struct_put(part);
}

static ssize_t whole_disk_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return 0;
}
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
		   whole_disk_show, NULL);

struct hd_struct *add_partition(struct gendisk *disk, int partno,
				sector_t start, sector_t len, int flags,
				struct partition_meta_info *info)
{
	struct hd_struct *p;
	dev_t devt = MKDEV(0, 0);
	struct device *ddev = disk_to_dev(disk);
	struct device *pdev;
	struct disk_part_tbl *ptbl;
	const char *dname;
	int err;

	err = disk_expand_part_tbl(disk, partno);
	if (err)
		return ERR_PTR(err);
	ptbl = disk->part_tbl;

	if (ptbl->part[partno])
		return ERR_PTR(-EBUSY);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return ERR_PTR(-EBUSY);

	if (!init_part_stats(p)) {
		err = -ENOMEM;
		goto out_free;
	}

	seqcount_init(&p->nr_sects_seq);
	pdev = part_to_dev(p);

	p->start_sect = start;
	p->alignment_offset =
		queue_limit_alignment_offset(&disk->queue->limits, start);
	p->discard_alignment =
		queue_limit_discard_alignment(&disk->queue->limits, start);
	p->nr_sects = len;
	p->partno = partno;
	p->policy = get_disk_ro(disk);

	if (info) {
		struct partition_meta_info *pinfo = alloc_part_info(disk);
		if (!pinfo) {
			/* without this, a NULL (ERR_PTR(0)) would be returned */
			err = -ENOMEM;
			goto out_free_stats;
		}
		memcpy(pinfo, info, sizeof(*info));
		p->info = pinfo;
	}

	dname = dev_name(ddev);
	if (isdigit(dname[strlen(dname) - 1]))
		dev_set_name(pdev, "%sp%d", dname, partno);
	else
		dev_set_name(pdev, "%s%d", dname, partno);

	device_initialize(pdev);
	pdev->class = &block_class;
	pdev->type = &part_type;
	pdev->parent = ddev;

	err = blk_alloc_devt(p, &devt);
	if (err)
		goto out_free_info;
	pdev->devt = devt;

	/* delay uevent until 'holders' subdir is created */
	dev_set_uevent_suppress(pdev, 1);
	err = device_add(pdev);
	if (err)
		goto out_put;

	err = -ENOMEM;
	p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
	if (!p->holder_dir)
		goto out_del;

	dev_set_uevent_suppress(pdev, 0);
	if (flags & ADDPART_FLAG_WHOLEDISK) {
		err = device_create_file(pdev, &dev_attr_whole_disk);
		if (err)
			goto out_del;
	}

	/* everything is up and running, commence */
	rcu_assign_pointer(ptbl->part[partno], p);

	/* suppress uevent if the disk suppresses it */
	if (!dev_get_uevent_suppress(ddev))
		kobject_uevent(&pdev->kobj, KOBJ_ADD);

	hd_ref_init(p);
	return p;

out_free_info:
	free_part_info(p);
out_free_stats:
	free_part_stats(p);
out_free:
	kfree(p);
	return ERR_PTR(err);
out_del:
	kobject_put(p->holder_dir);
	device_del(pdev);
out_put:
	put_device(pdev);
	blk_free_devt(devt);
	return ERR_PTR(err);
}

static bool disk_unlock_native_capacity(struct gendisk *disk)
{
	const struct block_device_operations *bdops = disk->fops;

	if (bdops->unlock_native_capacity &&
	    !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
		printk(KERN_CONT "enabling native capacity\n");
		bdops->unlock_native_capacity(disk);
		disk->flags |= GENHD_FL_NATIVE_CAPACITY;
		return true;
	} else {
		printk(KERN_CONT "truncated\n");
		return false;
	}
}

static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
{
	struct disk_part_iter piter;
	struct hd_struct *part;
	int res;

	if (bdev->bd_part_count)
		return -EBUSY;
	res = invalidate_partition(disk, 0);
	if (res)
		return res;

	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
	while ((part = disk_part_iter_next(&piter)))
		delete_partition(disk, part->partno);
	disk_part_iter_exit(&piter);

	return 0;
}
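/*
 * A sketch of how rescan_partitions() below is reached (the call sites
 * live outside this file): it expects bdev->bd_mutex to be held and
 * fails with -EBUSY, via drop_partitions(), while any partition is
 * still open (bd_part_count != 0).  Typical paths are the BLKRRPART
 * ioctl and reopening the whole-disk device after bd_invalidated has
 * been set.
 */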
int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
{
	struct parsed_partitions *state = NULL;
	struct hd_struct *part;
	int p, highest, res;
rescan:
	if (state && !IS_ERR(state)) {
		free_partitions(state);
		state = NULL;
	}

	res = drop_partitions(disk, bdev);
	if (res)
		return res;

	if (disk->fops->revalidate_disk)
		disk->fops->revalidate_disk(disk);
	check_disk_size_change(disk, bdev);
	bdev->bd_invalidated = 0;
	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
		return 0;
	if (IS_ERR(state)) {
		/*
		 * I/O error reading the partition table.  If any
		 * partition code tried to read beyond EOD, retry
		 * after unlocking native capacity.
		 */
		if (PTR_ERR(state) == -ENOSPC) {
			printk(KERN_WARNING "%s: partition table beyond EOD, ",
			       disk->disk_name);
			if (disk_unlock_native_capacity(disk))
				goto rescan;
		}
		return -EIO;
	}
	/*
	 * If any partition code tried to read beyond EOD, try
	 * unlocking native capacity even if partition table is
	 * successfully read as we could be missing some partitions.
	 */
	if (state->access_beyond_eod) {
		printk(KERN_WARNING
		       "%s: partition table partially beyond EOD, ",
		       disk->disk_name);
		if (disk_unlock_native_capacity(disk))
			goto rescan;
	}

	/* tell userspace that the media / partition table may have changed */
	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);

	/* Detect the highest partition number and preallocate
	 * disk->part_tbl.  This is an optimization and not strictly
	 * necessary.
	 */
	for (p = 1, highest = 0; p < state->limit; p++)
		if (state->parts[p].size)
			highest = p;

	disk_expand_part_tbl(disk, highest);

	/* add partitions */
	for (p = 1; p < state->limit; p++) {
		sector_t size, from;
		struct partition_meta_info *info = NULL;

		size = state->parts[p].size;
		if (!size)
			continue;

		from = state->parts[p].from;
		if (from >= get_capacity(disk)) {
			printk(KERN_WARNING
			       "%s: p%d start %llu is beyond EOD, ",
			       disk->disk_name, p, (unsigned long long) from);
			if (disk_unlock_native_capacity(disk))
				goto rescan;
			continue;
		}

		if (from + size > get_capacity(disk)) {
			printk(KERN_WARNING
			       "%s: p%d size %llu extends beyond EOD, ",
			       disk->disk_name, p, (unsigned long long) size);

			if (disk_unlock_native_capacity(disk)) {
				/* free state and restart */
				goto rescan;
			} else {
				/*
				 * we can not ignore partitions of broken tables
				 * created by for example camera firmware, but
				 * we limit them to the end of the disk to avoid
				 * creating invalid block devices
				 */
				size = get_capacity(disk) - from;
			}
		}

		if (state->parts[p].has_info)
			info = &state->parts[p].info;
		part = add_partition(disk, p, from, size,
				     state->parts[p].flags, info);
		if (IS_ERR(part)) {
			printk(KERN_ERR " %s: p%d could not be added: %ld\n",
			       disk->disk_name, p, -PTR_ERR(part));
			continue;
		}
#ifdef CONFIG_BLK_DEV_MD
		if (state->parts[p].flags & ADDPART_FLAG_RAID)
			md_autodetect_dev(part_to_dev(part)->devt);
#endif
	}
	free_partitions(state);
	return 0;
}

int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
{
	int res;

	if (!bdev->bd_invalidated)
		return 0;

	res = drop_partitions(disk, bdev);
	if (res)
		return res;

	set_capacity(disk, 0);
	check_disk_size_change(disk, bdev);
	bdev->bd_invalidated = 0;
	/* tell userspace that the media / partition table may have changed */
	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);

	return 0;
}
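/*
 * read_dev_sector() maps the page-cache page that contains 512-byte
 * sector n of the device and returns a pointer to that sector's data;
 * the Sector cookie keeps the page referenced until the caller drops it
 * with put_dev_sector().  A minimal usage sketch, as in the partition
 * parsers (assumes the caller already holds a reference on bdev):
 *
 *	Sector sect;
 *	unsigned char *data = read_dev_sector(bdev, 0, &sect);
 *
 *	if (data) {
 *		... examine the 512 bytes at data ...
 *		put_dev_sector(sect);
 *	}
 */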
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;
	struct page *page;

	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
				 NULL);
	if (!IS_ERR(page)) {
		if (PageError(page))
			goto fail;
		p->v = page;
		return (unsigned char *)page_address(page) +
				((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9);
fail:
		page_cache_release(page);
	}
	p->v = NULL;
	return NULL;
}

EXPORT_SYMBOL(read_dev_sector);