drbd_nl.c revision 8d49a77568d1105ff3e64aec484dac059f54824e
/*
   drbd_nl.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/connector.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_wrappers.h"
#include <asm/unaligned.h>
#include <linux/drbd_tag_magic.h>
#include <linux/drbd_limits.h>
#include <linux/compiler.h>
#include <linux/kthread.h>

static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);

/* see get_sb_bdev and bd_claim */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

/* Generate the tag_list to struct functions */
#define NL_PACKET(name, number, fields) \
static int name ## _from_tags(struct drbd_conf *mdev, \
	unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
static int name ## _from_tags(struct drbd_conf *mdev, \
	unsigned short *tags, struct name *arg) \
{ \
	int tag; \
	int dlen; \
	\
	while ((tag = get_unaligned(tags++)) != TT_END) { \
		dlen = get_unaligned(tags++); \
		switch (tag_number(tag)) { \
		fields \
		default: \
			if (tag & T_MANDATORY) { \
				dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \
				return 0; \
			} \
		} \
		tags = (unsigned short *)((char *)tags + dlen); \
	} \
	return 1; \
}
#define NL_INTEGER(pn, pr, member) \
	case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
		arg->member = get_unaligned((int *)(tags)); \
		break;
#define NL_INT64(pn, pr, member) \
	case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
		arg->member = get_unaligned((u64 *)(tags)); \
		break;
#define NL_BIT(pn, pr, member) \
	case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
		arg->member = *(char *)(tags) ? 1 : 0; \
		break;
#define NL_STRING(pn, pr, member, len) \
	case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
		if (dlen > len) { \
			dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \
				#member, dlen, (unsigned int)len); \
			return 0; \
		} \
		arg->member ## _len = dlen; \
		memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
		break;
#include "linux/drbd_nl.h"
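
/* For reference: the tag list consumed by the generated *_from_tags()
 * functions above is a packed sequence of
 *	u16 tag;	 tag_number() bits | tag type, optionally T_MANDATORY
 *	u16 dlen;	 payload length in bytes
 *	u8  data[dlen];
 * terminated by a TT_END tag.  Unknown tags are skipped by advancing
 * dlen bytes, unless T_MANDATORY is set, in which case parsing fails. */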

/* Generate the struct to tag_list functions */
#define NL_PACKET(name, number, fields) \
static unsigned short* \
name ## _to_tags(struct drbd_conf *mdev, \
	struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
static unsigned short* \
name ## _to_tags(struct drbd_conf *mdev, \
	struct name *arg, unsigned short *tags) \
{ \
	fields \
	return tags; \
}

#define NL_INTEGER(pn, pr, member) \
	put_unaligned(pn | pr | TT_INTEGER, tags++);	\
	put_unaligned(sizeof(int), tags++);		\
	put_unaligned(arg->member, (int *)tags);	\
	tags = (unsigned short *)((char *)tags+sizeof(int));
#define NL_INT64(pn, pr, member) \
	put_unaligned(pn | pr | TT_INT64, tags++);	\
	put_unaligned(sizeof(u64), tags++);		\
	put_unaligned(arg->member, (u64 *)tags);	\
	tags = (unsigned short *)((char *)tags+sizeof(u64));
#define NL_BIT(pn, pr, member) \
	put_unaligned(pn | pr | TT_BIT, tags++);	\
	put_unaligned(sizeof(char), tags++);		\
	*(char *)tags = arg->member;			\
	tags = (unsigned short *)((char *)tags+sizeof(char));
#define NL_STRING(pn, pr, member, len) \
	put_unaligned(pn | pr | TT_STRING, tags++);	\
	put_unaligned(arg->member ## _len, tags++);	\
	memcpy(tags, arg->member, arg->member ## _len);	\
	tags = (unsigned short *)((char *)tags + arg->member ## _len);
#include "linux/drbd_nl.h"

void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
void drbd_nl_send_reply(struct cn_msg *, int);

int drbd_khelper(struct drbd_conf *mdev, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			NULL, /* Will be set to address family */
			NULL, /* Will be set to address */
			NULL };

	char mb[12], af[20], ad[60], *afs;
	char *argv[] = {usermode_helper, cmd, mb, NULL };
	int ret;

	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));

	if (get_net_conf(mdev)) {
		switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
		case AF_INET6:
			afs = "ipv6";
			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
				 &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
			break;
		case AF_INET:
			afs = "ipv4";
			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
				 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
			break;
		default:
			afs = "ssocks";
			snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
				 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
		}
		snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
		envp[3]=af;
		envp[4]=ad;
		put_net_conf(mdev);
	}

	/* The helper may take some time.
	 * write out any unsynced meta data changes now */
	drbd_md_sync(mdev);

	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);

	drbd_bcast_ev_helper(mdev, cmd);
	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
	if (ret)
		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	else
		dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}
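
/* Note on the exit code handling above: the final argument 1 to
 * call_usermodehelper() means "wait for the helper process to finish"
 * (UMH_WAIT_PROC in kernels of this vintage), so ret carries a
 * wait()-style status and (ret >> 8) & 0xff is the helper's exit code,
 * i.e. WEXITSTATUS. */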

enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
{
	char *ex_to_string;
	int r;
	enum drbd_disk_state nps;
	enum drbd_fencing_p fp;

	D_ASSERT(mdev->state.pdsk == D_UNKNOWN);

	if (get_ldev_if_state(mdev, D_CONSISTENT)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	} else {
		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
		nps = mdev->state.pdsk;
		goto out;
	}

	r = drbd_khelper(mdev, "fence-peer");

	switch ((r>>8) & 0xff) {
	case 3: /* peer is inconsistent */
		ex_to_string = "peer is inconsistent or worse";
		nps = D_INCONSISTENT;
		break;
	case 4: /* peer got outdated, or was already outdated */
		ex_to_string = "peer was fenced";
		nps = D_OUTDATED;
		break;
	case 5: /* peer was down */
		if (mdev->state.disk == D_UP_TO_DATE) {
			/* we will(have) create(d) a new UUID anyways... */
			ex_to_string = "peer is unreachable, assumed to be dead";
			nps = D_OUTDATED;
		} else {
			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
			nps = mdev->state.pdsk;
		}
		break;
	case 6: /* Peer is primary, voluntarily outdate myself.
		 * This is useful when an unconnected R_SECONDARY is asked to
		 * become R_PRIMARY, but finds the other peer being active. */
		ex_to_string = "peer is active";
		dev_warn(DEV, "Peer is primary, outdating myself.\n");
		nps = D_UNKNOWN;
		_drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
		break;
	case 7:
		if (fp != FP_STONITH)
			dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
		ex_to_string = "peer was stonithed";
		nps = D_OUTDATED;
		break;
	default:
		/* The script is broken ... */
		nps = D_UNKNOWN;
		dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
		return nps;
	}

	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
			(r>>8) & 0xff, ex_to_string);

out:
	if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
		/* The handler was not successful... unfreeze here, the
		   state engine can not unfreeze... */
		_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
	}

	return nps;
}

static int _try_outdate_peer_async(void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)data;
	enum drbd_disk_state nps;

	nps = drbd_try_outdate_peer(mdev);
	drbd_request_state(mdev, NS(pdsk, nps));

	return 0;
}

void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
{
	struct task_struct *opa;

	opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
	if (IS_ERR(opa))
		dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
}
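
/* Try to change the role, retrying up to max_tries times.  Between
 * attempts the requested mask/val pair is adjusted depending on why the
 * previous attempt was rejected: drop the pdsk part once a connection
 * could suddenly be established again, force the local disk UpToDate,
 * or fence the peer via drbd_try_outdate_peer(). */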
enum drbd_state_rv
drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
{
	const int max_tries = 4;
	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
	int try = 0;
	int forced = 0;
	union drbd_state mask, val;
	enum drbd_disk_state nps;

	if (new_role == R_PRIMARY)
		request_ping(mdev); /* Detect a dead peer ASAP */

	mutex_lock(&mdev->state_mutex);

	mask.i = 0; mask.role = R_MASK;
	val.i  = 0; val.role  = new_role;

	while (try++ < max_tries) {
		rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);

		/* in case we first succeeded to outdate,
		 * but now suddenly could establish a connection */
		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
			val.pdsk = 0;
			mask.pdsk = 0;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
		    (mdev->state.disk < D_UP_TO_DATE &&
		     mdev->state.disk >= D_INCONSISTENT)) {
			mask.disk = D_MASK;
			val.disk  = D_UP_TO_DATE;
			forced = 1;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK &&
		    mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
			nps = drbd_try_outdate_peer(mdev);

			if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
				val.disk = D_UP_TO_DATE;
				mask.disk = D_MASK;
			}

			val.pdsk = nps;
			mask.pdsk = D_MASK;

			continue;
		}

		if (rv == SS_NOTHING_TO_DO)
			goto fail;
		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
			nps = drbd_try_outdate_peer(mdev);

			if (force && nps > D_OUTDATED) {
				dev_warn(DEV, "Forced into split brain situation!\n");
				nps = D_OUTDATED;
			}

			mask.pdsk = D_MASK;
			val.pdsk  = nps;

			continue;
		}
		if (rv == SS_TWO_PRIMARIES) {
			/* Maybe the peer is detected as dead very soon...
			   retry at most once more in this case. */
			schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
			if (try < max_tries)
				try = max_tries - 1;
			continue;
		}
		if (rv < SS_SUCCESS) {
			rv = _drbd_request_state(mdev, mask, val,
						CS_VERBOSE + CS_WAIT_COMPLETE);
			if (rv < SS_SUCCESS)
				goto fail;
		}
		break;
	}

	if (rv < SS_SUCCESS)
		goto fail;

	if (forced)
		dev_warn(DEV, "Forced to consider local data as UpToDate!\n");

	/* Wait until nothing is on the fly :) */
	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);

	if (new_role == R_SECONDARY) {
		set_disk_ro(mdev->vdisk, true);
		if (get_ldev(mdev)) {
			mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
			put_ldev(mdev);
		}
	} else {
		if (get_net_conf(mdev)) {
			mdev->net_conf->want_lose = 0;
			put_net_conf(mdev);
		}
		set_disk_ro(mdev->vdisk, false);
		if (get_ldev(mdev)) {
			if (((mdev->state.conn < C_CONNECTED ||
			       mdev->state.pdsk <= D_FAILED)
			      && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
				drbd_uuid_new_current(mdev);

			mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
			put_ldev(mdev);
		}
	}
	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
		if (forced)
			drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}

	drbd_md_sync(mdev);

	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 fail:
	mutex_unlock(&mdev->state_mutex);
	return rv;
}

static struct drbd_conf *ensure_mdev(int minor, int create)
{
	struct drbd_conf *mdev;

	if (minor >= minor_count)
		return NULL;

	mdev = minor_to_mdev(minor);

	if (!mdev && create) {
		struct gendisk *disk = NULL;
		mdev = drbd_new_device(minor);

		spin_lock_irq(&drbd_pp_lock);
		if (minor_table[minor] == NULL) {
			minor_table[minor] = mdev;
			disk = mdev->vdisk;
			mdev = NULL;
		} /* else: we lost the race */
		spin_unlock_irq(&drbd_pp_lock);

		if (disk) /* we won the race above */
			/* in case we ever add a drbd_delete_device(),
			 * don't forget the del_gendisk! */
			add_disk(disk);
		else /* we lost the race above */
			drbd_free_mdev(mdev);

		mdev = minor_to_mdev(minor);
	}

	return mdev;
}
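
/* The drbd_nl_* handlers below are invoked via the connector interface.
 * They always return 0; the status drbdsetup actually cares about is
 * passed back in reply->ret_code. */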
static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			   struct drbd_nl_cfg_reply *reply)
{
	struct primary primary_args;

	memset(&primary_args, 0, sizeof(struct primary));
	if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
		reply->ret_code = ERR_MANDATORY_TAG;
		return 0;
	}

	reply->ret_code =
		drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);

	return 0;
}

static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			     struct drbd_nl_cfg_reply *reply)
{
	reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);

	return 0;
}

/* initializes the md.*_offset members, so we are able to find
 * the on disk meta data */
static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
				       struct drbd_backing_dev *bdev)
{
	sector_t md_size_sect = 0;
	switch (bdev->dc.meta_dev_idx) {
	default:
		/* v07 style fixed size indexed meta data */
		bdev->md.md_size_sect = MD_RESERVED_SECT;
		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
		bdev->md.al_offset = MD_AL_OFFSET;
		bdev->md.bm_offset = MD_BM_OFFSET;
		break;
	case DRBD_MD_INDEX_FLEX_EXT:
		/* just occupy the full device; unit: sectors */
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.md_offset = 0;
		bdev->md.al_offset = MD_AL_OFFSET;
		bdev->md.bm_offset = MD_BM_OFFSET;
		break;
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
		/* al size is still fixed */
		bdev->md.al_offset = -MD_AL_MAX_SIZE;
		/* we need (slightly less than) ~ this much bitmap sectors: */
		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
		md_size_sect = ALIGN(md_size_sect, 8);

		/* plus the "drbd meta data super block",
		 * and the activity log; */
		md_size_sect += MD_BM_OFFSET;

		bdev->md.md_size_sect = md_size_sect;
		/* bitmap offset is adjusted by 'super' block size */
		bdev->md.bm_offset   = -md_size_sect + MD_AL_OFFSET;
		break;
	}
}

/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
	/* Needs 9 bytes at max including trailing NUL:
	 * -1ULL ==> "16384 EB" */
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	int base = 0;
	while (size >= 10000 && base < sizeof(units)-1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1<<9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);

	return buf;
}
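
/* Example: ppsize(buf, 1048576) yields "1024 MB": 1048576 >= 10000, so
 * one shift by ten bits (rounding on bit 9) gives 1024 with base 'M',
 * and 1024 < 10000 terminates the loop. */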

/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 * remote READ does inc_ap_bio, receiver would need to receive answer
 * packet from remote to dec_ap_bio again.
 * receiver receive_sizes(), comes here,
 * waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 * (not connected, or bad/no disk on peer):
 * see drbd_fail_request_early, ap_bio_cnt is zero.
 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 * peer may not initiate a resize.
 */
void drbd_suspend_io(struct drbd_conf *mdev)
{
	set_bit(SUSPEND_IO, &mdev->flags);
	if (is_susp(mdev->state))
		return;
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
}

void drbd_resume_io(struct drbd_conf *mdev)
{
	clear_bit(SUSPEND_IO, &mdev->flags);
	wake_up(&mdev->misc_wait);
}

/**
 * drbd_determin_dev_size() - Sets the right device size obeying all constraints
 * @mdev:	DRBD device.
 *
 * Returns 0 on success, negative return values indicate errors.
 * You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
{
	sector_t prev_first_sect, prev_size; /* previous meta location */
	sector_t la_size;
	sector_t size;
	char ppb[10];

	int md_moved, la_size_changed;
	enum determine_dev_size rv = unchanged;

	/* race:
	 * application request passes inc_ap_bio,
	 * but then cannot get an AL-reference.
	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
	 *
	 * to avoid that:
	 * Suspend IO right here.
	 * still lock the act_log to not trigger ASSERTs there.
	 */
	drbd_suspend_io(mdev);

	/* no wait necessary anymore, actually we could assert that */
	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));

	prev_first_sect = drbd_md_first_sector(mdev->ldev);
	prev_size = mdev->ldev->md.md_size_sect;
	la_size = mdev->ldev->md.la_size_sect;

	/* TODO: should only be some assert here, not (re)init... */
	drbd_md_set_sector_offsets(mdev, mdev->ldev);

	size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);

	if (drbd_get_capacity(mdev->this_bdev) != size ||
	    drbd_bm_capacity(mdev) != size) {
		int err;
		err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC));
		if (unlikely(err)) {
			/* currently there is only one error: ENOMEM! */
			size = drbd_bm_capacity(mdev)>>1;
			if (size == 0) {
				dev_err(DEV, "OUT OF MEMORY! "
					"Could not allocate bitmap!\n");
			} else {
				dev_err(DEV, "BM resizing failed. "
					"Leaving size unchanged at size = %lu KB\n",
					(unsigned long)size);
			}
			rv = dev_size_error;
		}
		/* racy, see comments above. */
		drbd_set_my_capacity(mdev, size);
		mdev->ldev->md.la_size_sect = size;
		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
			(unsigned long long)size>>1);
	}
	if (rv == dev_size_error)
		goto out;

	la_size_changed = (la_size != mdev->ldev->md.la_size_sect);

	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
		|| prev_size	   != mdev->ldev->md.md_size_sect;

	if (la_size_changed || md_moved) {
		int err;

		drbd_al_shrink(mdev); /* All extents inactive. */
		dev_info(DEV, "Writing the whole bitmap, %s\n",
			 la_size_changed && md_moved ? "size changed and md moved" :
			 la_size_changed ? "size changed" : "md moved");
		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
		err = drbd_bitmap_io(mdev, &drbd_bm_write,
				"size changed", BM_LOCKED_MASK);
		if (err) {
			rv = dev_size_error;
			goto out;
		}
		drbd_md_mark_dirty(mdev);
	}

	if (size > la_size)
		rv = grew;
	if (size < la_size)
		rv = shrunk;
out:
	lc_unlock(mdev->act_log);
	wake_up(&mdev->al_wait);
	drbd_resume_io(mdev);

	return rv;
}
"size changed" : "md moved"); 651 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ 652 err = drbd_bitmap_io(mdev, &drbd_bm_write, 653 "size changed", BM_LOCKED_MASK); 654 if (err) { 655 rv = dev_size_error; 656 goto out; 657 } 658 drbd_md_mark_dirty(mdev); 659 } 660 661 if (size > la_size) 662 rv = grew; 663 if (size < la_size) 664 rv = shrunk; 665out: 666 lc_unlock(mdev->act_log); 667 wake_up(&mdev->al_wait); 668 drbd_resume_io(mdev); 669 670 return rv; 671} 672 673sector_t 674drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space) 675{ 676 sector_t p_size = mdev->p_size; /* partner's disk size. */ 677 sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ 678 sector_t m_size; /* my size */ 679 sector_t u_size = bdev->dc.disk_size; /* size requested by user. */ 680 sector_t size = 0; 681 682 m_size = drbd_get_max_capacity(bdev); 683 684 if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) { 685 dev_warn(DEV, "Resize while not connected was forced by the user!\n"); 686 p_size = m_size; 687 } 688 689 if (p_size && m_size) { 690 size = min_t(sector_t, p_size, m_size); 691 } else { 692 if (la_size) { 693 size = la_size; 694 if (m_size && m_size < size) 695 size = m_size; 696 if (p_size && p_size < size) 697 size = p_size; 698 } else { 699 if (m_size) 700 size = m_size; 701 if (p_size) 702 size = p_size; 703 } 704 } 705 706 if (size == 0) 707 dev_err(DEV, "Both nodes diskless!\n"); 708 709 if (u_size) { 710 if (u_size > size) 711 dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n", 712 (unsigned long)u_size>>1, (unsigned long)size>>1); 713 else 714 size = u_size; 715 } 716 717 return size; 718} 719 720/** 721 * drbd_check_al_size() - Ensures that the AL is of the right size 722 * @mdev: DRBD device. 723 * 724 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation 725 * failed, and 0 on success. You should call drbd_md_sync() after you called 726 * this function. 

/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @mdev:	DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after you called
 * this function.
 */
static int drbd_check_al_size(struct drbd_conf *mdev)
{
	struct lru_cache *n, *t;
	struct lc_element *e;
	unsigned int in_use;
	int i;

	ERR_IF(mdev->sync_conf.al_extents < 7)
		mdev->sync_conf.al_extents = 127;

	if (mdev->act_log &&
	    mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
		return 0;

	in_use = 0;
	t = mdev->act_log;
	n = lc_create("act_log", drbd_al_ext_cache,
		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);

	if (n == NULL) {
		dev_err(DEV, "Cannot allocate act_log lru!\n");
		return -ENOMEM;
	}
	spin_lock_irq(&mdev->al_lock);
	if (t) {
		for (i = 0; i < t->nr_elements; i++) {
			e = lc_element_by_index(t, i);
			if (e->refcnt)
				dev_err(DEV, "refcnt(%d)==%d\n",
				    e->lc_number, e->refcnt);
			in_use += e->refcnt;
		}
	}
	if (!in_use)
		mdev->act_log = n;
	spin_unlock_irq(&mdev->al_lock);
	if (in_use) {
		dev_err(DEV, "Activity log still in use!\n");
		lc_destroy(n);
		return -EBUSY;
	} else {
		if (t)
			lc_destroy(t);
	}
	drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elements */
	return 0;
}

void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
{
	struct request_queue * const q = mdev->rq_queue;
	struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
	int max_segments = mdev->ldev->dc.max_bio_bvecs;
	int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);

	blk_queue_logical_block_size(q, 512);
	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* This is the workaround for "bio would need to, but cannot, be split" */
	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
	blk_queue_stack_limits(q, b);

	dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);

	if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
		dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
			 q->backing_dev_info.ra_pages,
			 b->backing_dev_info.ra_pages);
		q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
	}
}

/* serialize deconfig (worker exiting, doing cleanup)
 * and reconfig (drbdsetup disk, drbdsetup net)
 *
 * Wait for a potentially exiting worker, then restart it,
 * or start a new one.  Flush any pending work, there may still be an
 * after_state_change queued.
 */
static void drbd_reconfig_start(struct drbd_conf *mdev)
{
	wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
	wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
	drbd_thread_start(&mdev->worker);
	drbd_flush_workqueue(mdev);
}

/* if still unconfigured, stops worker again.
 * if configured now, clears CONFIG_PENDING.
 * wakes potential waiters */
static void drbd_reconfig_done(struct drbd_conf *mdev)
{
	spin_lock_irq(&mdev->req_lock);
	if (mdev->state.disk == D_DISKLESS &&
	    mdev->state.conn == C_STANDALONE &&
	    mdev->state.role == R_SECONDARY) {
		set_bit(DEVICE_DYING, &mdev->flags);
		drbd_thread_stop_nowait(&mdev->worker);
	} else
		clear_bit(CONFIG_PENDING, &mdev->flags);
	spin_unlock_irq(&mdev->req_lock);
	wake_up(&mdev->state_wait);
}

/* Make sure IO is suspended before calling this function. */
static void drbd_suspend_al(struct drbd_conf *mdev)
{
	int s = 0;

	if (lc_try_lock(mdev->act_log)) {
		drbd_al_shrink(mdev);
		lc_unlock(mdev->act_log);
	} else {
		dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
		return;
	}

	spin_lock_irq(&mdev->req_lock);
	if (mdev->state.conn < C_CONNECTED)
		s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);

	spin_unlock_irq(&mdev->req_lock);

	if (s)
		dev_info(DEV, "Suspended AL updates\n");
}
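
/* Rough outline of the attach sequence implemented by drbd_nl_disk_conf()
 * below: parse and sanity check the disk config, open the backing and
 * meta data block devices, verify sizes, transition to D_ATTACHING, read
 * the meta data, (re)size device and bitmap, apply the activity log to
 * the bitmap if we crashed as primary, and finally move to the new disk
 * state (or D_NEGOTIATING while connected). */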

/* always returns 0;
 * the interesting return code is in reply->ret_code */
static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			     struct drbd_nl_cfg_reply *reply)
{
	enum drbd_ret_code retcode;
	enum determine_dev_size dd;
	sector_t max_possible_sectors;
	sector_t min_md_device_sectors;
	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
	struct block_device *bdev;
	struct lru_cache *resync_lru = NULL;
	union drbd_state ns, os;
	unsigned int max_bio_size;
	enum drbd_state_rv rv;
	int cp_discovered = 0;
	int logical_block_size;

	drbd_reconfig_start(mdev);

	/* if you want to reconfigure, please tear down first */
	if (mdev->state.disk > D_DISKLESS) {
		retcode = ERR_DISK_CONFIGURED;
		goto fail;
	}
	/* It may just now have detached because of IO error.  Make sure
	 * drbd_ldev_destroy is done already, we may end up here very fast,
	 * e.g. if someone calls attach from the on-io-error handler,
	 * to realize a "hot spare" feature (not that I'd recommend that) */
	wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));

	/* allocation not in the IO path, cqueue thread context */
	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
	if (!nbc) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	nbc->dc.disk_size     = DRBD_DISK_SIZE_SECT_DEF;
	nbc->dc.on_io_error   = DRBD_ON_IO_ERROR_DEF;
	nbc->dc.fencing       = DRBD_FENCING_DEF;
	nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;

	if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) {
		retcode = ERR_MANDATORY_TAG;
		goto fail;
	}

	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	if (get_net_conf(mdev)) {
		int prot = mdev->net_conf->wire_protocol;
		put_net_conf(mdev);
		if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
			retcode = ERR_STONITH_AND_PROT_A;
			goto fail;
		}
	}

	bdev = blkdev_get_by_path(nbc->dc.backing_dev,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
	if (IS_ERR(bdev)) {
		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
			PTR_ERR(bdev));
		retcode = ERR_OPEN_DISK;
		goto fail;
	}
	nbc->backing_bdev = bdev;

	/*
	 * meta_dev_idx >= 0: external fixed size, possibly multiple
	 * drbd sharing one meta device.  TODO in that case, paranoia
	 * check that [md_bdev, meta_dev_idx] is not yet used by some
	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
	 * should check it for you already; but if you don't, or
	 * someone fooled it, we need to double check here)
	 */
	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
				  (nbc->dc.meta_dev_idx < 0) ?
				  (void *)mdev : (void *)drbd_m_holder);
	if (IS_ERR(bdev)) {
		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
			PTR_ERR(bdev));
		retcode = ERR_OPEN_MD_DISK;
		goto fail;
	}
	nbc->md_bdev = bdev;

	if ((nbc->backing_bdev == nbc->md_bdev) !=
	    (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
	     nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	resync_lru = lc_create("resync", drbd_bm_ext_cache,
			61, sizeof(struct bm_extent),
			offsetof(struct bm_extent, lce));
	if (!resync_lru) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
	drbd_md_set_sector_offsets(mdev, nbc);

	if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
			(unsigned long long) drbd_get_max_capacity(nbc),
			(unsigned long long) nbc->dc.disk_size);
		retcode = ERR_DISK_TO_SMALL;
		goto fail;
	}

	if (nbc->dc.meta_dev_idx < 0) {
		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
		/* at least one MB, otherwise it does not make sense */
		min_md_device_sectors = (2<<10);
	} else {
		max_possible_sectors = DRBD_MAX_SECTORS;
		min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
	}

	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
		retcode = ERR_MD_DISK_TO_SMALL;
		dev_warn(DEV, "refusing attach: md-device too small, "
			 "at least %llu sectors needed for this meta-disk type\n",
			 (unsigned long long) min_md_device_sectors);
		goto fail;
	}

	/* Make sure the new disk is big enough
	 * (we may currently be R_PRIMARY with no local disk...) */
	if (drbd_get_max_capacity(nbc) <
	    drbd_get_capacity(mdev->this_bdev)) {
		retcode = ERR_DISK_TO_SMALL;
		goto fail;
	}

	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);

	if (nbc->known_size > max_possible_sectors) {
		dev_warn(DEV, "==> truncating very big lower level device "
			"to currently maximum possible %llu sectors <==\n",
			(unsigned long long) max_possible_sectors);
		if (nbc->dc.meta_dev_idx >= 0)
			dev_warn(DEV, "==>> using internal or flexible "
				      "meta data may help <<==\n");
	}

	drbd_suspend_io(mdev);
	/* also wait for the last barrier ack. */
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
	/* and for any other previously queued work */
	drbd_flush_workqueue(mdev);

	rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
	retcode = rv;  /* FIXME: Type mismatch. */
	drbd_resume_io(mdev);
	if (rv < SS_SUCCESS)
		goto fail;

	if (!get_ldev_if_state(mdev, D_ATTACHING))
		goto force_diskless;

	drbd_md_set_sector_offsets(mdev, nbc);

	/* allocate a second IO page if logical_block_size != 512 */
	logical_block_size = bdev_logical_block_size(nbc->md_bdev);
	if (logical_block_size == 0)
		logical_block_size = MD_SECTOR_SIZE;

	if (logical_block_size != MD_SECTOR_SIZE) {
		if (!mdev->md_io_tmpp) {
			struct page *page = alloc_page(GFP_NOIO);
			if (!page)
				goto force_diskless_dec;

			dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
				 logical_block_size, MD_SECTOR_SIZE);
			dev_warn(DEV, "Workaround engaged (has performance impact).\n");

			mdev->md_io_tmpp = page;
		}
	}

	if (!mdev->bitmap) {
		if (drbd_bm_init(mdev)) {
			retcode = ERR_NOMEM;
			goto force_diskless_dec;
		}
	}

	retcode = drbd_md_read(mdev, nbc);
	if (retcode != NO_ERROR)
		goto force_diskless_dec;

	if (mdev->state.conn < C_CONNECTED &&
	    mdev->state.role == R_PRIMARY &&
	    (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
		dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
		    (unsigned long long)mdev->ed_uuid);
		retcode = ERR_DATA_NOT_CURRENT;
		goto force_diskless_dec;
	}

	/* Since we are diskless, fix the activity log first... */
	if (drbd_check_al_size(mdev)) {
		retcode = ERR_NOMEM;
		goto force_diskless_dec;
	}

	/* Prevent shrinking of consistent devices ! */
	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
	    drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
		dev_warn(DEV, "refusing to truncate a consistent device\n");
		retcode = ERR_DISK_TO_SMALL;
		goto force_diskless_dec;
	}

	if (!drbd_al_read_log(mdev, nbc)) {
		retcode = ERR_IO_MD_DISK;
		goto force_diskless_dec;
	}

	/* Reset the "barriers don't work" bits here, then force meta data to
	 * be written, to ensure we determine if barriers are supported. */
	if (nbc->dc.no_md_flush)
		set_bit(MD_NO_FUA, &mdev->flags);
	else
		clear_bit(MD_NO_FUA, &mdev->flags);

	/* Point of no return reached.
	 * Devices and memory are no longer released by error cleanup below.
	 * now mdev takes over responsibility, and the state engine should
	 * clean it up somewhere.  */
	D_ASSERT(mdev->ldev == NULL);
	mdev->ldev = nbc;
	mdev->resync = resync_lru;
	nbc = NULL;
	resync_lru = NULL;

	mdev->write_ordering = WO_bdev_flush;
	drbd_bump_write_ordering(mdev, WO_bdev_flush);

	if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
		set_bit(CRASHED_PRIMARY, &mdev->flags);
	else
		clear_bit(CRASHED_PRIMARY, &mdev->flags);

	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
	    !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
		set_bit(CRASHED_PRIMARY, &mdev->flags);
		cp_discovered = 1;
	}

	mdev->send_cnt = 0;
	mdev->recv_cnt = 0;
	mdev->read_cnt = 0;
	mdev->writ_cnt = 0;

	max_bio_size = DRBD_MAX_BIO_SIZE;
	if (mdev->state.conn == C_CONNECTED) {
		/* We are Primary, Connected, and now attach a new local
		 * backing store. We must not increase the user visible maximum
		 * bio size on this device to something the peer may not be
		 * able to handle. */
		if (mdev->agreed_pro_version < 94)
			max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
		else if (mdev->agreed_pro_version == 94)
			max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
		/* else: drbd 8.3.9 and later, stay with default */
	}

	drbd_setup_queue_param(mdev, max_bio_size);

	/* If I am currently not R_PRIMARY,
	 * but meta data primary indicator is set,
	 * I just now recover from a hard crash,
	 * and have been R_PRIMARY before that crash.
	 *
	 * Now, if I had no connection before that crash
	 * (have been degraded R_PRIMARY), chances are that
	 * I won't find my peer now either.
	 *
	 * In that case, and _only_ in that case,
	 * we use the degr-wfc-timeout instead of the default,
	 * so we can automatically recover from a crash of a
	 * degraded but active "cluster" after a certain timeout.
	 */
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	if (mdev->state.role != R_PRIMARY &&
	     drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
		set_bit(USE_DEGR_WFC_T, &mdev->flags);

	dd = drbd_determin_dev_size(mdev, 0);
	if (dd == dev_size_error) {
		retcode = ERR_NOMEM_BITMAP;
		goto force_diskless_dec;
	} else if (dd == grew)
		set_bit(RESYNC_AFTER_NEG, &mdev->flags);

	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
		dev_info(DEV, "Assuming that all blocks are out of sync "
		     "(aka FullSync)\n");
		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
			"set_n_write from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	} else {
		if (drbd_bitmap_io(mdev, &drbd_bm_read,
			"read from attaching", BM_LOCKED_MASK) < 0) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (cp_discovered) {
		drbd_al_apply_to_bm(mdev);
		if (drbd_bitmap_io(mdev, &drbd_bm_write,
			"crashed primary apply AL", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
		drbd_suspend_al(mdev); /* IO is still suspended here... */

	spin_lock_irq(&mdev->req_lock);
	os = mdev->state;
	ns.i = os.i;
	/* If MDF_CONSISTENT is not set go into inconsistent state,
	   otherwise investigate MDF_WasUpToDate...
	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
	   otherwise into D_CONSISTENT state.
	*/
	if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
		if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
			ns.disk = D_CONSISTENT;
		else
			ns.disk = D_OUTDATED;
	} else {
		ns.disk = D_INCONSISTENT;
	}

	if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
		ns.pdsk = D_OUTDATED;

	if ( ns.disk == D_CONSISTENT &&
	    (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
		ns.disk = D_UP_TO_DATE;

	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
	   this point, because drbd_request_state() modifies these
	   flags. */

	/* In case we are C_CONNECTED postpone any decision on the new disk
	   state after the negotiation phase. */
	if (mdev->state.conn == C_CONNECTED) {
		mdev->new_state_tmp.i = ns.i;
		ns.i = os.i;
		ns.disk = D_NEGOTIATING;

		/* We expect to receive up-to-date UUIDs soon.
		   To avoid a race in receive_state, free p_uuid while
		   holding req_lock. I.e. atomic with the state change */
		kfree(mdev->p_uuid);
		mdev->p_uuid = NULL;
	}

	rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
	ns = mdev->state;
	spin_unlock_irq(&mdev->req_lock);

	if (rv < SS_SUCCESS)
		goto force_diskless_dec;

	if (mdev->state.role == R_PRIMARY)
		mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
	else
		mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;

	drbd_md_mark_dirty(mdev);
	drbd_md_sync(mdev);

	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
	put_ldev(mdev);
	reply->ret_code = retcode;
	drbd_reconfig_done(mdev);
	return 0;

 force_diskless_dec:
	put_ldev(mdev);
 force_diskless:
	drbd_force_state(mdev, NS(disk, D_FAILED));
	drbd_md_sync(mdev);
 fail:
	if (nbc) {
		if (nbc->backing_bdev)
			blkdev_put(nbc->backing_bdev,
				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
		if (nbc->md_bdev)
			blkdev_put(nbc->md_bdev,
				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
		kfree(nbc);
	}
	lc_destroy(resync_lru);

	reply->ret_code = retcode;
	drbd_reconfig_done(mdev);
	return 0;
}

/* Detaching the disk is a process in multiple stages. First we need to lock
 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
 * internal references as well.
 * Only then we have finally detached. */
static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			  struct drbd_nl_cfg_reply *reply)
{
	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
	reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
	if (mdev->state.disk == D_DISKLESS)
		wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
	drbd_resume_io(mdev);
	return 0;
}
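
/* drbd_nl_net_conf() below follows the same pattern as drbd_nl_disk_conf():
 * validate the new net_conf against the current state and against all
 * other minors (address uniqueness), allocate hash tables and crypto
 * transforms up front, then install everything under req_lock and move
 * to C_UNCONNECTED. */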
static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			    struct drbd_nl_cfg_reply *reply)
{
	int i, ns;
	enum drbd_ret_code retcode;
	struct net_conf *new_conf = NULL;
	struct crypto_hash *tfm = NULL;
	struct crypto_hash *integrity_w_tfm = NULL;
	struct crypto_hash *integrity_r_tfm = NULL;
	struct hlist_head *new_tl_hash = NULL;
	struct hlist_head *new_ee_hash = NULL;
	struct drbd_conf *odev;
	char hmac_name[CRYPTO_MAX_ALG_NAME];
	void *int_dig_out = NULL;
	void *int_dig_in = NULL;
	void *int_dig_vv = NULL;
	struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;

	drbd_reconfig_start(mdev);

	if (mdev->state.conn > C_STANDALONE) {
		retcode = ERR_NET_CONFIGURED;
		goto fail;
	}

	/* allocation not in the IO path, cqueue thread context */
	new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	new_conf->timeout	   = DRBD_TIMEOUT_DEF;
	new_conf->try_connect_int  = DRBD_CONNECT_INT_DEF;
	new_conf->ping_int	   = DRBD_PING_INT_DEF;
	new_conf->max_epoch_size   = DRBD_MAX_EPOCH_SIZE_DEF;
	new_conf->max_buffers	   = DRBD_MAX_BUFFERS_DEF;
	new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
	new_conf->sndbuf_size	   = DRBD_SNDBUF_SIZE_DEF;
	new_conf->rcvbuf_size	   = DRBD_RCVBUF_SIZE_DEF;
	new_conf->ko_count	   = DRBD_KO_COUNT_DEF;
	new_conf->after_sb_0p	   = DRBD_AFTER_SB_0P_DEF;
	new_conf->after_sb_1p	   = DRBD_AFTER_SB_1P_DEF;
	new_conf->after_sb_2p	   = DRBD_AFTER_SB_2P_DEF;
	new_conf->want_lose	   = 0;
	new_conf->two_primaries    = 0;
	new_conf->wire_protocol    = DRBD_PROT_C;
	new_conf->ping_timeo	   = DRBD_PING_TIMEO_DEF;
	new_conf->rr_conflict	   = DRBD_RR_CONFLICT_DEF;
	new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
	new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;

	if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
		retcode = ERR_MANDATORY_TAG;
		goto fail;
	}

	if (new_conf->two_primaries
	    && (new_conf->wire_protocol != DRBD_PROT_C)) {
		retcode = ERR_NOT_PROTO_C;
		goto fail;
	}

	if (get_ldev(mdev)) {
		enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
		if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
			retcode = ERR_STONITH_AND_PROT_A;
			goto fail;
		}
	}

	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
		retcode = ERR_CONG_NOT_PROTO_A;
		goto fail;
	}

	if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
		retcode = ERR_DISCARD;
		goto fail;
	}

	retcode = NO_ERROR;

	new_my_addr = (struct sockaddr *)&new_conf->my_addr;
	new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
	for (i = 0; i < minor_count; i++) {
		odev = minor_to_mdev(i);
		if (!odev || odev == mdev)
			continue;
		if (get_net_conf(odev)) {
			taken_addr = (struct sockaddr *)&odev->net_conf->my_addr;
			if (new_conf->my_addr_len == odev->net_conf->my_addr_len &&
			    !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
				retcode = ERR_LOCAL_ADDR;

			taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr;
			if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len &&
			    !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
				retcode = ERR_PEER_ADDR;

			put_net_conf(odev);
			if (retcode != NO_ERROR)
				goto fail;
		}
	}

	if (new_conf->cram_hmac_alg[0] != 0) {
		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
			new_conf->cram_hmac_alg);
		tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(tfm)) {
			tfm = NULL;
			retcode = ERR_AUTH_ALG;
			goto fail;
		}

		if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
			retcode = ERR_AUTH_ALG_ND;
			goto fail;
		}
	}

	if (new_conf->integrity_alg[0]) {
		integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(integrity_w_tfm)) {
			integrity_w_tfm = NULL;
			retcode=ERR_INTEGRITY_ALG;
			goto fail;
		}

		if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
			retcode=ERR_INTEGRITY_ALG_ND;
			goto fail;
		}

		integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(integrity_r_tfm)) {
			integrity_r_tfm = NULL;
			retcode=ERR_INTEGRITY_ALG;
			goto fail;
		}
	}

	ns = new_conf->max_epoch_size/8;
	if (mdev->tl_hash_s != ns) {
		new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
		if (!new_tl_hash) {
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	ns = new_conf->max_buffers/8;
	if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) {
		new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
		if (!new_ee_hash) {
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;

	if (integrity_w_tfm) {
		i = crypto_hash_digestsize(integrity_w_tfm);
		int_dig_out = kmalloc(i, GFP_KERNEL);
		if (!int_dig_out) {
			retcode = ERR_NOMEM;
			goto fail;
		}
		int_dig_in = kmalloc(i, GFP_KERNEL);
		if (!int_dig_in) {
			retcode = ERR_NOMEM;
			goto fail;
		}
		int_dig_vv = kmalloc(i, GFP_KERNEL);
		if (!int_dig_vv) {
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	if (!mdev->bitmap) {
		if (drbd_bm_init(mdev)) {
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	drbd_flush_workqueue(mdev);
	spin_lock_irq(&mdev->req_lock);
	if (mdev->net_conf != NULL) {
		retcode = ERR_NET_CONFIGURED;
		spin_unlock_irq(&mdev->req_lock);
		goto fail;
	}
	mdev->net_conf = new_conf;

	mdev->send_cnt = 0;
	mdev->recv_cnt = 0;

	if (new_tl_hash) {
		kfree(mdev->tl_hash);
		mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
		mdev->tl_hash = new_tl_hash;
	}

	if (new_ee_hash) {
		kfree(mdev->ee_hash);
		mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
		mdev->ee_hash = new_ee_hash;
	}

	crypto_free_hash(mdev->cram_hmac_tfm);
	mdev->cram_hmac_tfm = tfm;

	crypto_free_hash(mdev->integrity_w_tfm);
	mdev->integrity_w_tfm = integrity_w_tfm;

	crypto_free_hash(mdev->integrity_r_tfm);
	mdev->integrity_r_tfm = integrity_r_tfm;

	kfree(mdev->int_dig_out);
	kfree(mdev->int_dig_in);
	kfree(mdev->int_dig_vv);
	mdev->int_dig_out=int_dig_out;
	mdev->int_dig_in=int_dig_in;
	mdev->int_dig_vv=int_dig_vv;
	retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
	spin_unlock_irq(&mdev->req_lock);

	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
	reply->ret_code = retcode;
	drbd_reconfig_done(mdev);
	return 0;

fail:
	kfree(int_dig_out);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	crypto_free_hash(tfm);
	crypto_free_hash(integrity_w_tfm);
	crypto_free_hash(integrity_r_tfm);
	kfree(new_tl_hash);
	kfree(new_ee_hash);
	kfree(new_conf);

	reply->ret_code = retcode;
	drbd_reconfig_done(mdev);
	return 0;
}

static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			      struct drbd_nl_cfg_reply *reply)
{
	int retcode;
	struct disconnect dc;

	memset(&dc, 0, sizeof(struct disconnect));
	if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
		retcode = ERR_MANDATORY_TAG;
		goto fail;
	}

	if (dc.force) {
		spin_lock_irq(&mdev->req_lock);
		if (mdev->state.conn >= C_WF_CONNECTION)
			_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
		spin_unlock_irq(&mdev->req_lock);
		goto done;
	}

	retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);

	if (retcode == SS_NOTHING_TO_DO)
		goto done;
	else if (retcode == SS_ALREADY_STANDALONE)
		goto done;
	else if (retcode == SS_PRIMARY_NOP) {
		/* Our state checking code wants to see the peer outdated. */
		retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
						       pdsk, D_OUTDATED));
	} else if (retcode == SS_CW_FAILED_BY_PEER) {
		/* The peer probably wants to see us outdated. */
		retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
							disk, D_OUTDATED),
					      CS_ORDERED);
		if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
			retcode = SS_SUCCESS;
		}
	}

	if (retcode < SS_SUCCESS)
		goto fail;

	if (wait_event_interruptible(mdev->state_wait,
				     mdev->state.conn != C_DISCONNECTING)) {
		/* Do not test for mdev->state.conn == C_STANDALONE, since
		   someone else might connect us in the meantime! */
		retcode = ERR_INTR;
		goto fail;
	}

 done:
	retcode = NO_ERROR;
 fail:
	drbd_md_sync(mdev);
	reply->ret_code = retcode;
	return 0;
}

void resync_after_online_grow(struct drbd_conf *mdev)
{
	int iass; /* I am sync source */

	dev_info(DEV, "Resync of new storage after online grow\n");
	if (mdev->state.role != mdev->state.peer)
		iass = (mdev->state.role == R_PRIMARY);
	else
		iass = test_bit(DISCARD_CONCURRENT, &mdev->flags);

	if (iass)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	else
		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
}
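
/* Direction of the post-grow resync: if the roles differ, the primary
 * becomes sync source; between two secondaries the DISCARD_CONCURRENT
 * flag decides. */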
static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			  struct drbd_nl_cfg_reply *reply)
{
	struct resize rs;
	int retcode = NO_ERROR;
	enum determine_dev_size dd;
	enum dds_flags ddsf;

	memset(&rs, 0, sizeof(struct resize));
	if (!resize_from_tags(mdev, nlp->tag_list, &rs)) {
		retcode = ERR_MANDATORY_TAG;
		goto fail;
	}

	if (mdev->state.conn > C_CONNECTED) {
		retcode = ERR_RESIZE_RESYNC;
		goto fail;
	}

	if (mdev->state.role == R_SECONDARY &&
	    mdev->state.peer == R_SECONDARY) {
		retcode = ERR_NO_PRIMARY;
		goto fail;
	}

	if (!get_ldev(mdev)) {
		retcode = ERR_NO_DISK;
		goto fail;
	}

	if (rs.no_resync && mdev->agreed_pro_version < 93) {
		retcode = ERR_NEED_APV_93;
		goto fail;
	}

	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);

	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
	dd = drbd_determin_dev_size(mdev, ddsf);
	drbd_md_sync(mdev);
	put_ldev(mdev);
	if (dd == dev_size_error) {
		retcode = ERR_NOMEM_BITMAP;
		goto fail;
	}

	if (mdev->state.conn == C_CONNECTED) {
		if (dd == grew)
			set_bit(RESIZE_PENDING, &mdev->flags);

		drbd_send_uuids(mdev);
		drbd_send_sizes(mdev, 1, ddsf);
	}

 fail:
	reply->ret_code = retcode;
	return 0;
}
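
/* After a successful resize, drbd_send_uuids()/drbd_send_sizes() inform
 * the peer; RESIZE_PENDING, set above when the device grew while
 * connected, marks that the resync of the grown area is still to be
 * arranged, cf. resync_after_online_grow(). */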
static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			       struct drbd_nl_cfg_reply *reply)
{
	int retcode = NO_ERROR;
	int err;
	int ovr; /* online verify running */
	int rsr; /* re-sync running */
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct syncer_conf sc;
	cpumask_var_t new_cpu_mask;
	int *rs_plan_s = NULL;
	int fifo_size;

	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
		memset(&sc, 0, sizeof(struct syncer_conf));
		sc.rate       = DRBD_RATE_DEF;
		sc.after      = DRBD_AFTER_DEF;
		sc.al_extents = DRBD_AL_EXTENTS_DEF;
		sc.on_no_data = DRBD_ON_NO_DATA_DEF;
		sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
		sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
		sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
		sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
		sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
	} else
		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));

	if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) {
		retcode = ERR_MANDATORY_TAG;
		goto fail;
	}

	/* re-sync running */
	rsr = (	mdev->state.conn == C_SYNC_SOURCE ||
		mdev->state.conn == C_SYNC_TARGET ||
		mdev->state.conn == C_PAUSED_SYNC_S ||
		mdev->state.conn == C_PAUSED_SYNC_T );

	if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
		retcode = ERR_CSUMS_RESYNC_RUNNING;
		goto fail;
	}

	if (!rsr && sc.csums_alg[0]) {
		csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(csums_tfm)) {
			csums_tfm = NULL;
			retcode = ERR_CSUMS_ALG;
			goto fail;
		}

		if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
			retcode = ERR_CSUMS_ALG_ND;
			goto fail;
		}
	}

	/* online verify running */
	ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);

	if (ovr) {
		if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
			retcode = ERR_VERIFY_RUNNING;
			goto fail;
		}
	}

	if (!ovr && sc.verify_alg[0]) {
		verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(verify_tfm)) {
			verify_tfm = NULL;
			retcode = ERR_VERIFY_ALG;
			goto fail;
		}

		if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
			retcode = ERR_VERIFY_ALG_ND;
			goto fail;
		}
	}

	/* silently ignore cpu mask on UP kernel */
	if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
		err = __bitmap_parse(sc.cpu_mask, 32, 0,
				cpumask_bits(new_cpu_mask), nr_cpu_ids);
		if (err) {
			dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
			retcode = ERR_CPU_MASK_PARSE;
			goto fail;
		}
	}

	ERR_IF (sc.rate < 1) sc.rate = 1;
	ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
	if (sc.al_extents > AL_MAX) {
		dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
		sc.al_extents = AL_MAX;
	}
#undef AL_MAX

	/* to avoid spurious errors when configuring minors before configuring
	 * the minors they depend on: if necessary, first create the minor we
	 * depend on */
	if (sc.after >= 0)
		ensure_mdev(sc.after, 1);

	/* most sanity checks done, try to assign the new sync-after
	 * dependency.  need to hold the global lock in there,
	 * to avoid a race in the dependency loop check. */
	retcode = drbd_alter_sa(mdev, sc.after);
	if (retcode != NO_ERROR)
		goto fail;

	fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
		rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
		if (!rs_plan_s) {
			dev_err(DEV, "kmalloc of fifo_buffer failed");
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	/* ok, assign the rest of it as well.
	 * lock against receive_SyncParam() */
	spin_lock(&mdev->peer_seq_lock);
	mdev->sync_conf = sc;

	if (!rsr) {
		crypto_free_hash(mdev->csums_tfm);
		mdev->csums_tfm = csums_tfm;
		csums_tfm = NULL;
	}

	if (!ovr) {
		crypto_free_hash(mdev->verify_tfm);
		mdev->verify_tfm = verify_tfm;
		verify_tfm = NULL;
	}

	if (fifo_size != mdev->rs_plan_s.size) {
		kfree(mdev->rs_plan_s.values);
		mdev->rs_plan_s.values = rs_plan_s;
		mdev->rs_plan_s.size   = fifo_size;
		mdev->rs_planed = 0;
		rs_plan_s = NULL;
	}

	spin_unlock(&mdev->peer_seq_lock);

	if (get_ldev(mdev)) {
		wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
		drbd_al_shrink(mdev);
		err = drbd_check_al_size(mdev);
		lc_unlock(mdev->act_log);
		wake_up(&mdev->al_wait);

		put_ldev(mdev);
		drbd_md_sync(mdev);

		if (err) {
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	if (mdev->state.conn >= C_CONNECTED)
		drbd_send_sync_param(mdev, &sc);

	if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) {
		cpumask_copy(mdev->cpu_mask, new_cpu_mask);
		drbd_calc_cpu_mask(mdev);
		mdev->receiver.reset_cpu_mask = 1;
		mdev->asender.reset_cpu_mask = 1;
		mdev->worker.reset_cpu_mask = 1;
	}

	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
fail:
	kfree(rs_plan_s);
	free_cpumask_var(new_cpu_mask);
	crypto_free_hash(csums_tfm);
	crypto_free_hash(verify_tfm);
	reply->ret_code = retcode;
	return 0;
}
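
/* "invalidate" requests a full resync with this node as sync target.
 * If no peer is currently reachable, the loop below settles for marking
 * the local disk D_INCONSISTENT instead, re-checking in case a peer
 * connected concurrently. */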

static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			      struct drbd_nl_cfg_reply *reply)
{
	int retcode;

	/* If there is still bitmap IO pending, probably because of a previous
	 * resync just being finished, wait for it before requesting a new resync. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);

	if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));

	while (retcode == SS_NEED_CONNECTION) {
		spin_lock_irq(&mdev->req_lock);
		if (mdev->state.conn < C_CONNECTED)
			retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
		spin_unlock_irq(&mdev->req_lock);

		if (retcode != SS_NEED_CONNECTION)
			break;

		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
	}

	reply->ret_code = retcode;
	return 0;
}

static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
{
	int rv;

	rv = drbd_bmio_set_n_write(mdev);
	drbd_suspend_al(mdev);
	return rv;
}

static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
				   struct drbd_nl_cfg_reply *reply)
{
	int retcode;

	/* If there is still bitmap IO pending, probably because of a previous
	 * resync just being finished, wait for it before requesting a new resync. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);

	if (retcode < SS_SUCCESS) {
		if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
			/* The peer will get a resync upon connect anyway.
			 * Just make that into a full resync. */
			retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
			if (retcode >= SS_SUCCESS) {
				if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
						   "set_n_write from invalidate_peer",
						   BM_LOCKED_SET_ALLOWED))
					retcode = ERR_IO_MD_DISK;
			}
		} else
			retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
	}

	reply->ret_code = retcode;
	return 0;
}
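
/*
 * Usage sketch for the two handlers above (drbdsetup command names as of
 * drbd 8.3, shown for illustration):
 *
 *	drbdsetup /dev/drbd0 invalidate		discard the local data and
 *						become sync target
 *	drbdsetup /dev/drbd0 invalidate-remote	overwrite the peer's data and
 *						become sync source
 *
 * Both first wait for pending bitmap IO to settle, then drive the
 * connection state to C_STARTING_SYNC_T resp. C_STARTING_SYNC_S.
 */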

static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			      struct drbd_nl_cfg_reply *reply)
{
	int retcode = NO_ERROR;

	if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
		retcode = ERR_PAUSE_IS_SET;

	reply->ret_code = retcode;
	return 0;
}

static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			       struct drbd_nl_cfg_reply *reply)
{
	int retcode = NO_ERROR;
	union drbd_state s;

	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
		s = mdev->state;
		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
		} else {
			retcode = ERR_PAUSE_IS_CLEAR;
		}
	}

	reply->ret_code = retcode;
	return 0;
}

static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			      struct drbd_nl_cfg_reply *reply)
{
	reply->ret_code = drbd_request_state(mdev, NS(susp, 1));

	return 0;
}

static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			     struct drbd_nl_cfg_reply *reply)
{
	if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
		drbd_uuid_new_current(mdev);
		clear_bit(NEW_CUR_UUID, &mdev->flags);
	}
	drbd_suspend_io(mdev);
	reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
	if (reply->ret_code == SS_SUCCESS) {
		if (mdev->state.conn < C_CONNECTED)
			tl_clear(mdev);
		if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
			tl_restart(mdev, fail_frozen_disk_io);
	}
	drbd_resume_io(mdev);

	return 0;
}
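
/*
 * Note on drbd_nl_resume_io() above: NS3() clears all three suspend
 * reasons (explicit suspend, no data, fencing) in a single state
 * transition.  Illustrative sequence after a fencing policy froze IO
 * (susp_fen = 1) and the peer never returned:
 *
 *	drbdsetup /dev/drbd0 resume-io
 *	  -> a new current UUID is generated if one was pending
 *	  -> susp, susp_nod and susp_fen are cleared together
 *	  -> requests stuck in the transfer log are cleaned out; with the
 *	     local disk in D_FAILED/D_DISKLESS they complete with an error
 *	     instead of staying frozen forever
 */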

static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			   struct drbd_nl_cfg_reply *reply)
{
	reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
	return 0;
}

static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			      struct drbd_nl_cfg_reply *reply)
{
	unsigned short *tl;

	tl = reply->tag_list;

	if (get_ldev(mdev)) {
		tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl);
		put_ldev(mdev);
	}

	if (get_net_conf(mdev)) {
		tl = net_conf_to_tags(mdev, mdev->net_conf, tl);
		put_net_conf(mdev);
	}
	tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);

	put_unaligned(TT_END, tl++); /* Close the tag list */

	return (int)((char *)tl - (char *)reply->tag_list);
}

static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			     struct drbd_nl_cfg_reply *reply)
{
	unsigned short *tl = reply->tag_list;
	union drbd_state s = mdev->state;
	unsigned long rs_left;
	unsigned int res;

	tl = get_state_to_tags(mdev, (struct get_state *)&s, tl);

	/* no local ref, no bitmap, no syncer progress. */
	if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
		if (get_ldev(mdev)) {
			drbd_get_syncer_progress(mdev, &rs_left, &res);
			tl = tl_add_int(tl, T_sync_progress, &res);
			put_ldev(mdev);
		}
	}
	put_unaligned(TT_END, tl++); /* Close the tag list */

	return (int)((char *)tl - (char *)reply->tag_list);
}

static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			     struct drbd_nl_cfg_reply *reply)
{
	unsigned short *tl;

	tl = reply->tag_list;

	if (get_ldev(mdev)) {
		tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
		tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
		put_ldev(mdev);
	}
	put_unaligned(TT_END, tl++); /* Close the tag list */

	return (int)((char *)tl - (char *)reply->tag_list);
}

/**
 * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
 * @mdev: DRBD device.
 * @nlp: Netlink/connector packet from drbdsetup
 * @reply: Reply packet for drbdsetup
 */
static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
				    struct drbd_nl_cfg_reply *reply)
{
	unsigned short *tl;
	char rv;

	tl = reply->tag_list;

	rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
	     test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;

	tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
	put_unaligned(TT_END, tl++); /* Close the tag list */

	return (int)((char *)tl - (char *)reply->tag_list);
}

static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			    struct drbd_nl_cfg_reply *reply)
{
	/* default to resume from last known position, if possible */
	struct start_ov args =
		{ .start_sector = mdev->ov_start_sector };

	if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
		reply->ret_code = ERR_MANDATORY_TAG;
		return 0;
	}

	/* If there is still bitmap IO pending, e.g. previous resync or verify
	 * just being finished, wait for it before requesting a new resync. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	/* w_make_ov_request expects position to be aligned */
	mdev->ov_start_sector = args.start_sector & ~(sector_t)(BM_SECT_PER_BIT-1);
	reply->ret_code = drbd_request_state(mdev, NS(conn, C_VERIFY_S));
	return 0;
}
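
/*
 * Worked example for the start-sector alignment in drbd_nl_start_ov()
 * above, assuming the usual 4 KiB bitmap granularity, i.e.
 * BM_SECT_PER_BIT == 8 sectors per bitmap bit:
 *
 *	args.start_sector = 12345
 *	12345 & ~(sector_t)(8 - 1) = 12344
 *
 * The verify position is rounded down to the first sector covered by the
 * same bitmap bit, which is the alignment w_make_ov_request expects.
 */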

static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
			      struct drbd_nl_cfg_reply *reply)
{
	int retcode = NO_ERROR;
	int skip_initial_sync = 0;
	int err;

	struct new_c_uuid args;

	memset(&args, 0, sizeof(struct new_c_uuid));
	if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) {
		reply->ret_code = ERR_MANDATORY_TAG;
		return 0;
	}

	mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */

	if (!get_ldev(mdev)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	/* this is "skip initial sync", assume to be clean */
	if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
	    mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
		dev_info(DEV, "Preparing to skip initial sync\n");
		skip_initial_sync = 1;
	} else if (mdev->state.conn != C_STANDALONE) {
		retcode = ERR_CONNECTED;
		goto out_dec;
	}

	drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
	drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */

	if (args.clear_bm) {
		err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
				     "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
		if (err) {
			dev_err(DEV, "Writing bitmap failed with %d\n", err);
			retcode = ERR_IO_MD_DISK;
		}
		if (skip_initial_sync) {
			drbd_send_uuids_skip_initial_sync(mdev);
			_drbd_uuid_set(mdev, UI_BITMAP, 0);
			drbd_print_uuids(mdev, "cleared bitmap UUID");
			spin_lock_irq(&mdev->req_lock);
			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			spin_unlock_irq(&mdev->req_lock);
		}
	}

	drbd_md_sync(mdev);
out_dec:
	put_ldev(mdev);
out:
	mutex_unlock(&mdev->state_mutex);

	reply->ret_code = retcode;
	return 0;
}

struct cn_handler_struct {
	int (*function)(struct drbd_conf *,
			struct drbd_nl_cfg_req *,
			struct drbd_nl_cfg_reply *);
	int reply_body_size;
};

static struct cn_handler_struct cnd_table[] = {
	[ P_primary ]		= { &drbd_nl_primary,		0 },
	[ P_secondary ]		= { &drbd_nl_secondary,		0 },
	[ P_disk_conf ]		= { &drbd_nl_disk_conf,		0 },
	[ P_detach ]		= { &drbd_nl_detach,		0 },
	[ P_net_conf ]		= { &drbd_nl_net_conf,		0 },
	[ P_disconnect ]	= { &drbd_nl_disconnect,	0 },
	[ P_resize ]		= { &drbd_nl_resize,		0 },
	[ P_syncer_conf ]	= { &drbd_nl_syncer_conf,	0 },
	[ P_invalidate ]	= { &drbd_nl_invalidate,	0 },
	[ P_invalidate_peer ]	= { &drbd_nl_invalidate_peer,	0 },
	[ P_pause_sync ]	= { &drbd_nl_pause_sync,	0 },
	[ P_resume_sync ]	= { &drbd_nl_resume_sync,	0 },
	[ P_suspend_io ]	= { &drbd_nl_suspend_io,	0 },
	[ P_resume_io ]		= { &drbd_nl_resume_io,		0 },
	[ P_outdate ]		= { &drbd_nl_outdate,		0 },
	[ P_get_config ]	= { &drbd_nl_get_config,
				    sizeof(struct syncer_conf_tag_len_struct) +
				    sizeof(struct disk_conf_tag_len_struct) +
				    sizeof(struct net_conf_tag_len_struct) },
	[ P_get_state ]		= { &drbd_nl_get_state,
				    sizeof(struct get_state_tag_len_struct) +
				    sizeof(struct sync_progress_tag_len_struct) },
	[ P_get_uuids ]		= { &drbd_nl_get_uuids,
				    sizeof(struct get_uuids_tag_len_struct) },
	[ P_get_timeout_flag ]	= { &drbd_nl_get_timeout_flag,
				    sizeof(struct get_timeout_flag_tag_len_struct) },
	[ P_start_ov ]		= { &drbd_nl_start_ov,		0 },
	[ P_new_c_uuid ]	= { &drbd_nl_new_c_uuid,	0 },
};
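
/*
 * reply_body_size is the worst-case tag-list payload for the handlers
 * that return data; the *_tag_len_struct types are generated from the
 * packet descriptions and give that upper bound.  Sketch of the buffer
 * sizing done in drbd_connector_callback() below for P_get_state:
 *
 *	reply_size = sizeof(struct cn_msg)
 *		   + sizeof(struct drbd_nl_cfg_reply)
 *		   + sizeof(short int)	(room for the closing TT_END)
 *		   + sizeof(struct get_state_tag_len_struct)
 *		   + sizeof(struct sync_progress_tag_len_struct);
 *
 * Handlers with reply_body_size == 0 only ever return a ret_code; their
 * reply is sent as P_return_code_only and the base size is sufficient.
 */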

static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
{
	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
	struct cn_handler_struct *cm;
	struct cn_msg *cn_reply;
	struct drbd_nl_cfg_reply *reply;
	struct drbd_conf *mdev;
	int retcode, rr;
	int reply_size = sizeof(struct cn_msg)
		+ sizeof(struct drbd_nl_cfg_reply)
		+ sizeof(short int);

	if (!try_module_get(THIS_MODULE)) {
		printk(KERN_ERR "drbd: try_module_get() failed!\n");
		return;
	}

	if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) {
		retcode = ERR_PERM;
		goto fail;
	}

	mdev = ensure_mdev(nlp->drbd_minor,
			   (nlp->flags & DRBD_NL_CREATE_DEVICE));
	if (!mdev) {
		retcode = ERR_MINOR_INVALID;
		goto fail;
	}

	if (nlp->packet_type >= P_nl_after_last_packet ||
	    nlp->packet_type == P_return_code_only) {
		retcode = ERR_PACKET_NR;
		goto fail;
	}

	cm = cnd_table + nlp->packet_type;

	/* This may happen if packet number is 0: */
	if (cm->function == NULL) {
		retcode = ERR_PACKET_NR;
		goto fail;
	}

	reply_size += cm->reply_body_size;

	/* allocation not in the IO path, cqueue thread context */
	cn_reply = kzalloc(reply_size, GFP_KERNEL);
	if (!cn_reply) {
		retcode = ERR_NOMEM;
		goto fail;
	}
	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;

	reply->packet_type =
		cm->reply_body_size ? nlp->packet_type : P_return_code_only;
	reply->minor = nlp->drbd_minor;
	reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */
	/* reply->tag_list; might be modified by cm->function. */

	rr = cm->function(mdev, nlp, reply);

	cn_reply->id = req->id;
	cn_reply->seq = req->seq;
	cn_reply->ack = req->ack + 1;
	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
	cn_reply->flags = 0;

	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
	if (rr && rr != -ESRCH)
		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);

	kfree(cn_reply);
	module_put(THIS_MODULE);
	return;
 fail:
	drbd_nl_send_reply(req, retcode);
	module_put(THIS_MODULE);
}

static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */

static unsigned short *
__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
	      unsigned short len, int nul_terminated)
{
	unsigned short l = tag_descriptions[tag_number(tag)].max_len;
	len = (len < l) ? len : l;
	put_unaligned(tag, tl++);
	put_unaligned(len, tl++);
	memcpy(tl, data, len);
	tl = (unsigned short *)((char *)tl + len);
	if (nul_terminated)
		*((char *)tl - 1) = 0;
	return tl;
}

static unsigned short *
tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
{
	return __tl_add_blob(tl, tag, data, len, 0);
}

static unsigned short *
tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
{
	return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
}

static unsigned short *
tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
{
	put_unaligned(tag, tl++);
	switch (tag_type(tag)) {
	case TT_INTEGER:
		put_unaligned(sizeof(int), tl++);
		put_unaligned(*(int *)val, (int *)tl);
		tl = (unsigned short *)((char *)tl + sizeof(int));
		break;
	case TT_INT64:
		put_unaligned(sizeof(u64), tl++);
		put_unaligned(*(u64 *)val, (u64 *)tl);
		tl = (unsigned short *)((char *)tl + sizeof(u64));
		break;
	default:
		/* someone did something stupid. */
		;
	}
	return tl;
}
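
/*
 * On-the-wire layout produced by the tl_add_* helpers above (a sketch;
 * all fields are native-endian with only 2-byte alignment guaranteed,
 * hence the put_unaligned() accessors):
 *
 *	u16 tag		tag number ored with its type
 *			(TT_INTEGER, TT_INT64, TT_BIT, TT_STRING)
 *	u16 dlen	payload length in bytes
 *	u8  data[dlen]
 *	...repeated per tag...
 *	u16 TT_END	terminator, no length or payload
 *
 * For example, tl_add_int(tl, T_sync_progress, &res) with res == 463
 * emits 8 bytes: the T_sync_progress tag word, dlen == 4, and the 32-bit
 * value 463 (sync progress is reported in per mille, so 46.3% done).
 */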

void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
{
	char buffer[sizeof(struct cn_msg)+
		    sizeof(struct drbd_nl_cfg_reply)+
		    sizeof(struct get_state_tag_len_struct)+
		    sizeof(short int)];
	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
	struct drbd_nl_cfg_reply *reply =
		(struct drbd_nl_cfg_reply *)cn_reply->data;
	unsigned short *tl = reply->tag_list;

	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */

	tl = get_state_to_tags(mdev, (struct get_state *)&state, tl);

	put_unaligned(TT_END, tl++); /* Close the tag list */

	cn_reply->id.idx = CN_IDX_DRBD;
	cn_reply->id.val = CN_VAL_DRBD;

	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
	cn_reply->ack = 0; /* not used here. */
	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
		(int)((char *)tl - (char *)reply->tag_list);
	cn_reply->flags = 0;

	reply->packet_type = P_get_state;
	reply->minor = mdev_to_minor(mdev);
	reply->ret_code = NO_ERROR;

	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
}

void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
{
	char buffer[sizeof(struct cn_msg)+
		    sizeof(struct drbd_nl_cfg_reply)+
		    sizeof(struct call_helper_tag_len_struct)+
		    sizeof(short int)];
	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
	struct drbd_nl_cfg_reply *reply =
		(struct drbd_nl_cfg_reply *)cn_reply->data;
	unsigned short *tl = reply->tag_list;

	/* dev_warn(DEV, "drbd_bcast_ev_helper() got called\n"); */

	tl = tl_add_str(tl, T_helper, helper_name);
	put_unaligned(TT_END, tl++); /* Close the tag list */

	cn_reply->id.idx = CN_IDX_DRBD;
	cn_reply->id.val = CN_VAL_DRBD;

	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
	cn_reply->ack = 0; /* not used here. */
	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
		(int)((char *)tl - (char *)reply->tag_list);
	cn_reply->flags = 0;

	reply->packet_type = P_call_helper;
	reply->minor = mdev_to_minor(mdev);
	reply->ret_code = NO_ERROR;

	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
}
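
/*
 * The broadcasts above (state, helper) and drbd_bcast_sync_progress()
 * below have small, statically bounded payloads and can build the
 * message in an on-stack buffer.  drbd_bcast_ee() below may carry up to
 * 32 KiB of block data, so it has to allocate its buffer instead.
 */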

void drbd_bcast_ee(struct drbd_conf *mdev,
		   const char *reason, const int dgs,
		   const char *seen_hash, const char *calc_hash,
		   const struct drbd_epoch_entry *e)
{
	struct cn_msg *cn_reply;
	struct drbd_nl_cfg_reply *reply;
	unsigned short *tl;
	struct page *page;
	unsigned len;

	if (!e)
		return;
	if (!reason || !reason[0])
		return;

	/* apparently we have to memcpy twice, first to prepare the data for the
	 * struct cn_msg, then within cn_netlink_send from the cn_msg to the
	 * netlink skb. */
	/* receiver thread context, which is not in the writeout path (of this node),
	 * but may be in the writeout path of the _other_ node.
	 * GFP_NOIO to avoid potential "distributed deadlock". */
	cn_reply = kzalloc(
		sizeof(struct cn_msg)+
		sizeof(struct drbd_nl_cfg_reply)+
		sizeof(struct dump_ee_tag_len_struct)+
		sizeof(short int),
		GFP_NOIO);

	if (!cn_reply) {
		dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
			(unsigned long long)e->sector, e->size);
		return;
	}

	reply = (struct drbd_nl_cfg_reply *)cn_reply->data;
	tl = reply->tag_list;

	tl = tl_add_str(tl, T_dump_ee_reason, reason);
	tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
	tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
	tl = tl_add_int(tl, T_ee_sector, &e->sector);
	tl = tl_add_int(tl, T_ee_block_id, &e->block_id);

	/* dump the first 32k */
	len = min_t(unsigned, e->size, 32 << 10);
	put_unaligned(T_ee_data, tl++);
	put_unaligned(len, tl++);

	page = e->pages;
	page_chain_for_each(page) {
		void *d = kmap_atomic(page, KM_USER0);
		unsigned l = min_t(unsigned, len, PAGE_SIZE);
		memcpy(tl, d, l);
		kunmap_atomic(d, KM_USER0);
		tl = (unsigned short *)((char *)tl + l);
		len -= l;
		if (len == 0)
			break;
	}
	put_unaligned(TT_END, tl++); /* Close the tag list */

	cn_reply->id.idx = CN_IDX_DRBD;
	cn_reply->id.val = CN_VAL_DRBD;

	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
	cn_reply->ack = 0; /* not used here. */
	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
		(int)((char *)tl - (char *)reply->tag_list);
	cn_reply->flags = 0;

	reply->packet_type = P_dump_ee;
	reply->minor = mdev_to_minor(mdev);
	reply->ret_code = NO_ERROR;

	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
	kfree(cn_reply);
}

void drbd_bcast_sync_progress(struct drbd_conf *mdev)
{
	char buffer[sizeof(struct cn_msg)+
		    sizeof(struct drbd_nl_cfg_reply)+
		    sizeof(struct sync_progress_tag_len_struct)+
		    sizeof(short int)];
	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
	struct drbd_nl_cfg_reply *reply =
		(struct drbd_nl_cfg_reply *)cn_reply->data;
	unsigned short *tl = reply->tag_list;
	unsigned long rs_left;
	unsigned int res;

	/* no local ref, no bitmap, no syncer progress, no broadcast. */
	if (!get_ldev(mdev))
		return;
	drbd_get_syncer_progress(mdev, &rs_left, &res);
	put_ldev(mdev);

	tl = tl_add_int(tl, T_sync_progress, &res);
	put_unaligned(TT_END, tl++); /* Close the tag list */

	cn_reply->id.idx = CN_IDX_DRBD;
	cn_reply->id.val = CN_VAL_DRBD;

	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
	cn_reply->ack = 0; /* not used here. */
	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
		(int)((char *)tl - (char *)reply->tag_list);
	cn_reply->flags = 0;

	reply->packet_type = P_sync_progress;
	reply->minor = mdev_to_minor(mdev);
	reply->ret_code = NO_ERROR;

	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
}

int __init drbd_nl_init(void)
{
	static struct cb_id cn_id_drbd;
	int err, try = 10;

	cn_id_drbd.val = CN_VAL_DRBD;
	do {
		cn_id_drbd.idx = cn_idx;
		err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
		if (!err)
			break;
		cn_idx += CN_IDX_STEP;
	} while (try--);

	if (err) {
		printk(KERN_ERR "drbd: cn_drbd failed to register\n");
		return err;
	}

	return 0;
}

void drbd_nl_cleanup(void)
{
	static struct cb_id cn_id_drbd;

	cn_id_drbd.idx = cn_idx;
	cn_id_drbd.val = CN_VAL_DRBD;

	cn_del_callback(&cn_id_drbd);
}

void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
{
	char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
	struct drbd_nl_cfg_reply *reply =
		(struct drbd_nl_cfg_reply *)cn_reply->data;
	int rr;

	memset(buffer, 0, sizeof(buffer));
	cn_reply->id = req->id;

	cn_reply->seq = req->seq;
	cn_reply->ack = req->ack + 1;
	cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
	cn_reply->flags = 0;

	reply->packet_type = P_return_code_only;
	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
	reply->ret_code = ret_code;

	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
	if (rr && rr != -ESRCH)
		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
}
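
/*
 * Consuming these connector broadcasts from userspace (a minimal sketch,
 * not part of DRBD itself; assumes the default cn_idx, so the netlink
 * multicast group is CN_IDX_DRBD, and omits all error handling):
 *
 *	int s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
 *	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
 *	unsigned int group = CN_IDX_DRBD;
 *
 *	bind(s, (struct sockaddr *)&sa, sizeof(sa));
 *	setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &group, sizeof(group));
 *
 *	char buf[NLMSG_SPACE(1 << 16)];
 *	for (;;) {
 *		recv(s, buf, sizeof(buf), 0);
 *		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
 *		struct cn_msg *msg = NLMSG_DATA(nlh);
 *		struct drbd_nl_cfg_reply *r =
 *			(struct drbd_nl_cfg_reply *)msg->data;
 *		... dispatch on r->packet_type: P_get_state,
 *		    P_call_helper, P_sync_progress, P_dump_ee ...
 *	}
 *
 * drbdsetup's "events" command follows this pattern.
 */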