file.c revision 7bc3dfa37ba6f6ea81c362eb1993bd20c0828eae
1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf 19 * 20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21 * CA 95054 USA or visit www.sun.com if you need additional information or 22 * have any questions. 23 * 24 * GPL HEADER END 25 */ 26/* 27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Use is subject to license terms. 29 * 30 * Copyright (c) 2011, 2012, Intel Corporation. 31 */ 32/* 33 * This file is part of Lustre, http://www.lustre.org/ 34 * Lustre is a trademark of Sun Microsystems, Inc. 35 * 36 * lustre/llite/file.c 37 * 38 * Author: Peter Braam <braam@clusterfs.com> 39 * Author: Phil Schwan <phil@clusterfs.com> 40 * Author: Andreas Dilger <adilger@clusterfs.com> 41 */ 42 43#define DEBUG_SUBSYSTEM S_LLITE 44#include <lustre_dlm.h> 45#include <lustre_lite.h> 46#include <linux/pagemap.h> 47#include <linux/file.h> 48#include "llite_internal.h" 49#include <lustre/ll_fiemap.h> 50 51#include "cl_object.h" 52 53static int 54ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg); 55 56static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, 57 bool *lease_broken); 58 59static enum llioc_iter 60ll_iocontrol_call(struct inode *inode, struct file *file, 61 unsigned int cmd, unsigned long arg, int *rcp); 62 63static struct ll_file_data *ll_file_data_get(void) 64{ 65 struct ll_file_data *fd; 66 67 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS); 68 if (fd == NULL) 69 return NULL; 70 fd->fd_write_failed = false; 71 return fd; 72} 73 74static void ll_file_data_put(struct ll_file_data *fd) 75{ 76 if (fd != NULL) 77 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab); 78} 79 80void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, 81 struct lustre_handle *fh) 82{ 83 op_data->op_fid1 = ll_i2info(inode)->lli_fid; 84 op_data->op_attr.ia_mode = inode->i_mode; 85 op_data->op_attr.ia_atime = inode->i_atime; 86 op_data->op_attr.ia_mtime = inode->i_mtime; 87 op_data->op_attr.ia_ctime = inode->i_ctime; 88 op_data->op_attr.ia_size = i_size_read(inode); 89 op_data->op_attr_blocks = inode->i_blocks; 90 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = 91 ll_inode_to_ext_flags(inode->i_flags); 92 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch; 93 if (fh) 94 op_data->op_handle = *fh; 95 op_data->op_capa1 = ll_mdscapa_get(inode); 96 97 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags) 98 op_data->op_bias |= MDS_DATA_MODIFIED; 99} 100 101/** 102 * Closes the IO epoch and packs all the attributes into @op_data for 103 * the CLOSE rpc. 104 */ 105static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, 106 struct obd_client_handle *och) 107{ 108 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | 109 ATTR_MTIME | ATTR_MTIME_SET | 110 ATTR_CTIME | ATTR_CTIME_SET; 111 112 if (!(och->och_flags & FMODE_WRITE)) 113 goto out; 114 115 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode)) 116 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; 117 else 118 ll_ioepoch_close(inode, op_data, &och, 0); 119 120out: 121 ll_pack_inode2opdata(inode, op_data, &och->och_fh); 122 ll_prep_md_op_data(op_data, inode, NULL, NULL, 123 0, 0, LUSTRE_OPC_ANY, NULL); 124} 125 126static int ll_close_inode_openhandle(struct obd_export *md_exp, 127 struct inode *inode, 128 struct obd_client_handle *och, 129 const __u64 *data_version) 130{ 131 struct obd_export *exp = ll_i2mdexp(inode); 132 struct md_op_data *op_data; 133 struct ptlrpc_request *req = NULL; 134 struct obd_device *obd = class_exp2obd(exp); 135 int epoch_close = 1; 136 int rc; 137 138 if (obd == NULL) { 139 /* 140 * XXX: in case of LMV, is this correct to access 141 * ->exp_handle? 142 */ 143 CERROR("Invalid MDC connection handle "LPX64"\n", 144 ll_i2mdexp(inode)->exp_handle.h_cookie); 145 GOTO(out, rc = 0); 146 } 147 148 OBD_ALLOC_PTR(op_data); 149 if (op_data == NULL) 150 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here. 151 152 ll_prepare_close(inode, op_data, och); 153 if (data_version != NULL) { 154 /* Pass in data_version implies release. */ 155 op_data->op_bias |= MDS_HSM_RELEASE; 156 op_data->op_data_version = *data_version; 157 op_data->op_lease_handle = och->och_lease_handle; 158 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; 159 } 160 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE); 161 rc = md_close(md_exp, op_data, och->och_mod, &req); 162 if (rc == -EAGAIN) { 163 /* This close must have the epoch closed. */ 164 LASSERT(epoch_close); 165 /* MDS has instructed us to obtain Size-on-MDS attribute from 166 * OSTs and send setattr to back to MDS. */ 167 rc = ll_som_update(inode, op_data); 168 if (rc) { 169 CERROR("inode %lu mdc Size-on-MDS update failed: " 170 "rc = %d\n", inode->i_ino, rc); 171 rc = 0; 172 } 173 } else if (rc) { 174 CERROR("inode %lu mdc close failed: rc = %d\n", 175 inode->i_ino, rc); 176 } 177 178 /* DATA_MODIFIED flag was successfully sent on close, cancel data 179 * modification flag. */ 180 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { 181 struct ll_inode_info *lli = ll_i2info(inode); 182 183 spin_lock(&lli->lli_lock); 184 lli->lli_flags &= ~LLIF_DATA_MODIFIED; 185 spin_unlock(&lli->lli_lock); 186 } 187 188 if (rc == 0) { 189 rc = ll_objects_destroy(req, inode); 190 if (rc) 191 CERROR("inode %lu ll_objects destroy: rc = %d\n", 192 inode->i_ino, rc); 193 } 194 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) { 195 struct mdt_body *body; 196 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 197 if (!(body->valid & OBD_MD_FLRELEASED)) 198 rc = -EBUSY; 199 } 200 201 ll_finish_md_op_data(op_data); 202 203out: 204 if (exp_connect_som(exp) && !epoch_close && 205 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) { 206 ll_queue_done_writing(inode, LLIF_DONE_WRITING); 207 } else { 208 md_clear_open_replay_data(md_exp, och); 209 /* Free @och if it is not waiting for DONE_WRITING. */ 210 och->och_fh.cookie = DEAD_HANDLE_MAGIC; 211 OBD_FREE_PTR(och); 212 } 213 if (req) /* This is close request */ 214 ptlrpc_req_finished(req); 215 return rc; 216} 217 218int ll_md_real_close(struct inode *inode, fmode_t fmode) 219{ 220 struct ll_inode_info *lli = ll_i2info(inode); 221 struct obd_client_handle **och_p; 222 struct obd_client_handle *och; 223 __u64 *och_usecount; 224 int rc = 0; 225 226 if (fmode & FMODE_WRITE) { 227 och_p = &lli->lli_mds_write_och; 228 och_usecount = &lli->lli_open_fd_write_count; 229 } else if (fmode & FMODE_EXEC) { 230 och_p = &lli->lli_mds_exec_och; 231 och_usecount = &lli->lli_open_fd_exec_count; 232 } else { 233 LASSERT(fmode & FMODE_READ); 234 och_p = &lli->lli_mds_read_och; 235 och_usecount = &lli->lli_open_fd_read_count; 236 } 237 238 mutex_lock(&lli->lli_och_mutex); 239 if (*och_usecount > 0) { 240 /* There are still users of this handle, so skip 241 * freeing it. */ 242 mutex_unlock(&lli->lli_och_mutex); 243 return 0; 244 } 245 246 och=*och_p; 247 *och_p = NULL; 248 mutex_unlock(&lli->lli_och_mutex); 249 250 if (och != NULL) { 251 /* There might be a race and this handle may already 252 be closed. */ 253 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, 254 inode, och, NULL); 255 } 256 257 return rc; 258} 259 260static int ll_md_close(struct obd_export *md_exp, struct inode *inode, 261 struct file *file) 262{ 263 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 264 struct ll_inode_info *lli = ll_i2info(inode); 265 int rc = 0; 266 267 /* clear group lock, if present */ 268 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) 269 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid); 270 271 if (fd->fd_lease_och != NULL) { 272 bool lease_broken; 273 274 /* Usually the lease is not released when the 275 * application crashed, we need to release here. */ 276 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken); 277 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n", 278 PFID(&lli->lli_fid), rc, lease_broken); 279 280 fd->fd_lease_och = NULL; 281 } 282 283 if (fd->fd_och != NULL) { 284 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL); 285 fd->fd_och = NULL; 286 GOTO(out, rc); 287 } 288 289 /* Let's see if we have good enough OPEN lock on the file and if 290 we can skip talking to MDS */ 291 if (file->f_dentry->d_inode) { /* Can this ever be false? */ 292 int lockmode; 293 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; 294 struct lustre_handle lockh; 295 struct inode *inode = file->f_dentry->d_inode; 296 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}}; 297 298 mutex_lock(&lli->lli_och_mutex); 299 if (fd->fd_omode & FMODE_WRITE) { 300 lockmode = LCK_CW; 301 LASSERT(lli->lli_open_fd_write_count); 302 lli->lli_open_fd_write_count--; 303 } else if (fd->fd_omode & FMODE_EXEC) { 304 lockmode = LCK_PR; 305 LASSERT(lli->lli_open_fd_exec_count); 306 lli->lli_open_fd_exec_count--; 307 } else { 308 lockmode = LCK_CR; 309 LASSERT(lli->lli_open_fd_read_count); 310 lli->lli_open_fd_read_count--; 311 } 312 mutex_unlock(&lli->lli_och_mutex); 313 314 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode), 315 LDLM_IBITS, &policy, lockmode, 316 &lockh)) { 317 rc = ll_md_real_close(file->f_dentry->d_inode, 318 fd->fd_omode); 319 } 320 } else { 321 CERROR("Releasing a file %p with negative dentry %p. Name %s", 322 file, file->f_dentry, file->f_dentry->d_name.name); 323 } 324 325out: 326 LUSTRE_FPRIVATE(file) = NULL; 327 ll_file_data_put(fd); 328 ll_capa_close(inode); 329 330 return rc; 331} 332 333/* While this returns an error code, fput() the caller does not, so we need 334 * to make every effort to clean up all of our state here. Also, applications 335 * rarely check close errors and even if an error is returned they will not 336 * re-try the close call. 337 */ 338int ll_file_release(struct inode *inode, struct file *file) 339{ 340 struct ll_file_data *fd; 341 struct ll_sb_info *sbi = ll_i2sbi(inode); 342 struct ll_inode_info *lli = ll_i2info(inode); 343 int rc; 344 345 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, 346 inode->i_generation, inode); 347 348#ifdef CONFIG_FS_POSIX_ACL 349 if (sbi->ll_flags & LL_SBI_RMT_CLIENT && 350 inode == inode->i_sb->s_root->d_inode) { 351 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 352 353 LASSERT(fd != NULL); 354 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) { 355 fd->fd_flags &= ~LL_FILE_RMTACL; 356 rct_del(&sbi->ll_rct, current_pid()); 357 et_search_free(&sbi->ll_et, current_pid()); 358 } 359 } 360#endif 361 362 if (inode->i_sb->s_root != file->f_dentry) 363 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1); 364 fd = LUSTRE_FPRIVATE(file); 365 LASSERT(fd != NULL); 366 367 /* The last ref on @file, maybe not the the owner pid of statahead. 368 * Different processes can open the same dir, "ll_opendir_key" means: 369 * it is me that should stop the statahead thread. */ 370 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd && 371 lli->lli_opendir_pid != 0) 372 ll_stop_statahead(inode, lli->lli_opendir_key); 373 374 if (inode->i_sb->s_root == file->f_dentry) { 375 LUSTRE_FPRIVATE(file) = NULL; 376 ll_file_data_put(fd); 377 return 0; 378 } 379 380 if (!S_ISDIR(inode->i_mode)) { 381 lov_read_and_clear_async_rc(lli->lli_clob); 382 lli->lli_async_rc = 0; 383 } 384 385 rc = ll_md_close(sbi->ll_md_exp, inode, file); 386 387 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val)) 388 libcfs_debug_dumplog(); 389 390 return rc; 391} 392 393static int ll_intent_file_open(struct file *file, void *lmm, 394 int lmmsize, struct lookup_intent *itp) 395{ 396 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode); 397 struct dentry *parent = file->f_dentry->d_parent; 398 const char *name = file->f_dentry->d_name.name; 399 const int len = file->f_dentry->d_name.len; 400 struct md_op_data *op_data; 401 struct ptlrpc_request *req; 402 __u32 opc = LUSTRE_OPC_ANY; 403 int rc; 404 405 if (!parent) 406 return -ENOENT; 407 408 /* Usually we come here only for NFSD, and we want open lock. 409 But we can also get here with pre 2.6.15 patchless kernels, and in 410 that case that lock is also ok */ 411 /* We can also get here if there was cached open handle in revalidate_it 412 * but it disappeared while we were getting from there to ll_file_open. 413 * But this means this file was closed and immediately opened which 414 * makes a good candidate for using OPEN lock */ 415 /* If lmmsize & lmm are not 0, we are just setting stripe info 416 * parameters. No need for the open lock */ 417 if (lmm == NULL && lmmsize == 0) { 418 itp->it_flags |= MDS_OPEN_LOCK; 419 if (itp->it_flags & FMODE_WRITE) 420 opc = LUSTRE_OPC_CREATE; 421 } 422 423 op_data = ll_prep_md_op_data(NULL, parent->d_inode, 424 file->f_dentry->d_inode, name, len, 425 O_RDWR, opc, NULL); 426 if (IS_ERR(op_data)) 427 return PTR_ERR(op_data); 428 429 itp->it_flags |= MDS_OPEN_BY_FID; 430 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp, 431 0 /*unused */, &req, ll_md_blocking_ast, 0); 432 ll_finish_md_op_data(op_data); 433 if (rc == -ESTALE) { 434 /* reason for keep own exit path - don`t flood log 435 * with messages with -ESTALE errors. 436 */ 437 if (!it_disposition(itp, DISP_OPEN_OPEN) || 438 it_open_error(DISP_OPEN_OPEN, itp)) 439 GOTO(out, rc); 440 ll_release_openhandle(file->f_dentry, itp); 441 GOTO(out, rc); 442 } 443 444 if (it_disposition(itp, DISP_LOOKUP_NEG)) 445 GOTO(out, rc = -ENOENT); 446 447 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) { 448 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp); 449 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc); 450 GOTO(out, rc); 451 } 452 453 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp); 454 if (!rc && itp->d.lustre.it_lock_mode) 455 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode, 456 itp, NULL); 457 458out: 459 ptlrpc_req_finished(req); 460 ll_intent_drop_lock(itp); 461 462 return rc; 463} 464 465/** 466 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does 467 * not believe attributes if a few ioepoch holders exist. Attributes for 468 * previous ioepoch if new one is opened are also skipped by MDS. 469 */ 470void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch) 471{ 472 if (ioepoch && lli->lli_ioepoch != ioepoch) { 473 lli->lli_ioepoch = ioepoch; 474 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n", 475 ioepoch, PFID(&lli->lli_fid)); 476 } 477} 478 479static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, 480 struct obd_client_handle *och) 481{ 482 struct ptlrpc_request *req = it->d.lustre.it_data; 483 struct mdt_body *body; 484 485 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 486 och->och_fh = body->handle; 487 och->och_fid = body->fid1; 488 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle; 489 och->och_magic = OBD_CLIENT_HANDLE_MAGIC; 490 och->och_flags = it->it_flags; 491 492 return md_set_open_replay_data(md_exp, och, it); 493} 494 495static int ll_local_open(struct file *file, struct lookup_intent *it, 496 struct ll_file_data *fd, struct obd_client_handle *och) 497{ 498 struct inode *inode = file->f_dentry->d_inode; 499 struct ll_inode_info *lli = ll_i2info(inode); 500 501 LASSERT(!LUSTRE_FPRIVATE(file)); 502 503 LASSERT(fd != NULL); 504 505 if (och) { 506 struct ptlrpc_request *req = it->d.lustre.it_data; 507 struct mdt_body *body; 508 int rc; 509 510 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); 511 if (rc != 0) 512 return rc; 513 514 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 515 ll_ioepoch_open(lli, body->ioepoch); 516 } 517 518 LUSTRE_FPRIVATE(file) = fd; 519 ll_readahead_init(inode, &fd->fd_ras); 520 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); 521 return 0; 522} 523 524/* Open a file, and (for the very first open) create objects on the OSTs at 525 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object 526 * creation or open until ll_lov_setstripe() ioctl is called. 527 * 528 * If we already have the stripe MD locally then we don't request it in 529 * md_open(), by passing a lmm_size = 0. 530 * 531 * It is up to the application to ensure no other processes open this file 532 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be 533 * used. We might be able to avoid races of that sort by getting lli_open_sem 534 * before returning in the O_LOV_DELAY_CREATE case and dropping it here 535 * or in ll_file_release(), but I'm not sure that is desirable/necessary. 536 */ 537int ll_file_open(struct inode *inode, struct file *file) 538{ 539 struct ll_inode_info *lli = ll_i2info(inode); 540 struct lookup_intent *it, oit = { .it_op = IT_OPEN, 541 .it_flags = file->f_flags }; 542 struct obd_client_handle **och_p = NULL; 543 __u64 *och_usecount = NULL; 544 struct ll_file_data *fd; 545 int rc = 0, opendir_set = 0; 546 547 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino, 548 inode->i_generation, inode, file->f_flags); 549 550 it = file->private_data; /* XXX: compat macro */ 551 file->private_data = NULL; /* prevent ll_local_open assertion */ 552 553 fd = ll_file_data_get(); 554 if (fd == NULL) 555 GOTO(out_openerr, rc = -ENOMEM); 556 557 fd->fd_file = file; 558 if (S_ISDIR(inode->i_mode)) { 559 spin_lock(&lli->lli_sa_lock); 560 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL && 561 lli->lli_opendir_pid == 0) { 562 lli->lli_opendir_key = fd; 563 lli->lli_opendir_pid = current_pid(); 564 opendir_set = 1; 565 } 566 spin_unlock(&lli->lli_sa_lock); 567 } 568 569 if (inode->i_sb->s_root == file->f_dentry) { 570 LUSTRE_FPRIVATE(file) = fd; 571 return 0; 572 } 573 574 if (!it || !it->d.lustre.it_disposition) { 575 /* Convert f_flags into access mode. We cannot use file->f_mode, 576 * because everything but O_ACCMODE mask was stripped from 577 * there */ 578 if ((oit.it_flags + 1) & O_ACCMODE) 579 oit.it_flags++; 580 if (file->f_flags & O_TRUNC) 581 oit.it_flags |= FMODE_WRITE; 582 583 /* kernel only call f_op->open in dentry_open. filp_open calls 584 * dentry_open after call to open_namei that checks permissions. 585 * Only nfsd_open call dentry_open directly without checking 586 * permissions and because of that this code below is safe. */ 587 if (oit.it_flags & (FMODE_WRITE | FMODE_READ)) 588 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE; 589 590 /* We do not want O_EXCL here, presumably we opened the file 591 * already? XXX - NFS implications? */ 592 oit.it_flags &= ~O_EXCL; 593 594 /* bug20584, if "it_flags" contains O_CREAT, the file will be 595 * created if necessary, then "IT_CREAT" should be set to keep 596 * consistent with it */ 597 if (oit.it_flags & O_CREAT) 598 oit.it_op |= IT_CREAT; 599 600 it = &oit; 601 } 602 603restart: 604 /* Let's see if we have file open on MDS already. */ 605 if (it->it_flags & FMODE_WRITE) { 606 och_p = &lli->lli_mds_write_och; 607 och_usecount = &lli->lli_open_fd_write_count; 608 } else if (it->it_flags & FMODE_EXEC) { 609 och_p = &lli->lli_mds_exec_och; 610 och_usecount = &lli->lli_open_fd_exec_count; 611 } else { 612 och_p = &lli->lli_mds_read_och; 613 och_usecount = &lli->lli_open_fd_read_count; 614 } 615 616 mutex_lock(&lli->lli_och_mutex); 617 if (*och_p) { /* Open handle is present */ 618 if (it_disposition(it, DISP_OPEN_OPEN)) { 619 /* Well, there's extra open request that we do not need, 620 let's close it somehow. This will decref request. */ 621 rc = it_open_error(DISP_OPEN_OPEN, it); 622 if (rc) { 623 mutex_unlock(&lli->lli_och_mutex); 624 GOTO(out_openerr, rc); 625 } 626 627 ll_release_openhandle(file->f_dentry, it); 628 } 629 (*och_usecount)++; 630 631 rc = ll_local_open(file, it, fd, NULL); 632 if (rc) { 633 (*och_usecount)--; 634 mutex_unlock(&lli->lli_och_mutex); 635 GOTO(out_openerr, rc); 636 } 637 } else { 638 LASSERT(*och_usecount == 0); 639 if (!it->d.lustre.it_disposition) { 640 /* We cannot just request lock handle now, new ELC code 641 means that one of other OPEN locks for this file 642 could be cancelled, and since blocking ast handler 643 would attempt to grab och_mutex as well, that would 644 result in a deadlock */ 645 mutex_unlock(&lli->lli_och_mutex); 646 it->it_create_mode |= M_CHECK_STALE; 647 rc = ll_intent_file_open(file, NULL, 0, it); 648 it->it_create_mode &= ~M_CHECK_STALE; 649 if (rc) 650 GOTO(out_openerr, rc); 651 652 goto restart; 653 } 654 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle)); 655 if (!*och_p) 656 GOTO(out_och_free, rc = -ENOMEM); 657 658 (*och_usecount)++; 659 660 /* md_intent_lock() didn't get a request ref if there was an 661 * open error, so don't do cleanup on the request here 662 * (bug 3430) */ 663 /* XXX (green): Should not we bail out on any error here, not 664 * just open error? */ 665 rc = it_open_error(DISP_OPEN_OPEN, it); 666 if (rc) 667 GOTO(out_och_free, rc); 668 669 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF)); 670 671 rc = ll_local_open(file, it, fd, *och_p); 672 if (rc) 673 GOTO(out_och_free, rc); 674 } 675 mutex_unlock(&lli->lli_och_mutex); 676 fd = NULL; 677 678 /* Must do this outside lli_och_mutex lock to prevent deadlock where 679 different kind of OPEN lock for this same inode gets cancelled 680 by ldlm_cancel_lru */ 681 if (!S_ISREG(inode->i_mode)) 682 GOTO(out_och_free, rc); 683 684 ll_capa_open(inode); 685 686 if (!lli->lli_has_smd && 687 (cl_is_lov_delay_create(file->f_flags) || 688 (file->f_mode & FMODE_WRITE) == 0)) { 689 CDEBUG(D_INODE, "object creation was delayed\n"); 690 GOTO(out_och_free, rc); 691 } 692 cl_lov_delay_create_clear(&file->f_flags); 693 GOTO(out_och_free, rc); 694 695out_och_free: 696 if (rc) { 697 if (och_p && *och_p) { 698 OBD_FREE(*och_p, sizeof (struct obd_client_handle)); 699 *och_p = NULL; /* OBD_FREE writes some magic there */ 700 (*och_usecount)--; 701 } 702 mutex_unlock(&lli->lli_och_mutex); 703 704out_openerr: 705 if (opendir_set != 0) 706 ll_stop_statahead(inode, lli->lli_opendir_key); 707 if (fd != NULL) 708 ll_file_data_put(fd); 709 } else { 710 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1); 711 } 712 713 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) { 714 ptlrpc_req_finished(it->d.lustre.it_data); 715 it_clear_disposition(it, DISP_ENQ_OPEN_REF); 716 } 717 718 return rc; 719} 720 721static int ll_md_blocking_lease_ast(struct ldlm_lock *lock, 722 struct ldlm_lock_desc *desc, void *data, int flag) 723{ 724 int rc; 725 struct lustre_handle lockh; 726 727 switch (flag) { 728 case LDLM_CB_BLOCKING: 729 ldlm_lock2handle(lock, &lockh); 730 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); 731 if (rc < 0) { 732 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); 733 return rc; 734 } 735 break; 736 case LDLM_CB_CANCELING: 737 /* do nothing */ 738 break; 739 } 740 return 0; 741} 742 743/** 744 * Acquire a lease and open the file. 745 */ 746static struct obd_client_handle * 747ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, 748 __u64 open_flags) 749{ 750 struct lookup_intent it = { .it_op = IT_OPEN }; 751 struct ll_sb_info *sbi = ll_i2sbi(inode); 752 struct md_op_data *op_data; 753 struct ptlrpc_request *req; 754 struct lustre_handle old_handle = { 0 }; 755 struct obd_client_handle *och = NULL; 756 int rc; 757 int rc2; 758 759 if (fmode != FMODE_WRITE && fmode != FMODE_READ) 760 return ERR_PTR(-EINVAL); 761 762 if (file != NULL) { 763 struct ll_inode_info *lli = ll_i2info(inode); 764 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 765 struct obd_client_handle **och_p; 766 __u64 *och_usecount; 767 768 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC)) 769 return ERR_PTR(-EPERM); 770 771 /* Get the openhandle of the file */ 772 rc = -EBUSY; 773 mutex_lock(&lli->lli_och_mutex); 774 if (fd->fd_lease_och != NULL) { 775 mutex_unlock(&lli->lli_och_mutex); 776 return ERR_PTR(rc); 777 } 778 779 if (fd->fd_och == NULL) { 780 if (file->f_mode & FMODE_WRITE) { 781 LASSERT(lli->lli_mds_write_och != NULL); 782 och_p = &lli->lli_mds_write_och; 783 och_usecount = &lli->lli_open_fd_write_count; 784 } else { 785 LASSERT(lli->lli_mds_read_och != NULL); 786 och_p = &lli->lli_mds_read_och; 787 och_usecount = &lli->lli_open_fd_read_count; 788 } 789 if (*och_usecount == 1) { 790 fd->fd_och = *och_p; 791 *och_p = NULL; 792 *och_usecount = 0; 793 rc = 0; 794 } 795 } 796 mutex_unlock(&lli->lli_och_mutex); 797 if (rc < 0) /* more than 1 opener */ 798 return ERR_PTR(rc); 799 800 LASSERT(fd->fd_och != NULL); 801 old_handle = fd->fd_och->och_fh; 802 } 803 804 OBD_ALLOC_PTR(och); 805 if (och == NULL) 806 return ERR_PTR(-ENOMEM); 807 808 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, 809 LUSTRE_OPC_ANY, NULL); 810 if (IS_ERR(op_data)) 811 GOTO(out, rc = PTR_ERR(op_data)); 812 813 /* To tell the MDT this openhandle is from the same owner */ 814 op_data->op_handle = old_handle; 815 816 it.it_flags = fmode | open_flags; 817 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE; 818 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req, 819 ll_md_blocking_lease_ast, 820 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise 821 * it can be cancelled which may mislead applications that the lease is 822 * broken; 823 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal 824 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast 825 * doesn't deal with openhandle, so normal openhandle will be leaked. */ 826 LDLM_FL_NO_LRU | LDLM_FL_EXCL); 827 ll_finish_md_op_data(op_data); 828 ptlrpc_req_finished(req); 829 if (rc < 0) 830 GOTO(out_release_it, rc); 831 832 if (it_disposition(&it, DISP_LOOKUP_NEG)) 833 GOTO(out_release_it, rc = -ENOENT); 834 835 rc = it_open_error(DISP_OPEN_OPEN, &it); 836 if (rc) 837 GOTO(out_release_it, rc); 838 839 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF)); 840 ll_och_fill(sbi->ll_md_exp, &it, och); 841 842 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ 843 GOTO(out_close, rc = -EOPNOTSUPP); 844 845 /* already get lease, handle lease lock */ 846 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); 847 if (it.d.lustre.it_lock_mode == 0 || 848 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) { 849 /* open lock must return for lease */ 850 CERROR(DFID "lease granted but no open lock, %d/%llu.\n", 851 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode, 852 it.d.lustre.it_lock_bits); 853 GOTO(out_close, rc = -EPROTO); 854 } 855 856 ll_intent_release(&it); 857 return och; 858 859out_close: 860 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); 861 if (rc2) 862 CERROR("Close openhandle returned %d\n", rc2); 863 864 /* cancel open lock */ 865 if (it.d.lustre.it_lock_mode != 0) { 866 ldlm_lock_decref_and_cancel(&och->och_lease_handle, 867 it.d.lustre.it_lock_mode); 868 it.d.lustre.it_lock_mode = 0; 869 } 870out_release_it: 871 ll_intent_release(&it); 872out: 873 OBD_FREE_PTR(och); 874 return ERR_PTR(rc); 875} 876 877/** 878 * Release lease and close the file. 879 * It will check if the lease has ever broken. 880 */ 881static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, 882 bool *lease_broken) 883{ 884 struct ldlm_lock *lock; 885 bool cancelled = true; 886 int rc; 887 888 lock = ldlm_handle2lock(&och->och_lease_handle); 889 if (lock != NULL) { 890 lock_res_and_lock(lock); 891 cancelled = ldlm_is_cancel(lock); 892 unlock_res_and_lock(lock); 893 ldlm_lock_put(lock); 894 } 895 896 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n", 897 PFID(&ll_i2info(inode)->lli_fid), cancelled); 898 899 if (!cancelled) 900 ldlm_cli_cancel(&och->och_lease_handle, 0); 901 if (lease_broken != NULL) 902 *lease_broken = cancelled; 903 904 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och, 905 NULL); 906 return rc; 907} 908 909/* Fills the obdo with the attributes for the lsm */ 910static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, 911 struct obd_capa *capa, struct obdo *obdo, 912 __u64 ioepoch, int sync) 913{ 914 struct ptlrpc_request_set *set; 915 struct obd_info oinfo = { { { 0 } } }; 916 int rc; 917 918 LASSERT(lsm != NULL); 919 920 oinfo.oi_md = lsm; 921 oinfo.oi_oa = obdo; 922 oinfo.oi_oa->o_oi = lsm->lsm_oi; 923 oinfo.oi_oa->o_mode = S_IFREG; 924 oinfo.oi_oa->o_ioepoch = ioepoch; 925 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | 926 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | 927 OBD_MD_FLBLKSZ | OBD_MD_FLATIME | 928 OBD_MD_FLMTIME | OBD_MD_FLCTIME | 929 OBD_MD_FLGROUP | OBD_MD_FLEPOCH | 930 OBD_MD_FLDATAVERSION; 931 oinfo.oi_capa = capa; 932 if (sync) { 933 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; 934 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; 935 } 936 937 set = ptlrpc_prep_set(); 938 if (set == NULL) { 939 CERROR("can't allocate ptlrpc set\n"); 940 rc = -ENOMEM; 941 } else { 942 rc = obd_getattr_async(exp, &oinfo, set); 943 if (rc == 0) 944 rc = ptlrpc_set_wait(set); 945 ptlrpc_set_destroy(set); 946 } 947 if (rc == 0) 948 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | 949 OBD_MD_FLATIME | OBD_MD_FLMTIME | 950 OBD_MD_FLCTIME | OBD_MD_FLSIZE | 951 OBD_MD_FLDATAVERSION); 952 return rc; 953} 954 955/** 956 * Performs the getattr on the inode and updates its fields. 957 * If @sync != 0, perform the getattr under the server-side lock. 958 */ 959int ll_inode_getattr(struct inode *inode, struct obdo *obdo, 960 __u64 ioepoch, int sync) 961{ 962 struct obd_capa *capa = ll_mdscapa_get(inode); 963 struct lov_stripe_md *lsm; 964 int rc; 965 966 lsm = ccc_inode_lsm_get(inode); 967 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode), 968 capa, obdo, ioepoch, sync); 969 capa_put(capa); 970 if (rc == 0) { 971 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; 972 973 obdo_refresh_inode(inode, obdo, obdo->o_valid); 974 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu," 975 " blksize %lu\n", POSTID(oi), i_size_read(inode), 976 (unsigned long long)inode->i_blocks, 977 (unsigned long)ll_inode_blksize(inode)); 978 } 979 ccc_inode_lsm_put(inode, lsm); 980 return rc; 981} 982 983int ll_merge_lvb(const struct lu_env *env, struct inode *inode) 984{ 985 struct ll_inode_info *lli = ll_i2info(inode); 986 struct cl_object *obj = lli->lli_clob; 987 struct cl_attr *attr = ccc_env_thread_attr(env); 988 struct ost_lvb lvb; 989 int rc = 0; 990 991 ll_inode_size_lock(inode); 992 /* merge timestamps the most recently obtained from mds with 993 timestamps obtained from osts */ 994 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime; 995 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime; 996 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime; 997 inode_init_lvb(inode, &lvb); 998 999 cl_object_attr_lock(obj); 1000 rc = cl_object_attr_get(env, obj, attr); 1001 cl_object_attr_unlock(obj); 1002 1003 if (rc == 0) { 1004 if (lvb.lvb_atime < attr->cat_atime) 1005 lvb.lvb_atime = attr->cat_atime; 1006 if (lvb.lvb_ctime < attr->cat_ctime) 1007 lvb.lvb_ctime = attr->cat_ctime; 1008 if (lvb.lvb_mtime < attr->cat_mtime) 1009 lvb.lvb_mtime = attr->cat_mtime; 1010 1011 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n", 1012 PFID(&lli->lli_fid), attr->cat_size); 1013 cl_isize_write_nolock(inode, attr->cat_size); 1014 1015 inode->i_blocks = attr->cat_blocks; 1016 1017 LTIME_S(inode->i_mtime) = lvb.lvb_mtime; 1018 LTIME_S(inode->i_atime) = lvb.lvb_atime; 1019 LTIME_S(inode->i_ctime) = lvb.lvb_ctime; 1020 } 1021 ll_inode_size_unlock(inode); 1022 1023 return rc; 1024} 1025 1026int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, 1027 lstat_t *st) 1028{ 1029 struct obdo obdo = { 0 }; 1030 int rc; 1031 1032 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0); 1033 if (rc == 0) { 1034 st->st_size = obdo.o_size; 1035 st->st_blocks = obdo.o_blocks; 1036 st->st_mtime = obdo.o_mtime; 1037 st->st_atime = obdo.o_atime; 1038 st->st_ctime = obdo.o_ctime; 1039 } 1040 return rc; 1041} 1042 1043static bool file_is_noatime(const struct file *file) 1044{ 1045 const struct vfsmount *mnt = file->f_path.mnt; 1046 const struct inode *inode = file->f_path.dentry->d_inode; 1047 1048 /* Adapted from file_accessed() and touch_atime().*/ 1049 if (file->f_flags & O_NOATIME) 1050 return true; 1051 1052 if (inode->i_flags & S_NOATIME) 1053 return true; 1054 1055 if (IS_NOATIME(inode)) 1056 return true; 1057 1058 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY)) 1059 return true; 1060 1061 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1062 return true; 1063 1064 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1065 return true; 1066 1067 return false; 1068} 1069 1070void ll_io_init(struct cl_io *io, const struct file *file, int write) 1071{ 1072 struct inode *inode = file->f_dentry->d_inode; 1073 1074 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK; 1075 if (write) { 1076 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND); 1077 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || 1078 file->f_flags & O_DIRECT || 1079 IS_SYNC(inode); 1080 } 1081 io->ci_obj = ll_i2info(inode)->lli_clob; 1082 io->ci_lockreq = CILR_MAYBE; 1083 if (ll_file_nolock(file)) { 1084 io->ci_lockreq = CILR_NEVER; 1085 io->ci_no_srvlock = 1; 1086 } else if (file->f_flags & O_APPEND) { 1087 io->ci_lockreq = CILR_MANDATORY; 1088 } 1089 1090 io->ci_noatime = file_is_noatime(file); 1091} 1092 1093static ssize_t 1094ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, 1095 struct file *file, enum cl_io_type iot, 1096 loff_t *ppos, size_t count) 1097{ 1098 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode); 1099 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 1100 struct cl_io *io; 1101 ssize_t result; 1102 1103restart: 1104 io = ccc_env_thread_io(env); 1105 ll_io_init(io, file, iot == CIT_WRITE); 1106 1107 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { 1108 struct vvp_io *vio = vvp_env_io(env); 1109 struct ccc_io *cio = ccc_env_io(env); 1110 int write_mutex_locked = 0; 1111 1112 cio->cui_fd = LUSTRE_FPRIVATE(file); 1113 vio->cui_io_subtype = args->via_io_subtype; 1114 1115 switch (vio->cui_io_subtype) { 1116 case IO_NORMAL: 1117 cio->cui_iov = args->u.normal.via_iov; 1118 cio->cui_nrsegs = args->u.normal.via_nrsegs; 1119 cio->cui_tot_nrsegs = cio->cui_nrsegs; 1120 cio->cui_iocb = args->u.normal.via_iocb; 1121 if ((iot == CIT_WRITE) && 1122 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { 1123 if (mutex_lock_interruptible(&lli-> 1124 lli_write_mutex)) 1125 GOTO(out, result = -ERESTARTSYS); 1126 write_mutex_locked = 1; 1127 } else if (iot == CIT_READ) { 1128 down_read(&lli->lli_trunc_sem); 1129 } 1130 break; 1131 case IO_SPLICE: 1132 vio->u.splice.cui_pipe = args->u.splice.via_pipe; 1133 vio->u.splice.cui_flags = args->u.splice.via_flags; 1134 break; 1135 default: 1136 CERROR("Unknown IO type - %u\n", vio->cui_io_subtype); 1137 LBUG(); 1138 } 1139 result = cl_io_loop(env, io); 1140 if (write_mutex_locked) 1141 mutex_unlock(&lli->lli_write_mutex); 1142 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ) 1143 up_read(&lli->lli_trunc_sem); 1144 } else { 1145 /* cl_io_rw_init() handled IO */ 1146 result = io->ci_result; 1147 } 1148 1149 if (io->ci_nob > 0) { 1150 result = io->ci_nob; 1151 *ppos = io->u.ci_wr.wr.crw_pos; 1152 } 1153 GOTO(out, result); 1154out: 1155 cl_io_fini(env, io); 1156 /* If any bit been read/written (result != 0), we just return 1157 * short read/write instead of restart io. */ 1158 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) { 1159 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n", 1160 iot == CIT_READ ? "read" : "write", 1161 file->f_dentry->d_name.name, *ppos, count); 1162 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob); 1163 goto restart; 1164 } 1165 1166 if (iot == CIT_READ) { 1167 if (result >= 0) 1168 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), 1169 LPROC_LL_READ_BYTES, result); 1170 } else if (iot == CIT_WRITE) { 1171 if (result >= 0) { 1172 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), 1173 LPROC_LL_WRITE_BYTES, result); 1174 fd->fd_write_failed = false; 1175 } else if (result != -ERESTARTSYS) { 1176 fd->fd_write_failed = true; 1177 } 1178 } 1179 1180 return result; 1181} 1182 1183static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, 1184 unsigned long nr_segs, loff_t pos) 1185{ 1186 struct lu_env *env; 1187 struct vvp_io_args *args; 1188 size_t count = 0; 1189 ssize_t result; 1190 int refcheck; 1191 1192 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1193 if (result) 1194 return result; 1195 1196 env = cl_env_get(&refcheck); 1197 if (IS_ERR(env)) 1198 return PTR_ERR(env); 1199 1200 args = vvp_env_args(env, IO_NORMAL); 1201 args->u.normal.via_iov = (struct iovec *)iov; 1202 args->u.normal.via_nrsegs = nr_segs; 1203 args->u.normal.via_iocb = iocb; 1204 1205 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ, 1206 &iocb->ki_pos, count); 1207 cl_env_put(env, &refcheck); 1208 return result; 1209} 1210 1211static ssize_t ll_file_read(struct file *file, char *buf, size_t count, 1212 loff_t *ppos) 1213{ 1214 struct lu_env *env; 1215 struct iovec *local_iov; 1216 struct kiocb *kiocb; 1217 ssize_t result; 1218 int refcheck; 1219 1220 env = cl_env_get(&refcheck); 1221 if (IS_ERR(env)) 1222 return PTR_ERR(env); 1223 1224 local_iov = &vvp_env_info(env)->vti_local_iov; 1225 kiocb = &vvp_env_info(env)->vti_kiocb; 1226 local_iov->iov_base = (void __user *)buf; 1227 local_iov->iov_len = count; 1228 init_sync_kiocb(kiocb, file); 1229 kiocb->ki_pos = *ppos; 1230 kiocb->ki_nbytes = count; 1231 1232 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos); 1233 *ppos = kiocb->ki_pos; 1234 1235 cl_env_put(env, &refcheck); 1236 return result; 1237} 1238 1239/* 1240 * Write to a file (through the page cache). 1241 */ 1242static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 1243 unsigned long nr_segs, loff_t pos) 1244{ 1245 struct lu_env *env; 1246 struct vvp_io_args *args; 1247 size_t count = 0; 1248 ssize_t result; 1249 int refcheck; 1250 1251 result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); 1252 if (result) 1253 return result; 1254 1255 env = cl_env_get(&refcheck); 1256 if (IS_ERR(env)) 1257 return PTR_ERR(env); 1258 1259 args = vvp_env_args(env, IO_NORMAL); 1260 args->u.normal.via_iov = (struct iovec *)iov; 1261 args->u.normal.via_nrsegs = nr_segs; 1262 args->u.normal.via_iocb = iocb; 1263 1264 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE, 1265 &iocb->ki_pos, count); 1266 cl_env_put(env, &refcheck); 1267 return result; 1268} 1269 1270static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, 1271 loff_t *ppos) 1272{ 1273 struct lu_env *env; 1274 struct iovec *local_iov; 1275 struct kiocb *kiocb; 1276 ssize_t result; 1277 int refcheck; 1278 1279 env = cl_env_get(&refcheck); 1280 if (IS_ERR(env)) 1281 return PTR_ERR(env); 1282 1283 local_iov = &vvp_env_info(env)->vti_local_iov; 1284 kiocb = &vvp_env_info(env)->vti_kiocb; 1285 local_iov->iov_base = (void __user *)buf; 1286 local_iov->iov_len = count; 1287 init_sync_kiocb(kiocb, file); 1288 kiocb->ki_pos = *ppos; 1289 kiocb->ki_nbytes = count; 1290 1291 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos); 1292 *ppos = kiocb->ki_pos; 1293 1294 cl_env_put(env, &refcheck); 1295 return result; 1296} 1297 1298 1299 1300/* 1301 * Send file content (through pagecache) somewhere with helper 1302 */ 1303static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, 1304 struct pipe_inode_info *pipe, size_t count, 1305 unsigned int flags) 1306{ 1307 struct lu_env *env; 1308 struct vvp_io_args *args; 1309 ssize_t result; 1310 int refcheck; 1311 1312 env = cl_env_get(&refcheck); 1313 if (IS_ERR(env)) 1314 return PTR_ERR(env); 1315 1316 args = vvp_env_args(env, IO_SPLICE); 1317 args->u.splice.via_pipe = pipe; 1318 args->u.splice.via_flags = flags; 1319 1320 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count); 1321 cl_env_put(env, &refcheck); 1322 return result; 1323} 1324 1325static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, 1326 obd_count ost_idx) 1327{ 1328 struct obd_export *exp = ll_i2dtexp(inode); 1329 struct obd_trans_info oti = { 0 }; 1330 struct obdo *oa = NULL; 1331 int lsm_size; 1332 int rc = 0; 1333 struct lov_stripe_md *lsm = NULL, *lsm2; 1334 1335 OBDO_ALLOC(oa); 1336 if (oa == NULL) 1337 return -ENOMEM; 1338 1339 lsm = ccc_inode_lsm_get(inode); 1340 if (!lsm_has_objects(lsm)) 1341 GOTO(out, rc = -ENOENT); 1342 1343 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) * 1344 (lsm->lsm_stripe_count)); 1345 1346 OBD_ALLOC_LARGE(lsm2, lsm_size); 1347 if (lsm2 == NULL) 1348 GOTO(out, rc = -ENOMEM); 1349 1350 oa->o_oi = *oi; 1351 oa->o_nlink = ost_idx; 1352 oa->o_flags |= OBD_FL_RECREATE_OBJS; 1353 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP; 1354 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | 1355 OBD_MD_FLMTIME | OBD_MD_FLCTIME); 1356 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); 1357 memcpy(lsm2, lsm, lsm_size); 1358 ll_inode_size_lock(inode); 1359 rc = obd_create(NULL, exp, oa, &lsm2, &oti); 1360 ll_inode_size_unlock(inode); 1361 1362 OBD_FREE_LARGE(lsm2, lsm_size); 1363 GOTO(out, rc); 1364out: 1365 ccc_inode_lsm_put(inode, lsm); 1366 OBDO_FREE(oa); 1367 return rc; 1368} 1369 1370static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg) 1371{ 1372 struct ll_recreate_obj ucreat; 1373 struct ost_id oi; 1374 1375 if (!capable(CFS_CAP_SYS_ADMIN)) 1376 return -EPERM; 1377 1378 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg, 1379 sizeof(ucreat))) 1380 return -EFAULT; 1381 1382 ostid_set_seq_mdt0(&oi); 1383 ostid_set_id(&oi, ucreat.lrc_id); 1384 return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx); 1385} 1386 1387static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg) 1388{ 1389 struct lu_fid fid; 1390 struct ost_id oi; 1391 obd_count ost_idx; 1392 1393 if (!capable(CFS_CAP_SYS_ADMIN)) 1394 return -EPERM; 1395 1396 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid))) 1397 return -EFAULT; 1398 1399 fid_to_ostid(&fid, &oi); 1400 ost_idx = (fid_seq(&fid) >> 16) & 0xffff; 1401 return ll_lov_recreate(inode, &oi, ost_idx); 1402} 1403 1404int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, 1405 int flags, struct lov_user_md *lum, int lum_size) 1406{ 1407 struct lov_stripe_md *lsm = NULL; 1408 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; 1409 int rc = 0; 1410 1411 lsm = ccc_inode_lsm_get(inode); 1412 if (lsm != NULL) { 1413 ccc_inode_lsm_put(inode, lsm); 1414 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", 1415 inode->i_ino); 1416 GOTO(out, rc = -EEXIST); 1417 } 1418 1419 ll_inode_size_lock(inode); 1420 rc = ll_intent_file_open(file, lum, lum_size, &oit); 1421 if (rc) 1422 GOTO(out_unlock, rc); 1423 rc = oit.d.lustre.it_status; 1424 if (rc < 0) 1425 GOTO(out_req_free, rc); 1426 1427 ll_release_openhandle(file->f_dentry, &oit); 1428 1429out_unlock: 1430 ll_inode_size_unlock(inode); 1431 ll_intent_release(&oit); 1432 ccc_inode_lsm_put(inode, lsm); 1433out: 1434 cl_lov_delay_create_clear(&file->f_flags); 1435 return rc; 1436out_req_free: 1437 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data); 1438 goto out; 1439} 1440 1441int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, 1442 struct lov_mds_md **lmmp, int *lmm_size, 1443 struct ptlrpc_request **request) 1444{ 1445 struct ll_sb_info *sbi = ll_i2sbi(inode); 1446 struct mdt_body *body; 1447 struct lov_mds_md *lmm = NULL; 1448 struct ptlrpc_request *req = NULL; 1449 struct md_op_data *op_data; 1450 int rc, lmmsize; 1451 1452 rc = ll_get_default_mdsize(sbi, &lmmsize); 1453 if (rc) 1454 return rc; 1455 1456 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, 1457 strlen(filename), lmmsize, 1458 LUSTRE_OPC_ANY, NULL); 1459 if (IS_ERR(op_data)) 1460 return PTR_ERR(op_data); 1461 1462 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; 1463 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); 1464 ll_finish_md_op_data(op_data); 1465 if (rc < 0) { 1466 CDEBUG(D_INFO, "md_getattr_name failed " 1467 "on %s: rc %d\n", filename, rc); 1468 GOTO(out, rc); 1469 } 1470 1471 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 1472 LASSERT(body != NULL); /* checked by mdc_getattr_name */ 1473 1474 lmmsize = body->eadatasize; 1475 1476 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) || 1477 lmmsize == 0) { 1478 GOTO(out, rc = -ENODATA); 1479 } 1480 1481 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize); 1482 LASSERT(lmm != NULL); 1483 1484 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) && 1485 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) { 1486 GOTO(out, rc = -EPROTO); 1487 } 1488 1489 /* 1490 * This is coming from the MDS, so is probably in 1491 * little endian. We convert it to host endian before 1492 * passing it to userspace. 1493 */ 1494 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) { 1495 int stripe_count; 1496 1497 stripe_count = le16_to_cpu(lmm->lmm_stripe_count); 1498 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED) 1499 stripe_count = 0; 1500 1501 /* if function called for directory - we should 1502 * avoid swab not existent lsm objects */ 1503 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) { 1504 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); 1505 if (S_ISREG(body->mode)) 1506 lustre_swab_lov_user_md_objects( 1507 ((struct lov_user_md_v1 *)lmm)->lmm_objects, 1508 stripe_count); 1509 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) { 1510 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); 1511 if (S_ISREG(body->mode)) 1512 lustre_swab_lov_user_md_objects( 1513 ((struct lov_user_md_v3 *)lmm)->lmm_objects, 1514 stripe_count); 1515 } 1516 } 1517 1518out: 1519 *lmmp = lmm; 1520 *lmm_size = lmmsize; 1521 *request = req; 1522 return rc; 1523} 1524 1525static int ll_lov_setea(struct inode *inode, struct file *file, 1526 unsigned long arg) 1527{ 1528 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; 1529 struct lov_user_md *lump; 1530 int lum_size = sizeof(struct lov_user_md) + 1531 sizeof(struct lov_user_ost_data); 1532 int rc; 1533 1534 if (!capable(CFS_CAP_SYS_ADMIN)) 1535 return -EPERM; 1536 1537 OBD_ALLOC_LARGE(lump, lum_size); 1538 if (lump == NULL) 1539 return -ENOMEM; 1540 1541 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) { 1542 OBD_FREE_LARGE(lump, lum_size); 1543 return -EFAULT; 1544 } 1545 1546 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size); 1547 1548 OBD_FREE_LARGE(lump, lum_size); 1549 return rc; 1550} 1551 1552static int ll_lov_setstripe(struct inode *inode, struct file *file, 1553 unsigned long arg) 1554{ 1555 struct lov_user_md_v3 lumv3; 1556 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3; 1557 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg; 1558 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg; 1559 int lum_size, rc; 1560 int flags = FMODE_WRITE; 1561 1562 /* first try with v1 which is smaller than v3 */ 1563 lum_size = sizeof(struct lov_user_md_v1); 1564 if (copy_from_user(lumv1, lumv1p, lum_size)) 1565 return -EFAULT; 1566 1567 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) { 1568 lum_size = sizeof(struct lov_user_md_v3); 1569 if (copy_from_user(&lumv3, lumv3p, lum_size)) 1570 return -EFAULT; 1571 } 1572 1573 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size); 1574 if (rc == 0) { 1575 struct lov_stripe_md *lsm; 1576 __u32 gen; 1577 1578 put_user(0, &lumv1p->lmm_stripe_count); 1579 1580 ll_layout_refresh(inode, &gen); 1581 lsm = ccc_inode_lsm_get(inode); 1582 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 1583 0, lsm, (void *)arg); 1584 ccc_inode_lsm_put(inode, lsm); 1585 } 1586 return rc; 1587} 1588 1589static int ll_lov_getstripe(struct inode *inode, unsigned long arg) 1590{ 1591 struct lov_stripe_md *lsm; 1592 int rc = -ENODATA; 1593 1594 lsm = ccc_inode_lsm_get(inode); 1595 if (lsm != NULL) 1596 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, 1597 lsm, (void *)arg); 1598 ccc_inode_lsm_put(inode, lsm); 1599 return rc; 1600} 1601 1602static int 1603ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) 1604{ 1605 struct ll_inode_info *lli = ll_i2info(inode); 1606 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 1607 struct ccc_grouplock grouplock; 1608 int rc; 1609 1610 if (ll_file_nolock(file)) 1611 return -EOPNOTSUPP; 1612 1613 spin_lock(&lli->lli_lock); 1614 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { 1615 CWARN("group lock already existed with gid %lu\n", 1616 fd->fd_grouplock.cg_gid); 1617 spin_unlock(&lli->lli_lock); 1618 return -EINVAL; 1619 } 1620 LASSERT(fd->fd_grouplock.cg_lock == NULL); 1621 spin_unlock(&lli->lli_lock); 1622 1623 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob, 1624 arg, (file->f_flags & O_NONBLOCK), &grouplock); 1625 if (rc) 1626 return rc; 1627 1628 spin_lock(&lli->lli_lock); 1629 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { 1630 spin_unlock(&lli->lli_lock); 1631 CERROR("another thread just won the race\n"); 1632 cl_put_grouplock(&grouplock); 1633 return -EINVAL; 1634 } 1635 1636 fd->fd_flags |= LL_FILE_GROUP_LOCKED; 1637 fd->fd_grouplock = grouplock; 1638 spin_unlock(&lli->lli_lock); 1639 1640 CDEBUG(D_INFO, "group lock %lu obtained\n", arg); 1641 return 0; 1642} 1643 1644int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg) 1645{ 1646 struct ll_inode_info *lli = ll_i2info(inode); 1647 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 1648 struct ccc_grouplock grouplock; 1649 1650 spin_lock(&lli->lli_lock); 1651 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { 1652 spin_unlock(&lli->lli_lock); 1653 CWARN("no group lock held\n"); 1654 return -EINVAL; 1655 } 1656 LASSERT(fd->fd_grouplock.cg_lock != NULL); 1657 1658 if (fd->fd_grouplock.cg_gid != arg) { 1659 CWARN("group lock %lu doesn't match current id %lu\n", 1660 arg, fd->fd_grouplock.cg_gid); 1661 spin_unlock(&lli->lli_lock); 1662 return -EINVAL; 1663 } 1664 1665 grouplock = fd->fd_grouplock; 1666 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock)); 1667 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED; 1668 spin_unlock(&lli->lli_lock); 1669 1670 cl_put_grouplock(&grouplock); 1671 CDEBUG(D_INFO, "group lock %lu released\n", arg); 1672 return 0; 1673} 1674 1675/** 1676 * Close inode open handle 1677 * 1678 * \param dentry [in] dentry which contains the inode 1679 * \param it [in,out] intent which contains open info and result 1680 * 1681 * \retval 0 success 1682 * \retval <0 failure 1683 */ 1684int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it) 1685{ 1686 struct inode *inode = dentry->d_inode; 1687 struct obd_client_handle *och; 1688 int rc; 1689 1690 LASSERT(inode); 1691 1692 /* Root ? Do nothing. */ 1693 if (dentry->d_inode->i_sb->s_root == dentry) 1694 return 0; 1695 1696 /* No open handle to close? Move away */ 1697 if (!it_disposition(it, DISP_OPEN_OPEN)) 1698 return 0; 1699 1700 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0); 1701 1702 OBD_ALLOC(och, sizeof(*och)); 1703 if (!och) 1704 GOTO(out, rc = -ENOMEM); 1705 1706 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); 1707 1708 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, 1709 inode, och, NULL); 1710out: 1711 /* this one is in place of ll_file_open */ 1712 if (it_disposition(it, DISP_ENQ_OPEN_REF)) { 1713 ptlrpc_req_finished(it->d.lustre.it_data); 1714 it_clear_disposition(it, DISP_ENQ_OPEN_REF); 1715 } 1716 return rc; 1717} 1718 1719/** 1720 * Get size for inode for which FIEMAP mapping is requested. 1721 * Make the FIEMAP get_info call and returns the result. 1722 */ 1723static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap, 1724 size_t num_bytes) 1725{ 1726 struct obd_export *exp = ll_i2dtexp(inode); 1727 struct lov_stripe_md *lsm = NULL; 1728 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, }; 1729 __u32 vallen = num_bytes; 1730 int rc; 1731 1732 /* Checks for fiemap flags */ 1733 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) { 1734 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT; 1735 return -EBADR; 1736 } 1737 1738 /* Check for FIEMAP_FLAG_SYNC */ 1739 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) { 1740 rc = filemap_fdatawrite(inode->i_mapping); 1741 if (rc) 1742 return rc; 1743 } 1744 1745 lsm = ccc_inode_lsm_get(inode); 1746 if (lsm == NULL) 1747 return -ENOENT; 1748 1749 /* If the stripe_count > 1 and the application does not understand 1750 * DEVICE_ORDER flag, then it cannot interpret the extents correctly. 1751 */ 1752 if (lsm->lsm_stripe_count > 1 && 1753 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) 1754 GOTO(out, rc = -EOPNOTSUPP); 1755 1756 fm_key.oa.o_oi = lsm->lsm_oi; 1757 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; 1758 1759 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE); 1760 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid); 1761 /* If filesize is 0, then there would be no objects for mapping */ 1762 if (fm_key.oa.o_size == 0) { 1763 fiemap->fm_mapped_extents = 0; 1764 GOTO(out, rc = 0); 1765 } 1766 1767 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap)); 1768 1769 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen, 1770 fiemap, lsm); 1771 if (rc) 1772 CERROR("obd_get_info failed: rc = %d\n", rc); 1773 1774out: 1775 ccc_inode_lsm_put(inode, lsm); 1776 return rc; 1777} 1778 1779int ll_fid2path(struct inode *inode, void *arg) 1780{ 1781 struct obd_export *exp = ll_i2mdexp(inode); 1782 struct getinfo_fid2path *gfout, *gfin; 1783 int outsize, rc; 1784 1785 if (!capable(CFS_CAP_DAC_READ_SEARCH) && 1786 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) 1787 return -EPERM; 1788 1789 /* Need to get the buflen */ 1790 OBD_ALLOC_PTR(gfin); 1791 if (gfin == NULL) 1792 return -ENOMEM; 1793 if (copy_from_user(gfin, arg, sizeof(*gfin))) { 1794 OBD_FREE_PTR(gfin); 1795 return -EFAULT; 1796 } 1797 1798 outsize = sizeof(*gfout) + gfin->gf_pathlen; 1799 OBD_ALLOC(gfout, outsize); 1800 if (gfout == NULL) { 1801 OBD_FREE_PTR(gfin); 1802 return -ENOMEM; 1803 } 1804 memcpy(gfout, gfin, sizeof(*gfout)); 1805 OBD_FREE_PTR(gfin); 1806 1807 /* Call mdc_iocontrol */ 1808 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL); 1809 if (rc) 1810 GOTO(gf_free, rc); 1811 1812 if (copy_to_user(arg, gfout, outsize)) 1813 rc = -EFAULT; 1814 1815gf_free: 1816 OBD_FREE(gfout, outsize); 1817 return rc; 1818} 1819 1820static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg) 1821{ 1822 struct ll_user_fiemap *fiemap_s; 1823 size_t num_bytes, ret_bytes; 1824 unsigned int extent_count; 1825 int rc = 0; 1826 1827 /* Get the extent count so we can calculate the size of 1828 * required fiemap buffer */ 1829 if (get_user(extent_count, 1830 &((struct ll_user_fiemap __user *)arg)->fm_extent_count)) 1831 return -EFAULT; 1832 1833 if (extent_count >= 1834 (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent)) 1835 return -EINVAL; 1836 num_bytes = sizeof(*fiemap_s) + (extent_count * 1837 sizeof(struct ll_fiemap_extent)); 1838 1839 OBD_ALLOC_LARGE(fiemap_s, num_bytes); 1840 if (fiemap_s == NULL) 1841 return -ENOMEM; 1842 1843 /* get the fiemap value */ 1844 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg, 1845 sizeof(*fiemap_s))) 1846 GOTO(error, rc = -EFAULT); 1847 1848 /* If fm_extent_count is non-zero, read the first extent since 1849 * it is used to calculate end_offset and device from previous 1850 * fiemap call. */ 1851 if (extent_count) { 1852 if (copy_from_user(&fiemap_s->fm_extents[0], 1853 (char __user *)arg + sizeof(*fiemap_s), 1854 sizeof(struct ll_fiemap_extent))) 1855 GOTO(error, rc = -EFAULT); 1856 } 1857 1858 rc = ll_do_fiemap(inode, fiemap_s, num_bytes); 1859 if (rc) 1860 GOTO(error, rc); 1861 1862 ret_bytes = sizeof(struct ll_user_fiemap); 1863 1864 if (extent_count != 0) 1865 ret_bytes += (fiemap_s->fm_mapped_extents * 1866 sizeof(struct ll_fiemap_extent)); 1867 1868 if (copy_to_user((void *)arg, fiemap_s, ret_bytes)) 1869 rc = -EFAULT; 1870 1871error: 1872 OBD_FREE_LARGE(fiemap_s, num_bytes); 1873 return rc; 1874} 1875 1876/* 1877 * Read the data_version for inode. 1878 * 1879 * This value is computed using stripe object version on OST. 1880 * Version is computed using server side locking. 1881 * 1882 * @param extent_lock Take extent lock. Not needed if a process is already 1883 * holding the OST object group locks. 1884 */ 1885int ll_data_version(struct inode *inode, __u64 *data_version, 1886 int extent_lock) 1887{ 1888 struct lov_stripe_md *lsm = NULL; 1889 struct ll_sb_info *sbi = ll_i2sbi(inode); 1890 struct obdo *obdo = NULL; 1891 int rc; 1892 1893 /* If no stripe, we consider version is 0. */ 1894 lsm = ccc_inode_lsm_get(inode); 1895 if (!lsm_has_objects(lsm)) { 1896 *data_version = 0; 1897 CDEBUG(D_INODE, "No object for inode\n"); 1898 GOTO(out, rc = 0); 1899 } 1900 1901 OBD_ALLOC_PTR(obdo); 1902 if (obdo == NULL) 1903 GOTO(out, rc = -ENOMEM); 1904 1905 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock); 1906 if (rc == 0) { 1907 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) 1908 rc = -EOPNOTSUPP; 1909 else 1910 *data_version = obdo->o_data_version; 1911 } 1912 1913 OBD_FREE_PTR(obdo); 1914out: 1915 ccc_inode_lsm_put(inode, lsm); 1916 return rc; 1917} 1918 1919/* 1920 * Trigger a HSM release request for the provided inode. 1921 */ 1922int ll_hsm_release(struct inode *inode) 1923{ 1924 struct cl_env_nest nest; 1925 struct lu_env *env; 1926 struct obd_client_handle *och = NULL; 1927 __u64 data_version = 0; 1928 int rc; 1929 1930 1931 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n", 1932 ll_get_fsname(inode->i_sb, NULL, 0), 1933 PFID(&ll_i2info(inode)->lli_fid)); 1934 1935 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE); 1936 if (IS_ERR(och)) 1937 GOTO(out, rc = PTR_ERR(och)); 1938 1939 /* Grab latest data_version and [am]time values */ 1940 rc = ll_data_version(inode, &data_version, 1); 1941 if (rc != 0) 1942 GOTO(out, rc); 1943 1944 env = cl_env_nested_get(&nest); 1945 if (IS_ERR(env)) 1946 GOTO(out, rc = PTR_ERR(env)); 1947 1948 ll_merge_lvb(env, inode); 1949 cl_env_nested_put(&nest, env); 1950 1951 /* Release the file. 1952 * NB: lease lock handle is released in mdc_hsm_release_pack() because 1953 * we still need it to pack l_remote_handle to MDT. */ 1954 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och, 1955 &data_version); 1956 och = NULL; 1957 1958 1959out: 1960 if (och != NULL && !IS_ERR(och)) /* close the file */ 1961 ll_lease_close(och, inode, NULL); 1962 1963 return rc; 1964} 1965 1966struct ll_swap_stack { 1967 struct iattr ia1, ia2; 1968 __u64 dv1, dv2; 1969 struct inode *inode1, *inode2; 1970 bool check_dv1, check_dv2; 1971}; 1972 1973static int ll_swap_layouts(struct file *file1, struct file *file2, 1974 struct lustre_swap_layouts *lsl) 1975{ 1976 struct mdc_swap_layouts msl; 1977 struct md_op_data *op_data; 1978 __u32 gid; 1979 __u64 dv; 1980 struct ll_swap_stack *llss = NULL; 1981 int rc; 1982 1983 OBD_ALLOC_PTR(llss); 1984 if (llss == NULL) 1985 return -ENOMEM; 1986 1987 llss->inode1 = file1->f_dentry->d_inode; 1988 llss->inode2 = file2->f_dentry->d_inode; 1989 1990 if (!S_ISREG(llss->inode2->i_mode)) 1991 GOTO(free, rc = -EINVAL); 1992 1993 if (inode_permission(llss->inode1, MAY_WRITE) || 1994 inode_permission(llss->inode2, MAY_WRITE)) 1995 GOTO(free, rc = -EPERM); 1996 1997 if (llss->inode2->i_sb != llss->inode1->i_sb) 1998 GOTO(free, rc = -EXDEV); 1999 2000 /* we use 2 bool because it is easier to swap than 2 bits */ 2001 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1) 2002 llss->check_dv1 = true; 2003 2004 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2) 2005 llss->check_dv2 = true; 2006 2007 /* we cannot use lsl->sl_dvX directly because we may swap them */ 2008 llss->dv1 = lsl->sl_dv1; 2009 llss->dv2 = lsl->sl_dv2; 2010 2011 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2)); 2012 if (rc == 0) /* same file, done! */ 2013 GOTO(free, rc = 0); 2014 2015 if (rc < 0) { /* sequentialize it */ 2016 swap(llss->inode1, llss->inode2); 2017 swap(file1, file2); 2018 swap(llss->dv1, llss->dv2); 2019 swap(llss->check_dv1, llss->check_dv2); 2020 } 2021 2022 gid = lsl->sl_gid; 2023 if (gid != 0) { /* application asks to flush dirty cache */ 2024 rc = ll_get_grouplock(llss->inode1, file1, gid); 2025 if (rc < 0) 2026 GOTO(free, rc); 2027 2028 rc = ll_get_grouplock(llss->inode2, file2, gid); 2029 if (rc < 0) { 2030 ll_put_grouplock(llss->inode1, file1, gid); 2031 GOTO(free, rc); 2032 } 2033 } 2034 2035 /* to be able to restore mtime and atime after swap 2036 * we need to first save them */ 2037 if (lsl->sl_flags & 2038 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) { 2039 llss->ia1.ia_mtime = llss->inode1->i_mtime; 2040 llss->ia1.ia_atime = llss->inode1->i_atime; 2041 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME; 2042 llss->ia2.ia_mtime = llss->inode2->i_mtime; 2043 llss->ia2.ia_atime = llss->inode2->i_atime; 2044 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME; 2045 } 2046 2047 /* ultimate check, before swapping the layouts we check if 2048 * dataversion has changed (if requested) */ 2049 if (llss->check_dv1) { 2050 rc = ll_data_version(llss->inode1, &dv, 0); 2051 if (rc) 2052 GOTO(putgl, rc); 2053 if (dv != llss->dv1) 2054 GOTO(putgl, rc = -EAGAIN); 2055 } 2056 2057 if (llss->check_dv2) { 2058 rc = ll_data_version(llss->inode2, &dv, 0); 2059 if (rc) 2060 GOTO(putgl, rc); 2061 if (dv != llss->dv2) 2062 GOTO(putgl, rc = -EAGAIN); 2063 } 2064 2065 /* struct md_op_data is used to send the swap args to the mdt 2066 * only flags is missing, so we use struct mdc_swap_layouts 2067 * through the md_op_data->op_data */ 2068 /* flags from user space have to be converted before they are send to 2069 * server, no flag is sent today, they are only used on the client */ 2070 msl.msl_flags = 0; 2071 rc = -ENOMEM; 2072 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0, 2073 0, LUSTRE_OPC_ANY, &msl); 2074 if (IS_ERR(op_data)) 2075 GOTO(free, rc = PTR_ERR(op_data)); 2076 2077 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1), 2078 sizeof(*op_data), op_data, NULL); 2079 ll_finish_md_op_data(op_data); 2080 2081putgl: 2082 if (gid != 0) { 2083 ll_put_grouplock(llss->inode2, file2, gid); 2084 ll_put_grouplock(llss->inode1, file1, gid); 2085 } 2086 2087 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */ 2088 if (rc != 0) 2089 GOTO(free, rc); 2090 2091 /* clear useless flags */ 2092 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) { 2093 llss->ia1.ia_valid &= ~ATTR_MTIME; 2094 llss->ia2.ia_valid &= ~ATTR_MTIME; 2095 } 2096 2097 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) { 2098 llss->ia1.ia_valid &= ~ATTR_ATIME; 2099 llss->ia2.ia_valid &= ~ATTR_ATIME; 2100 } 2101 2102 /* update time if requested */ 2103 rc = 0; 2104 if (llss->ia2.ia_valid != 0) { 2105 mutex_lock(&llss->inode1->i_mutex); 2106 rc = ll_setattr(file1->f_dentry, &llss->ia2); 2107 mutex_unlock(&llss->inode1->i_mutex); 2108 } 2109 2110 if (llss->ia1.ia_valid != 0) { 2111 int rc1; 2112 2113 mutex_lock(&llss->inode2->i_mutex); 2114 rc1 = ll_setattr(file2->f_dentry, &llss->ia1); 2115 mutex_unlock(&llss->inode2->i_mutex); 2116 if (rc == 0) 2117 rc = rc1; 2118 } 2119 2120free: 2121 if (llss != NULL) 2122 OBD_FREE_PTR(llss); 2123 2124 return rc; 2125} 2126 2127static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) 2128{ 2129 struct md_op_data *op_data; 2130 int rc; 2131 2132 /* Non-root users are forbidden to set or clear flags which are 2133 * NOT defined in HSM_USER_MASK. */ 2134 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) && 2135 !capable(CFS_CAP_SYS_ADMIN)) 2136 return -EPERM; 2137 2138 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2139 LUSTRE_OPC_ANY, hss); 2140 if (IS_ERR(op_data)) 2141 return PTR_ERR(op_data); 2142 2143 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode), 2144 sizeof(*op_data), op_data, NULL); 2145 2146 ll_finish_md_op_data(op_data); 2147 2148 return rc; 2149} 2150 2151static int ll_hsm_import(struct inode *inode, struct file *file, 2152 struct hsm_user_import *hui) 2153{ 2154 struct hsm_state_set *hss = NULL; 2155 struct iattr *attr = NULL; 2156 int rc; 2157 2158 2159 if (!S_ISREG(inode->i_mode)) 2160 return -EINVAL; 2161 2162 /* set HSM flags */ 2163 OBD_ALLOC_PTR(hss); 2164 if (hss == NULL) 2165 GOTO(out, rc = -ENOMEM); 2166 2167 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID; 2168 hss->hss_archive_id = hui->hui_archive_id; 2169 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED; 2170 rc = ll_hsm_state_set(inode, hss); 2171 if (rc != 0) 2172 GOTO(out, rc); 2173 2174 OBD_ALLOC_PTR(attr); 2175 if (attr == NULL) 2176 GOTO(out, rc = -ENOMEM); 2177 2178 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO); 2179 attr->ia_mode |= S_IFREG; 2180 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid); 2181 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid); 2182 attr->ia_size = hui->hui_size; 2183 attr->ia_mtime.tv_sec = hui->hui_mtime; 2184 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns; 2185 attr->ia_atime.tv_sec = hui->hui_atime; 2186 attr->ia_atime.tv_nsec = hui->hui_atime_ns; 2187 2188 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE | 2189 ATTR_UID | ATTR_GID | 2190 ATTR_MTIME | ATTR_MTIME_SET | 2191 ATTR_ATIME | ATTR_ATIME_SET; 2192 2193 rc = ll_setattr_raw(file->f_dentry, attr, true); 2194 if (rc == -ENODATA) 2195 rc = 0; 2196 2197out: 2198 if (hss != NULL) 2199 OBD_FREE_PTR(hss); 2200 2201 if (attr != NULL) 2202 OBD_FREE_PTR(attr); 2203 2204 return rc; 2205} 2206 2207static long 2208ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2209{ 2210 struct inode *inode = file->f_dentry->d_inode; 2211 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 2212 int flags, rc; 2213 2214 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, 2215 inode->i_generation, inode, cmd); 2216 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); 2217 2218 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ 2219 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ 2220 return -ENOTTY; 2221 2222 switch(cmd) { 2223 case LL_IOC_GETFLAGS: 2224 /* Get the current value of the file flags */ 2225 return put_user(fd->fd_flags, (int *)arg); 2226 case LL_IOC_SETFLAGS: 2227 case LL_IOC_CLRFLAGS: 2228 /* Set or clear specific file flags */ 2229 /* XXX This probably needs checks to ensure the flags are 2230 * not abused, and to handle any flag side effects. 2231 */ 2232 if (get_user(flags, (int *) arg)) 2233 return -EFAULT; 2234 2235 if (cmd == LL_IOC_SETFLAGS) { 2236 if ((flags & LL_FILE_IGNORE_LOCK) && 2237 !(file->f_flags & O_DIRECT)) { 2238 CERROR("%s: unable to disable locking on " 2239 "non-O_DIRECT file\n", current->comm); 2240 return -EINVAL; 2241 } 2242 2243 fd->fd_flags |= flags; 2244 } else { 2245 fd->fd_flags &= ~flags; 2246 } 2247 return 0; 2248 case LL_IOC_LOV_SETSTRIPE: 2249 return ll_lov_setstripe(inode, file, arg); 2250 case LL_IOC_LOV_SETEA: 2251 return ll_lov_setea(inode, file, arg); 2252 case LL_IOC_LOV_SWAP_LAYOUTS: { 2253 struct file *file2; 2254 struct lustre_swap_layouts lsl; 2255 2256 if (copy_from_user(&lsl, (char *)arg, 2257 sizeof(struct lustre_swap_layouts))) 2258 return -EFAULT; 2259 2260 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */ 2261 return -EPERM; 2262 2263 file2 = fget(lsl.sl_fd); 2264 if (file2 == NULL) 2265 return -EBADF; 2266 2267 rc = -EPERM; 2268 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */ 2269 rc = ll_swap_layouts(file, file2, &lsl); 2270 fput(file2); 2271 return rc; 2272 } 2273 case LL_IOC_LOV_GETSTRIPE: 2274 return ll_lov_getstripe(inode, arg); 2275 case LL_IOC_RECREATE_OBJ: 2276 return ll_lov_recreate_obj(inode, arg); 2277 case LL_IOC_RECREATE_FID: 2278 return ll_lov_recreate_fid(inode, arg); 2279 case FSFILT_IOC_FIEMAP: 2280 return ll_ioctl_fiemap(inode, arg); 2281 case FSFILT_IOC_GETFLAGS: 2282 case FSFILT_IOC_SETFLAGS: 2283 return ll_iocontrol(inode, file, cmd, arg); 2284 case FSFILT_IOC_GETVERSION_OLD: 2285 case FSFILT_IOC_GETVERSION: 2286 return put_user(inode->i_generation, (int *)arg); 2287 case LL_IOC_GROUP_LOCK: 2288 return ll_get_grouplock(inode, file, arg); 2289 case LL_IOC_GROUP_UNLOCK: 2290 return ll_put_grouplock(inode, file, arg); 2291 case IOC_OBD_STATFS: 2292 return ll_obd_statfs(inode, (void *)arg); 2293 2294 /* We need to special case any other ioctls we want to handle, 2295 * to send them to the MDS/OST as appropriate and to properly 2296 * network encode the arg field. 2297 case FSFILT_IOC_SETVERSION_OLD: 2298 case FSFILT_IOC_SETVERSION: 2299 */ 2300 case LL_IOC_FLUSHCTX: 2301 return ll_flush_ctx(inode); 2302 case LL_IOC_PATH2FID: { 2303 if (copy_to_user((void *)arg, ll_inode2fid(inode), 2304 sizeof(struct lu_fid))) 2305 return -EFAULT; 2306 2307 return 0; 2308 } 2309 case OBD_IOC_FID2PATH: 2310 return ll_fid2path(inode, (void *)arg); 2311 case LL_IOC_DATA_VERSION: { 2312 struct ioc_data_version idv; 2313 int rc; 2314 2315 if (copy_from_user(&idv, (char *)arg, sizeof(idv))) 2316 return -EFAULT; 2317 2318 rc = ll_data_version(inode, &idv.idv_version, 2319 !(idv.idv_flags & LL_DV_NOFLUSH)); 2320 2321 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv))) 2322 return -EFAULT; 2323 2324 return rc; 2325 } 2326 2327 case LL_IOC_GET_MDTIDX: { 2328 int mdtidx; 2329 2330 mdtidx = ll_get_mdt_idx(inode); 2331 if (mdtidx < 0) 2332 return mdtidx; 2333 2334 if (put_user((int)mdtidx, (int*)arg)) 2335 return -EFAULT; 2336 2337 return 0; 2338 } 2339 case OBD_IOC_GETDTNAME: 2340 case OBD_IOC_GETMDNAME: 2341 return ll_get_obd_name(inode, cmd, arg); 2342 case LL_IOC_HSM_STATE_GET: { 2343 struct md_op_data *op_data; 2344 struct hsm_user_state *hus; 2345 int rc; 2346 2347 OBD_ALLOC_PTR(hus); 2348 if (hus == NULL) 2349 return -ENOMEM; 2350 2351 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2352 LUSTRE_OPC_ANY, hus); 2353 if (IS_ERR(op_data)) { 2354 OBD_FREE_PTR(hus); 2355 return PTR_ERR(op_data); 2356 } 2357 2358 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), 2359 op_data, NULL); 2360 2361 if (copy_to_user((void *)arg, hus, sizeof(*hus))) 2362 rc = -EFAULT; 2363 2364 ll_finish_md_op_data(op_data); 2365 OBD_FREE_PTR(hus); 2366 return rc; 2367 } 2368 case LL_IOC_HSM_STATE_SET: { 2369 struct hsm_state_set *hss; 2370 int rc; 2371 2372 OBD_ALLOC_PTR(hss); 2373 if (hss == NULL) 2374 return -ENOMEM; 2375 2376 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) { 2377 OBD_FREE_PTR(hss); 2378 return -EFAULT; 2379 } 2380 2381 rc = ll_hsm_state_set(inode, hss); 2382 2383 OBD_FREE_PTR(hss); 2384 return rc; 2385 } 2386 case LL_IOC_HSM_ACTION: { 2387 struct md_op_data *op_data; 2388 struct hsm_current_action *hca; 2389 int rc; 2390 2391 OBD_ALLOC_PTR(hca); 2392 if (hca == NULL) 2393 return -ENOMEM; 2394 2395 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2396 LUSTRE_OPC_ANY, hca); 2397 if (IS_ERR(op_data)) { 2398 OBD_FREE_PTR(hca); 2399 return PTR_ERR(op_data); 2400 } 2401 2402 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), 2403 op_data, NULL); 2404 2405 if (copy_to_user((char *)arg, hca, sizeof(*hca))) 2406 rc = -EFAULT; 2407 2408 ll_finish_md_op_data(op_data); 2409 OBD_FREE_PTR(hca); 2410 return rc; 2411 } 2412 case LL_IOC_SET_LEASE: { 2413 struct ll_inode_info *lli = ll_i2info(inode); 2414 struct obd_client_handle *och = NULL; 2415 bool lease_broken; 2416 fmode_t mode = 0; 2417 2418 switch (arg) { 2419 case F_WRLCK: 2420 if (!(file->f_mode & FMODE_WRITE)) 2421 return -EPERM; 2422 mode = FMODE_WRITE; 2423 break; 2424 case F_RDLCK: 2425 if (!(file->f_mode & FMODE_READ)) 2426 return -EPERM; 2427 mode = FMODE_READ; 2428 break; 2429 case F_UNLCK: 2430 mutex_lock(&lli->lli_och_mutex); 2431 if (fd->fd_lease_och != NULL) { 2432 och = fd->fd_lease_och; 2433 fd->fd_lease_och = NULL; 2434 } 2435 mutex_unlock(&lli->lli_och_mutex); 2436 2437 if (och != NULL) { 2438 mode = och->och_flags & 2439 (FMODE_READ|FMODE_WRITE); 2440 rc = ll_lease_close(och, inode, &lease_broken); 2441 if (rc == 0 && lease_broken) 2442 mode = 0; 2443 } else { 2444 rc = -ENOLCK; 2445 } 2446 2447 /* return the type of lease or error */ 2448 return rc < 0 ? rc : (int)mode; 2449 default: 2450 return -EINVAL; 2451 } 2452 2453 CDEBUG(D_INODE, "Set lease with mode %d\n", mode); 2454 2455 /* apply for lease */ 2456 och = ll_lease_open(inode, file, mode, 0); 2457 if (IS_ERR(och)) 2458 return PTR_ERR(och); 2459 2460 rc = 0; 2461 mutex_lock(&lli->lli_och_mutex); 2462 if (fd->fd_lease_och == NULL) { 2463 fd->fd_lease_och = och; 2464 och = NULL; 2465 } 2466 mutex_unlock(&lli->lli_och_mutex); 2467 if (och != NULL) { 2468 /* impossible now that only excl is supported for now */ 2469 ll_lease_close(och, inode, &lease_broken); 2470 rc = -EBUSY; 2471 } 2472 return rc; 2473 } 2474 case LL_IOC_GET_LEASE: { 2475 struct ll_inode_info *lli = ll_i2info(inode); 2476 struct ldlm_lock *lock = NULL; 2477 2478 rc = 0; 2479 mutex_lock(&lli->lli_och_mutex); 2480 if (fd->fd_lease_och != NULL) { 2481 struct obd_client_handle *och = fd->fd_lease_och; 2482 2483 lock = ldlm_handle2lock(&och->och_lease_handle); 2484 if (lock != NULL) { 2485 lock_res_and_lock(lock); 2486 if (!ldlm_is_cancel(lock)) 2487 rc = och->och_flags & 2488 (FMODE_READ | FMODE_WRITE); 2489 unlock_res_and_lock(lock); 2490 ldlm_lock_put(lock); 2491 } 2492 } 2493 mutex_unlock(&lli->lli_och_mutex); 2494 return rc; 2495 } 2496 case LL_IOC_HSM_IMPORT: { 2497 struct hsm_user_import *hui; 2498 2499 OBD_ALLOC_PTR(hui); 2500 if (hui == NULL) 2501 return -ENOMEM; 2502 2503 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) { 2504 OBD_FREE_PTR(hui); 2505 return -EFAULT; 2506 } 2507 2508 rc = ll_hsm_import(inode, file, hui); 2509 2510 OBD_FREE_PTR(hui); 2511 return rc; 2512 } 2513 default: { 2514 int err; 2515 2516 if (LLIOC_STOP == 2517 ll_iocontrol_call(inode, file, cmd, arg, &err)) 2518 return err; 2519 2520 return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, 2521 (void *)arg); 2522 } 2523 } 2524} 2525 2526 2527static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) 2528{ 2529 struct inode *inode = file->f_dentry->d_inode; 2530 loff_t retval, eof = 0; 2531 2532 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) : 2533 (origin == SEEK_CUR) ? file->f_pos : 0); 2534 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n", 2535 inode->i_ino, inode->i_generation, inode, retval, retval, 2536 origin); 2537 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); 2538 2539 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) { 2540 retval = ll_glimpse_size(inode); 2541 if (retval != 0) 2542 return retval; 2543 eof = i_size_read(inode); 2544 } 2545 2546 retval = generic_file_llseek_size(file, offset, origin, 2547 ll_file_maxbytes(inode), eof); 2548 return retval; 2549} 2550 2551static int ll_flush(struct file *file, fl_owner_t id) 2552{ 2553 struct inode *inode = file->f_dentry->d_inode; 2554 struct ll_inode_info *lli = ll_i2info(inode); 2555 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 2556 int rc, err; 2557 2558 LASSERT(!S_ISDIR(inode->i_mode)); 2559 2560 /* catch async errors that were recorded back when async writeback 2561 * failed for pages in this mapping. */ 2562 rc = lli->lli_async_rc; 2563 lli->lli_async_rc = 0; 2564 err = lov_read_and_clear_async_rc(lli->lli_clob); 2565 if (rc == 0) 2566 rc = err; 2567 2568 /* The application has been told write failure already. 2569 * Do not report failure again. */ 2570 if (fd->fd_write_failed) 2571 return 0; 2572 return rc ? -EIO : 0; 2573} 2574 2575/** 2576 * Called to make sure a portion of file has been written out. 2577 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST. 2578 * 2579 * Return how many pages have been written. 2580 */ 2581int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, 2582 enum cl_fsync_mode mode, int ignore_layout) 2583{ 2584 struct cl_env_nest nest; 2585 struct lu_env *env; 2586 struct cl_io *io; 2587 struct obd_capa *capa = NULL; 2588 struct cl_fsync_io *fio; 2589 int result; 2590 2591 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL && 2592 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL) 2593 return -EINVAL; 2594 2595 env = cl_env_nested_get(&nest); 2596 if (IS_ERR(env)) 2597 return PTR_ERR(env); 2598 2599 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE); 2600 2601 io = ccc_env_thread_io(env); 2602 io->ci_obj = cl_i2info(inode)->lli_clob; 2603 io->ci_ignore_layout = ignore_layout; 2604 2605 /* initialize parameters for sync */ 2606 fio = &io->u.ci_fsync; 2607 fio->fi_capa = capa; 2608 fio->fi_start = start; 2609 fio->fi_end = end; 2610 fio->fi_fid = ll_inode2fid(inode); 2611 fio->fi_mode = mode; 2612 fio->fi_nr_written = 0; 2613 2614 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0) 2615 result = cl_io_loop(env, io); 2616 else 2617 result = io->ci_result; 2618 if (result == 0) 2619 result = fio->fi_nr_written; 2620 cl_io_fini(env, io); 2621 cl_env_nested_put(&nest, env); 2622 2623 capa_put(capa); 2624 2625 return result; 2626} 2627 2628/* 2629 * When dentry is provided (the 'else' case), *file->f_dentry may be 2630 * null and dentry must be used directly rather than pulled from 2631 * *file->f_dentry as is done otherwise. 2632 */ 2633 2634int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) 2635{ 2636 struct dentry *dentry = file->f_dentry; 2637 struct inode *inode = dentry->d_inode; 2638 struct ll_inode_info *lli = ll_i2info(inode); 2639 struct ptlrpc_request *req; 2640 struct obd_capa *oc; 2641 int rc, err; 2642 2643 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, 2644 inode->i_generation, inode); 2645 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); 2646 2647 rc = filemap_write_and_wait_range(inode->i_mapping, start, end); 2648 mutex_lock(&inode->i_mutex); 2649 2650 /* catch async errors that were recorded back when async writeback 2651 * failed for pages in this mapping. */ 2652 if (!S_ISDIR(inode->i_mode)) { 2653 err = lli->lli_async_rc; 2654 lli->lli_async_rc = 0; 2655 if (rc == 0) 2656 rc = err; 2657 err = lov_read_and_clear_async_rc(lli->lli_clob); 2658 if (rc == 0) 2659 rc = err; 2660 } 2661 2662 oc = ll_mdscapa_get(inode); 2663 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, 2664 &req); 2665 capa_put(oc); 2666 if (!rc) 2667 rc = err; 2668 if (!err) 2669 ptlrpc_req_finished(req); 2670 2671 if (S_ISREG(inode->i_mode)) { 2672 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 2673 2674 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0); 2675 if (rc == 0 && err < 0) 2676 rc = err; 2677 if (rc < 0) 2678 fd->fd_write_failed = true; 2679 else 2680 fd->fd_write_failed = false; 2681 } 2682 2683 mutex_unlock(&inode->i_mutex); 2684 return rc; 2685} 2686 2687static int 2688ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) 2689{ 2690 struct inode *inode = file->f_dentry->d_inode; 2691 struct ll_sb_info *sbi = ll_i2sbi(inode); 2692 struct ldlm_enqueue_info einfo = { 2693 .ei_type = LDLM_FLOCK, 2694 .ei_cb_cp = ldlm_flock_completion_ast, 2695 .ei_cbdata = file_lock, 2696 }; 2697 struct md_op_data *op_data; 2698 struct lustre_handle lockh = {0}; 2699 ldlm_policy_data_t flock = {{0}}; 2700 int flags = 0; 2701 int rc; 2702 int rc2 = 0; 2703 2704 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", 2705 inode->i_ino, file_lock); 2706 2707 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); 2708 2709 if (file_lock->fl_flags & FL_FLOCK) { 2710 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK)); 2711 /* flocks are whole-file locks */ 2712 flock.l_flock.end = OFFSET_MAX; 2713 /* For flocks owner is determined by the local file descriptor*/ 2714 flock.l_flock.owner = (unsigned long)file_lock->fl_file; 2715 } else if (file_lock->fl_flags & FL_POSIX) { 2716 flock.l_flock.owner = (unsigned long)file_lock->fl_owner; 2717 flock.l_flock.start = file_lock->fl_start; 2718 flock.l_flock.end = file_lock->fl_end; 2719 } else { 2720 return -EINVAL; 2721 } 2722 flock.l_flock.pid = file_lock->fl_pid; 2723 2724 /* Somewhat ugly workaround for svc lockd. 2725 * lockd installs custom fl_lmops->lm_compare_owner that checks 2726 * for the fl_owner to be the same (which it always is on local node 2727 * I guess between lockd processes) and then compares pid. 2728 * As such we assign pid to the owner field to make it all work, 2729 * conflict with normal locks is unlikely since pid space and 2730 * pointer space for current->files are not intersecting */ 2731 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner) 2732 flock.l_flock.owner = (unsigned long)file_lock->fl_pid; 2733 2734 switch (file_lock->fl_type) { 2735 case F_RDLCK: 2736 einfo.ei_mode = LCK_PR; 2737 break; 2738 case F_UNLCK: 2739 /* An unlock request may or may not have any relation to 2740 * existing locks so we may not be able to pass a lock handle 2741 * via a normal ldlm_lock_cancel() request. The request may even 2742 * unlock a byte range in the middle of an existing lock. In 2743 * order to process an unlock request we need all of the same 2744 * information that is given with a normal read or write record 2745 * lock request. To avoid creating another ldlm unlock (cancel) 2746 * message we'll treat a LCK_NL flock request as an unlock. */ 2747 einfo.ei_mode = LCK_NL; 2748 break; 2749 case F_WRLCK: 2750 einfo.ei_mode = LCK_PW; 2751 break; 2752 default: 2753 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", 2754 file_lock->fl_type); 2755 return -ENOTSUPP; 2756 } 2757 2758 switch (cmd) { 2759 case F_SETLKW: 2760#ifdef F_SETLKW64 2761 case F_SETLKW64: 2762#endif 2763 flags = 0; 2764 break; 2765 case F_SETLK: 2766#ifdef F_SETLK64 2767 case F_SETLK64: 2768#endif 2769 flags = LDLM_FL_BLOCK_NOWAIT; 2770 break; 2771 case F_GETLK: 2772#ifdef F_GETLK64 2773 case F_GETLK64: 2774#endif 2775 flags = LDLM_FL_TEST_LOCK; 2776 /* Save the old mode so that if the mode in the lock changes we 2777 * can decrement the appropriate reader or writer refcount. */ 2778 file_lock->fl_type = einfo.ei_mode; 2779 break; 2780 default: 2781 CERROR("unknown fcntl lock command: %d\n", cmd); 2782 return -EINVAL; 2783 } 2784 2785 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2786 LUSTRE_OPC_ANY, NULL); 2787 if (IS_ERR(op_data)) 2788 return PTR_ERR(op_data); 2789 2790 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, " 2791 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid, 2792 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); 2793 2794 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, 2795 op_data, &lockh, &flock, 0, NULL /* req */, flags); 2796 2797 if ((file_lock->fl_flags & FL_FLOCK) && 2798 (rc == 0 || file_lock->fl_type == F_UNLCK)) 2799 rc2 = flock_lock_file_wait(file, file_lock); 2800 if ((file_lock->fl_flags & FL_POSIX) && 2801 (rc == 0 || file_lock->fl_type == F_UNLCK) && 2802 !(flags & LDLM_FL_TEST_LOCK)) 2803 rc2 = posix_lock_file_wait(file, file_lock); 2804 2805 if (rc2 && file_lock->fl_type != F_UNLCK) { 2806 einfo.ei_mode = LCK_NL; 2807 md_enqueue(sbi->ll_md_exp, &einfo, NULL, 2808 op_data, &lockh, &flock, 0, NULL /* req */, flags); 2809 rc = rc2; 2810 } 2811 2812 ll_finish_md_op_data(op_data); 2813 2814 return rc; 2815} 2816 2817static int 2818ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock) 2819{ 2820 return -ENOSYS; 2821} 2822 2823/** 2824 * test if some locks matching bits and l_req_mode are acquired 2825 * - bits can be in different locks 2826 * - if found clear the common lock bits in *bits 2827 * - the bits not found, are kept in *bits 2828 * \param inode [IN] 2829 * \param bits [IN] searched lock bits [IN] 2830 * \param l_req_mode [IN] searched lock mode 2831 * \retval boolean, true iff all bits are found 2832 */ 2833int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode) 2834{ 2835 struct lustre_handle lockh; 2836 ldlm_policy_data_t policy; 2837 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ? 2838 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode; 2839 struct lu_fid *fid; 2840 __u64 flags; 2841 int i; 2842 2843 if (!inode) 2844 return 0; 2845 2846 fid = &ll_i2info(inode)->lli_fid; 2847 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid), 2848 ldlm_lockname[mode]); 2849 2850 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; 2851 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) { 2852 policy.l_inodebits.bits = *bits & (1 << i); 2853 if (policy.l_inodebits.bits == 0) 2854 continue; 2855 2856 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, 2857 &policy, mode, &lockh)) { 2858 struct ldlm_lock *lock; 2859 2860 lock = ldlm_handle2lock(&lockh); 2861 if (lock) { 2862 *bits &= 2863 ~(lock->l_policy_data.l_inodebits.bits); 2864 LDLM_LOCK_PUT(lock); 2865 } else { 2866 *bits &= ~policy.l_inodebits.bits; 2867 } 2868 } 2869 } 2870 return *bits == 0; 2871} 2872 2873ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, 2874 struct lustre_handle *lockh, __u64 flags, 2875 ldlm_mode_t mode) 2876{ 2877 ldlm_policy_data_t policy = { .l_inodebits = {bits}}; 2878 struct lu_fid *fid; 2879 ldlm_mode_t rc; 2880 2881 fid = &ll_i2info(inode)->lli_fid; 2882 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); 2883 2884 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags, 2885 fid, LDLM_IBITS, &policy, mode, lockh); 2886 2887 return rc; 2888} 2889 2890static int ll_inode_revalidate_fini(struct inode *inode, int rc) 2891{ 2892 /* Already unlinked. Just update nlink and return success */ 2893 if (rc == -ENOENT) { 2894 clear_nlink(inode); 2895 /* This path cannot be hit for regular files unless in 2896 * case of obscure races, so no need to validate size. 2897 */ 2898 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 2899 return 0; 2900 } else if (rc != 0) { 2901 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR, 2902 "%s: revalidate FID "DFID" error: rc = %d\n", 2903 ll_get_fsname(inode->i_sb, NULL, 0), 2904 PFID(ll_inode2fid(inode)), rc); 2905 } 2906 2907 return rc; 2908} 2909 2910static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) 2911{ 2912 struct inode *inode = dentry->d_inode; 2913 struct ptlrpc_request *req = NULL; 2914 struct obd_export *exp; 2915 int rc = 0; 2916 2917 LASSERT(inode != NULL); 2918 2919 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n", 2920 inode->i_ino, inode->i_generation, inode, dentry->d_name.name); 2921 2922 exp = ll_i2mdexp(inode); 2923 2924 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC. 2925 * But under CMD case, it caused some lock issues, should be fixed 2926 * with new CMD ibits lock. See bug 12718 */ 2927 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) { 2928 struct lookup_intent oit = { .it_op = IT_GETATTR }; 2929 struct md_op_data *op_data; 2930 2931 if (ibits == MDS_INODELOCK_LOOKUP) 2932 oit.it_op = IT_LOOKUP; 2933 2934 /* Call getattr by fid, so do not provide name at all. */ 2935 op_data = ll_prep_md_op_data(NULL, dentry->d_inode, 2936 dentry->d_inode, NULL, 0, 0, 2937 LUSTRE_OPC_ANY, NULL); 2938 if (IS_ERR(op_data)) 2939 return PTR_ERR(op_data); 2940 2941 oit.it_create_mode |= M_CHECK_STALE; 2942 rc = md_intent_lock(exp, op_data, NULL, 0, 2943 /* we are not interested in name 2944 based lookup */ 2945 &oit, 0, &req, 2946 ll_md_blocking_ast, 0); 2947 ll_finish_md_op_data(op_data); 2948 oit.it_create_mode &= ~M_CHECK_STALE; 2949 if (rc < 0) { 2950 rc = ll_inode_revalidate_fini(inode, rc); 2951 GOTO (out, rc); 2952 } 2953 2954 rc = ll_revalidate_it_finish(req, &oit, dentry); 2955 if (rc != 0) { 2956 ll_intent_release(&oit); 2957 GOTO(out, rc); 2958 } 2959 2960 /* Unlinked? Unhash dentry, so it is not picked up later by 2961 do_lookup() -> ll_revalidate_it(). We cannot use d_drop 2962 here to preserve get_cwd functionality on 2.6. 2963 Bug 10503 */ 2964 if (!dentry->d_inode->i_nlink) 2965 d_lustre_invalidate(dentry, 0); 2966 2967 ll_lookup_finish_locks(&oit, dentry); 2968 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) { 2969 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); 2970 obd_valid valid = OBD_MD_FLGETATTR; 2971 struct md_op_data *op_data; 2972 int ealen = 0; 2973 2974 if (S_ISREG(inode->i_mode)) { 2975 rc = ll_get_default_mdsize(sbi, &ealen); 2976 if (rc) 2977 return rc; 2978 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; 2979 } 2980 2981 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 2982 0, ealen, LUSTRE_OPC_ANY, 2983 NULL); 2984 if (IS_ERR(op_data)) 2985 return PTR_ERR(op_data); 2986 2987 op_data->op_valid = valid; 2988 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one 2989 * capa for this inode. Because we only keep capas of dirs 2990 * fresh. */ 2991 rc = md_getattr(sbi->ll_md_exp, op_data, &req); 2992 ll_finish_md_op_data(op_data); 2993 if (rc) { 2994 rc = ll_inode_revalidate_fini(inode, rc); 2995 return rc; 2996 } 2997 2998 rc = ll_prep_inode(&inode, req, NULL, NULL); 2999 } 3000out: 3001 ptlrpc_req_finished(req); 3002 return rc; 3003} 3004 3005static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) 3006{ 3007 struct inode *inode = dentry->d_inode; 3008 int rc; 3009 3010 rc = __ll_inode_revalidate(dentry, ibits); 3011 if (rc != 0) 3012 return rc; 3013 3014 /* if object isn't regular file, don't validate size */ 3015 if (!S_ISREG(inode->i_mode)) { 3016 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime; 3017 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime; 3018 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime; 3019 } else { 3020 /* In case of restore, the MDT has the right size and has 3021 * already send it back without granting the layout lock, 3022 * inode is up-to-date so glimpse is useless. 3023 * Also to glimpse we need the layout, in case of a running 3024 * restore the MDT holds the layout lock so the glimpse will 3025 * block up to the end of restore (getattr will block) 3026 */ 3027 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING)) 3028 rc = ll_glimpse_size(inode); 3029 } 3030 return rc; 3031} 3032 3033int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) 3034{ 3035 struct inode *inode = de->d_inode; 3036 struct ll_sb_info *sbi = ll_i2sbi(inode); 3037 struct ll_inode_info *lli = ll_i2info(inode); 3038 int res = 0; 3039 3040 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE | 3041 MDS_INODELOCK_LOOKUP); 3042 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); 3043 3044 if (res) 3045 return res; 3046 3047 stat->dev = inode->i_sb->s_dev; 3048 if (ll_need_32bit_api(sbi)) 3049 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1); 3050 else 3051 stat->ino = inode->i_ino; 3052 stat->mode = inode->i_mode; 3053 stat->nlink = inode->i_nlink; 3054 stat->uid = inode->i_uid; 3055 stat->gid = inode->i_gid; 3056 stat->rdev = inode->i_rdev; 3057 stat->atime = inode->i_atime; 3058 stat->mtime = inode->i_mtime; 3059 stat->ctime = inode->i_ctime; 3060 stat->blksize = 1 << inode->i_blkbits; 3061 3062 stat->size = i_size_read(inode); 3063 stat->blocks = inode->i_blocks; 3064 3065 return 0; 3066} 3067 3068static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 3069 __u64 start, __u64 len) 3070{ 3071 int rc; 3072 size_t num_bytes; 3073 struct ll_user_fiemap *fiemap; 3074 unsigned int extent_count = fieinfo->fi_extents_max; 3075 3076 num_bytes = sizeof(*fiemap) + (extent_count * 3077 sizeof(struct ll_fiemap_extent)); 3078 OBD_ALLOC_LARGE(fiemap, num_bytes); 3079 3080 if (fiemap == NULL) 3081 return -ENOMEM; 3082 3083 fiemap->fm_flags = fieinfo->fi_flags; 3084 fiemap->fm_extent_count = fieinfo->fi_extents_max; 3085 fiemap->fm_start = start; 3086 fiemap->fm_length = len; 3087 if (extent_count > 0) 3088 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start, 3089 sizeof(struct ll_fiemap_extent)); 3090 3091 rc = ll_do_fiemap(inode, fiemap, num_bytes); 3092 3093 fieinfo->fi_flags = fiemap->fm_flags; 3094 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents; 3095 if (extent_count > 0) 3096 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0], 3097 fiemap->fm_mapped_extents * 3098 sizeof(struct ll_fiemap_extent)); 3099 3100 OBD_FREE_LARGE(fiemap, num_bytes); 3101 return rc; 3102} 3103 3104struct posix_acl *ll_get_acl(struct inode *inode, int type) 3105{ 3106 struct ll_inode_info *lli = ll_i2info(inode); 3107 struct posix_acl *acl = NULL; 3108 3109 spin_lock(&lli->lli_lock); 3110 /* VFS' acl_permission_check->check_acl will release the refcount */ 3111 acl = posix_acl_dup(lli->lli_posix_acl); 3112 spin_unlock(&lli->lli_lock); 3113 3114 return acl; 3115} 3116 3117 3118int ll_inode_permission(struct inode *inode, int mask) 3119{ 3120 int rc = 0; 3121 3122#ifdef MAY_NOT_BLOCK 3123 if (mask & MAY_NOT_BLOCK) 3124 return -ECHILD; 3125#endif 3126 3127 /* as root inode are NOT getting validated in lookup operation, 3128 * need to do it before permission check. */ 3129 3130 if (inode == inode->i_sb->s_root->d_inode) { 3131 rc = __ll_inode_revalidate(inode->i_sb->s_root, 3132 MDS_INODELOCK_LOOKUP); 3133 if (rc) 3134 return rc; 3135 } 3136 3137 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n", 3138 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask); 3139 3140 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) 3141 return lustre_check_remote_perm(inode, mask); 3142 3143 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1); 3144 rc = generic_permission(inode, mask); 3145 3146 return rc; 3147} 3148 3149/* -o localflock - only provides locally consistent flock locks */ 3150struct file_operations ll_file_operations = { 3151 .read = ll_file_read, 3152 .aio_read = ll_file_aio_read, 3153 .write = ll_file_write, 3154 .aio_write = ll_file_aio_write, 3155 .unlocked_ioctl = ll_file_ioctl, 3156 .open = ll_file_open, 3157 .release = ll_file_release, 3158 .mmap = ll_file_mmap, 3159 .llseek = ll_file_seek, 3160 .splice_read = ll_file_splice_read, 3161 .fsync = ll_fsync, 3162 .flush = ll_flush 3163}; 3164 3165struct file_operations ll_file_operations_flock = { 3166 .read = ll_file_read, 3167 .aio_read = ll_file_aio_read, 3168 .write = ll_file_write, 3169 .aio_write = ll_file_aio_write, 3170 .unlocked_ioctl = ll_file_ioctl, 3171 .open = ll_file_open, 3172 .release = ll_file_release, 3173 .mmap = ll_file_mmap, 3174 .llseek = ll_file_seek, 3175 .splice_read = ll_file_splice_read, 3176 .fsync = ll_fsync, 3177 .flush = ll_flush, 3178 .flock = ll_file_flock, 3179 .lock = ll_file_flock 3180}; 3181 3182/* These are for -o noflock - to return ENOSYS on flock calls */ 3183struct file_operations ll_file_operations_noflock = { 3184 .read = ll_file_read, 3185 .aio_read = ll_file_aio_read, 3186 .write = ll_file_write, 3187 .aio_write = ll_file_aio_write, 3188 .unlocked_ioctl = ll_file_ioctl, 3189 .open = ll_file_open, 3190 .release = ll_file_release, 3191 .mmap = ll_file_mmap, 3192 .llseek = ll_file_seek, 3193 .splice_read = ll_file_splice_read, 3194 .fsync = ll_fsync, 3195 .flush = ll_flush, 3196 .flock = ll_file_noflock, 3197 .lock = ll_file_noflock 3198}; 3199 3200struct inode_operations ll_file_inode_operations = { 3201 .setattr = ll_setattr, 3202 .getattr = ll_getattr, 3203 .permission = ll_inode_permission, 3204 .setxattr = ll_setxattr, 3205 .getxattr = ll_getxattr, 3206 .listxattr = ll_listxattr, 3207 .removexattr = ll_removexattr, 3208 .fiemap = ll_fiemap, 3209 .get_acl = ll_get_acl, 3210}; 3211 3212/* dynamic ioctl number support routines */ 3213static struct llioc_ctl_data { 3214 struct rw_semaphore ioc_sem; 3215 struct list_head ioc_head; 3216} llioc = { 3217 __RWSEM_INITIALIZER(llioc.ioc_sem), 3218 LIST_HEAD_INIT(llioc.ioc_head) 3219}; 3220 3221 3222struct llioc_data { 3223 struct list_head iocd_list; 3224 unsigned int iocd_size; 3225 llioc_callback_t iocd_cb; 3226 unsigned int iocd_count; 3227 unsigned int iocd_cmd[0]; 3228}; 3229 3230void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd) 3231{ 3232 unsigned int size; 3233 struct llioc_data *in_data = NULL; 3234 3235 if (cb == NULL || cmd == NULL || 3236 count > LLIOC_MAX_CMD || count < 0) 3237 return NULL; 3238 3239 size = sizeof(*in_data) + count * sizeof(unsigned int); 3240 OBD_ALLOC(in_data, size); 3241 if (in_data == NULL) 3242 return NULL; 3243 3244 memset(in_data, 0, sizeof(*in_data)); 3245 in_data->iocd_size = size; 3246 in_data->iocd_cb = cb; 3247 in_data->iocd_count = count; 3248 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count); 3249 3250 down_write(&llioc.ioc_sem); 3251 list_add_tail(&in_data->iocd_list, &llioc.ioc_head); 3252 up_write(&llioc.ioc_sem); 3253 3254 return in_data; 3255} 3256 3257void ll_iocontrol_unregister(void *magic) 3258{ 3259 struct llioc_data *tmp; 3260 3261 if (magic == NULL) 3262 return; 3263 3264 down_write(&llioc.ioc_sem); 3265 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) { 3266 if (tmp == magic) { 3267 unsigned int size = tmp->iocd_size; 3268 3269 list_del(&tmp->iocd_list); 3270 up_write(&llioc.ioc_sem); 3271 3272 OBD_FREE(tmp, size); 3273 return; 3274 } 3275 } 3276 up_write(&llioc.ioc_sem); 3277 3278 CWARN("didn't find iocontrol register block with magic: %p\n", magic); 3279} 3280 3281EXPORT_SYMBOL(ll_iocontrol_register); 3282EXPORT_SYMBOL(ll_iocontrol_unregister); 3283 3284static enum llioc_iter 3285ll_iocontrol_call(struct inode *inode, struct file *file, 3286 unsigned int cmd, unsigned long arg, int *rcp) 3287{ 3288 enum llioc_iter ret = LLIOC_CONT; 3289 struct llioc_data *data; 3290 int rc = -EINVAL, i; 3291 3292 down_read(&llioc.ioc_sem); 3293 list_for_each_entry(data, &llioc.ioc_head, iocd_list) { 3294 for (i = 0; i < data->iocd_count; i++) { 3295 if (cmd != data->iocd_cmd[i]) 3296 continue; 3297 3298 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc); 3299 break; 3300 } 3301 3302 if (ret == LLIOC_STOP) 3303 break; 3304 } 3305 up_read(&llioc.ioc_sem); 3306 3307 if (rcp) 3308 *rcp = rc; 3309 return ret; 3310} 3311 3312int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf) 3313{ 3314 struct ll_inode_info *lli = ll_i2info(inode); 3315 struct cl_env_nest nest; 3316 struct lu_env *env; 3317 int result; 3318 3319 if (lli->lli_clob == NULL) 3320 return 0; 3321 3322 env = cl_env_nested_get(&nest); 3323 if (IS_ERR(env)) 3324 return PTR_ERR(env); 3325 3326 result = cl_conf_set(env, lli->lli_clob, conf); 3327 cl_env_nested_put(&nest, env); 3328 3329 if (conf->coc_opc == OBJECT_CONF_SET) { 3330 struct ldlm_lock *lock = conf->coc_lock; 3331 3332 LASSERT(lock != NULL); 3333 LASSERT(ldlm_has_layout(lock)); 3334 if (result == 0) { 3335 /* it can only be allowed to match after layout is 3336 * applied to inode otherwise false layout would be 3337 * seen. Applying layout should happen before dropping 3338 * the intent lock. */ 3339 ldlm_lock_allow_match(lock); 3340 } 3341 } 3342 return result; 3343} 3344 3345/* Fetch layout from MDT with getxattr request, if it's not ready yet */ 3346static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) 3347 3348{ 3349 struct ll_sb_info *sbi = ll_i2sbi(inode); 3350 struct obd_capa *oc; 3351 struct ptlrpc_request *req; 3352 struct mdt_body *body; 3353 void *lvbdata; 3354 void *lmm; 3355 int lmmsize; 3356 int rc; 3357 3358 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n", 3359 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY), 3360 lock->l_lvb_data, lock->l_lvb_len); 3361 3362 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY)) 3363 return 0; 3364 3365 /* if layout lock was granted right away, the layout is returned 3366 * within DLM_LVB of dlm reply; otherwise if the lock was ever 3367 * blocked and then granted via completion ast, we have to fetch 3368 * layout here. Please note that we can't use the LVB buffer in 3369 * completion AST because it doesn't have a large enough buffer */ 3370 oc = ll_mdscapa_get(inode); 3371 rc = ll_get_default_mdsize(sbi, &lmmsize); 3372 if (rc == 0) 3373 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, 3374 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0, 3375 lmmsize, 0, &req); 3376 capa_put(oc); 3377 if (rc < 0) 3378 return rc; 3379 3380 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); 3381 if (body == NULL) 3382 GOTO(out, rc = -EPROTO); 3383 3384 lmmsize = body->eadatasize; 3385 if (lmmsize == 0) /* empty layout */ 3386 GOTO(out, rc = 0); 3387 3388 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize); 3389 if (lmm == NULL) 3390 GOTO(out, rc = -EFAULT); 3391 3392 OBD_ALLOC_LARGE(lvbdata, lmmsize); 3393 if (lvbdata == NULL) 3394 GOTO(out, rc = -ENOMEM); 3395 3396 memcpy(lvbdata, lmm, lmmsize); 3397 lock_res_and_lock(lock); 3398 if (lock->l_lvb_data != NULL) 3399 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len); 3400 3401 lock->l_lvb_data = lvbdata; 3402 lock->l_lvb_len = lmmsize; 3403 unlock_res_and_lock(lock); 3404 3405out: 3406 ptlrpc_req_finished(req); 3407 return rc; 3408} 3409 3410/** 3411 * Apply the layout to the inode. Layout lock is held and will be released 3412 * in this function. 3413 */ 3414static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode, 3415 struct inode *inode, __u32 *gen, bool reconf) 3416{ 3417 struct ll_inode_info *lli = ll_i2info(inode); 3418 struct ll_sb_info *sbi = ll_i2sbi(inode); 3419 struct ldlm_lock *lock; 3420 struct lustre_md md = { NULL }; 3421 struct cl_object_conf conf; 3422 int rc = 0; 3423 bool lvb_ready; 3424 bool wait_layout = false; 3425 3426 LASSERT(lustre_handle_is_used(lockh)); 3427 3428 lock = ldlm_handle2lock(lockh); 3429 LASSERT(lock != NULL); 3430 LASSERT(ldlm_has_layout(lock)); 3431 3432 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n", 3433 inode, PFID(&lli->lli_fid), reconf); 3434 3435 /* in case this is a caching lock and reinstate with new inode */ 3436 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL); 3437 3438 lock_res_and_lock(lock); 3439 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY); 3440 unlock_res_and_lock(lock); 3441 /* checking lvb_ready is racy but this is okay. The worst case is 3442 * that multi processes may configure the file on the same time. */ 3443 if (lvb_ready || !reconf) { 3444 rc = -ENODATA; 3445 if (lvb_ready) { 3446 /* layout_gen must be valid if layout lock is not 3447 * cancelled and stripe has already set */ 3448 *gen = ll_layout_version_get(lli); 3449 rc = 0; 3450 } 3451 GOTO(out, rc); 3452 } 3453 3454 rc = ll_layout_fetch(inode, lock); 3455 if (rc < 0) 3456 GOTO(out, rc); 3457 3458 /* for layout lock, lmm is returned in lock's lvb. 3459 * lvb_data is immutable if the lock is held so it's safe to access it 3460 * without res lock. See the description in ldlm_lock_decref_internal() 3461 * for the condition to free lvb_data of layout lock */ 3462 if (lock->l_lvb_data != NULL) { 3463 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm, 3464 lock->l_lvb_data, lock->l_lvb_len); 3465 if (rc >= 0) { 3466 *gen = LL_LAYOUT_GEN_EMPTY; 3467 if (md.lsm != NULL) 3468 *gen = md.lsm->lsm_layout_gen; 3469 rc = 0; 3470 } else { 3471 CERROR("%s: file "DFID" unpackmd error: %d\n", 3472 ll_get_fsname(inode->i_sb, NULL, 0), 3473 PFID(&lli->lli_fid), rc); 3474 } 3475 } 3476 if (rc < 0) 3477 GOTO(out, rc); 3478 3479 /* set layout to file. Unlikely this will fail as old layout was 3480 * surely eliminated */ 3481 memset(&conf, 0, sizeof(conf)); 3482 conf.coc_opc = OBJECT_CONF_SET; 3483 conf.coc_inode = inode; 3484 conf.coc_lock = lock; 3485 conf.u.coc_md = &md; 3486 rc = ll_layout_conf(inode, &conf); 3487 3488 if (md.lsm != NULL) 3489 obd_free_memmd(sbi->ll_dt_exp, &md.lsm); 3490 3491 /* refresh layout failed, need to wait */ 3492 wait_layout = rc == -EBUSY; 3493 3494out: 3495 LDLM_LOCK_PUT(lock); 3496 ldlm_lock_decref(lockh, mode); 3497 3498 /* wait for IO to complete if it's still being used. */ 3499 if (wait_layout) { 3500 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n", 3501 ll_get_fsname(inode->i_sb, NULL, 0), 3502 inode, PFID(&lli->lli_fid)); 3503 3504 memset(&conf, 0, sizeof(conf)); 3505 conf.coc_opc = OBJECT_CONF_WAIT; 3506 conf.coc_inode = inode; 3507 rc = ll_layout_conf(inode, &conf); 3508 if (rc == 0) 3509 rc = -EAGAIN; 3510 3511 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n", 3512 PFID(&lli->lli_fid), rc); 3513 } 3514 return rc; 3515} 3516 3517/** 3518 * This function checks if there exists a LAYOUT lock on the client side, 3519 * or enqueues it if it doesn't have one in cache. 3520 * 3521 * This function will not hold layout lock so it may be revoked any time after 3522 * this function returns. Any operations depend on layout should be redone 3523 * in that case. 3524 * 3525 * This function should be called before lov_io_init() to get an uptodate 3526 * layout version, the caller should save the version number and after IO 3527 * is finished, this function should be called again to verify that layout 3528 * is not changed during IO time. 3529 */ 3530int ll_layout_refresh(struct inode *inode, __u32 *gen) 3531{ 3532 struct ll_inode_info *lli = ll_i2info(inode); 3533 struct ll_sb_info *sbi = ll_i2sbi(inode); 3534 struct md_op_data *op_data; 3535 struct lookup_intent it; 3536 struct lustre_handle lockh; 3537 ldlm_mode_t mode; 3538 struct ldlm_enqueue_info einfo = { 3539 .ei_type = LDLM_IBITS, 3540 .ei_mode = LCK_CR, 3541 .ei_cb_bl = ll_md_blocking_ast, 3542 .ei_cb_cp = ldlm_completion_ast, 3543 }; 3544 int rc; 3545 3546 *gen = ll_layout_version_get(lli); 3547 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE) 3548 return 0; 3549 3550 /* sanity checks */ 3551 LASSERT(fid_is_sane(ll_inode2fid(inode))); 3552 LASSERT(S_ISREG(inode->i_mode)); 3553 3554 /* take layout lock mutex to enqueue layout lock exclusively. */ 3555 mutex_lock(&lli->lli_layout_mutex); 3556 3557again: 3558 /* mostly layout lock is caching on the local side, so try to match 3559 * it before grabbing layout lock mutex. */ 3560 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0, 3561 LCK_CR | LCK_CW | LCK_PR | LCK_PW); 3562 if (mode != 0) { /* hit cached lock */ 3563 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true); 3564 if (rc == -EAGAIN) 3565 goto again; 3566 3567 mutex_unlock(&lli->lli_layout_mutex); 3568 return rc; 3569 } 3570 3571 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 3572 0, 0, LUSTRE_OPC_ANY, NULL); 3573 if (IS_ERR(op_data)) { 3574 mutex_unlock(&lli->lli_layout_mutex); 3575 return PTR_ERR(op_data); 3576 } 3577 3578 /* have to enqueue one */ 3579 memset(&it, 0, sizeof(it)); 3580 it.it_op = IT_LAYOUT; 3581 lockh.cookie = 0ULL; 3582 3583 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n", 3584 ll_get_fsname(inode->i_sb, NULL, 0), inode, 3585 PFID(&lli->lli_fid)); 3586 3587 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh, 3588 NULL, 0, NULL, 0); 3589 if (it.d.lustre.it_data != NULL) 3590 ptlrpc_req_finished(it.d.lustre.it_data); 3591 it.d.lustre.it_data = NULL; 3592 3593 ll_finish_md_op_data(op_data); 3594 3595 mode = it.d.lustre.it_lock_mode; 3596 it.d.lustre.it_lock_mode = 0; 3597 ll_intent_drop_lock(&it); 3598 3599 if (rc == 0) { 3600 /* set lock data in case this is a new lock */ 3601 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); 3602 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true); 3603 if (rc == -EAGAIN) 3604 goto again; 3605 } 3606 mutex_unlock(&lli->lli_layout_mutex); 3607 3608 return rc; 3609} 3610 3611/** 3612 * This function send a restore request to the MDT 3613 */ 3614int ll_layout_restore(struct inode *inode) 3615{ 3616 struct hsm_user_request *hur; 3617 int len, rc; 3618 3619 len = sizeof(struct hsm_user_request) + 3620 sizeof(struct hsm_user_item); 3621 OBD_ALLOC(hur, len); 3622 if (hur == NULL) 3623 return -ENOMEM; 3624 3625 hur->hur_request.hr_action = HUA_RESTORE; 3626 hur->hur_request.hr_archive_id = 0; 3627 hur->hur_request.hr_flags = 0; 3628 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, 3629 sizeof(hur->hur_user_item[0].hui_fid)); 3630 hur->hur_user_item[0].hui_extent.length = -1; 3631 hur->hur_request.hr_itemcount = 1; 3632 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp, 3633 len, hur, NULL); 3634 OBD_FREE(hur, len); 3635 return rc; 3636} 3637