file.c revision a720b790627c2e840f7eb58cf53fefc0428cc758
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/llite/file.c
 *
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 * Author: Andreas Dilger <adilger@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LLITE
#include <lustre_dlm.h>
#include <lustre_lite.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include "llite_internal.h"
#include <lustre/ll_fiemap.h>

#include "cl_object.h"

/**
 * Allocate a per-open ll_file_data descriptor from its slab cache.
 *
 * Returns the new descriptor with fd_write_failed cleared, or NULL on
 * allocation failure.
 */
struct ll_file_data *ll_file_data_get(void)
{
	struct ll_file_data *fd;

	/* __GFP_IO: allocation is allowed to do I/O for reclaim */
	OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
	if (fd == NULL)
		return NULL;
	fd->fd_write_failed = false;
	return fd;
}

/* Free a descriptor obtained from ll_file_data_get(); NULL is a no-op. */
static void ll_file_data_put(struct ll_file_data *fd)
{
	if (fd != NULL)
		OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
}

/**
 * Pack the inode attributes (mode, times, size, blocks, flags, ioepoch)
 * into \a op_data for an MDS request.  Takes an MDS capability reference
 * (op_capa1) and, when \a fh is non-NULL, records the open handle.
 * Sets MDS_DATA_MODIFIED in op_bias when the inode carries
 * LLIF_DATA_MODIFIED.
 */
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
			  struct lustre_handle *fh)
{
	op_data->op_fid1 = ll_i2info(inode)->lli_fid;
	op_data->op_attr.ia_mode = inode->i_mode;
	op_data->op_attr.ia_atime = inode->i_atime;
	op_data->op_attr.ia_mtime = inode->i_mtime;
	op_data->op_attr.ia_ctime = inode->i_ctime;
	op_data->op_attr.ia_size = i_size_read(inode);
	op_data->op_attr_blocks = inode->i_blocks;
	((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
					ll_inode_to_ext_flags(inode->i_flags);
	op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
	if (fh)
		op_data->op_handle = *fh;
	op_data->op_capa1 = ll_mdscapa_get(inode);

	if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
		op_data->op_bias |= MDS_DATA_MODIFIED;
}

/**
 * Closes the IO epoch and packs all the attributes into @op_data for
 * the CLOSE rpc.
 */
static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
			     struct obd_client_handle *och)
{
	op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
					ATTR_MTIME | ATTR_MTIME_SET |
					ATTR_CTIME | ATTR_CTIME_SET;

	if (!(och->och_flags & FMODE_WRITE))
		goto out;

	/* Without SOM, or for non-regular files, send size/blocks directly;
	 * otherwise close the IO epoch instead. */
	if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	else
		ll_ioepoch_close(inode, op_data, &och, 0);

out:
	ll_pack_inode2opdata(inode, op_data, &och->och_fh);
	ll_prep_md_op_data(op_data, inode, NULL, NULL,
			   0, 0, LUSTRE_OPC_ANY, NULL);
}

/**
 * Send a CLOSE RPC for open handle \a och to the MDS and dispose of the
 * handle.  If \a data_version is non-NULL the close is an HSM release
 * (MDS_HSM_RELEASE).  When SOM is in use and the epoch was not closed,
 * the handle is queued for DONE_WRITING instead of being freed here.
 */
static int ll_close_inode_openhandle(struct obd_export *md_exp,
				     struct inode *inode,
				     struct obd_client_handle *och,
				     const __u64 *data_version)
{
	struct obd_export *exp = ll_i2mdexp(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct obd_device *obd = class_exp2obd(exp);
	int epoch_close = 1;
	int rc;

	if (obd == NULL) {
		/*
		 * XXX: in case of LMV, is this correct to access
		 * ->exp_handle?
		 */
		CERROR("Invalid MDC connection handle "LPX64"\n",
		       ll_i2mdexp(inode)->exp_handle.h_cookie);
		GOTO(out, rc = 0);
	}

	OBD_ALLOC_PTR(op_data);
	if (op_data == NULL)
		GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.

	ll_prepare_close(inode, op_data, och);
	if (data_version != NULL) {
		/* Pass in data_version implies release. */
		op_data->op_bias |= MDS_HSM_RELEASE;
		op_data->op_data_version = *data_version;
		op_data->op_lease_handle = och->och_lease_handle;
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
	}
	epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
	rc = md_close(md_exp, op_data, och->och_mod, &req);
	if (rc == -EAGAIN) {
		/* This close must have the epoch closed.
		 */
		LASSERT(epoch_close);
		/* MDS has instructed us to obtain Size-on-MDS attribute from
		 * OSTs and send setattr to back to MDS. */
		rc = ll_som_update(inode, op_data);
		if (rc) {
			CERROR("inode %lu mdc Size-on-MDS update failed: "
			       "rc = %d\n", inode->i_ino, rc);
			rc = 0;
		}
	} else if (rc) {
		CERROR("inode %lu mdc close failed: rc = %d\n",
		       inode->i_ino, rc);
	}

	/* DATA_MODIFIED flag was successfully sent on close, cancel data
	 * modification flag. */
	if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
		struct ll_inode_info *lli = ll_i2info(inode);

		spin_lock(&lli->lli_lock);
		lli->lli_flags &= ~LLIF_DATA_MODIFIED;
		spin_unlock(&lli->lli_lock);
	}

	if (rc == 0) {
		rc = ll_objects_destroy(req, inode);
		if (rc)
			CERROR("inode %lu ll_objects destroy: rc = %d\n",
			       inode->i_ino, rc);
	}
	/* For HSM release, the MDS reply must confirm the file was
	 * actually released. */
	if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
		struct mdt_body *body;
		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		if (!(body->valid & OBD_MD_FLRELEASED))
			rc = -EBUSY;
	}

	ll_finish_md_op_data(op_data);

out:
	if (exp_connect_som(exp) && !epoch_close &&
	    S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
		ll_queue_done_writing(inode, LLIF_DONE_WRITING);
	} else {
		md_clear_open_replay_data(md_exp, och);
		/* Free @och if it is not waiting for DONE_WRITING.
		 */
		och->och_fh.cookie = DEAD_HANDLE_MAGIC;
		OBD_FREE_PTR(och);
	}
	if (req) /* This is close request */
		ptlrpc_req_finished(req);
	return rc;
}

/**
 * Drop the cached MDS open handle matching open mode \a flags
 * (read/write/exec) and send the close RPC, unless other file
 * descriptors still reference that handle.
 *
 * Returns 0 on success or when the handle is still in use.
 */
int ll_md_real_close(struct inode *inode, int flags)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_client_handle **och_p;
	struct obd_client_handle *och;
	__u64 *och_usecount;
	int rc = 0;

	/* Pick the handle slot and use count for this open mode. */
	if (flags & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (flags & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		LASSERT(flags & FMODE_READ);
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_usecount) { /* There are still users of this handle, so
				skip freeing it. */
		mutex_unlock(&lli->lli_och_mutex);
		return 0;
	}
	och=*och_p;
	*och_p = NULL;
	mutex_unlock(&lli->lli_och_mutex);

	if (och) { /* There might be a race and somebody have freed this och
		      already */
		rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
					       inode, och, NULL);
	}

	return rc;
}

/**
 * Per-descriptor close: drop the group lock and lease if held, release
 * fd_och if this descriptor owns one, decrement the per-mode open
 * counters, and really close the MDS handle when no matching OPEN DLM
 * lock is cached locally.
 */
int ll_md_close(struct obd_export *md_exp, struct inode *inode,
		struct file *file)
{
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc = 0;

	/* clear group lock, if present */
	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
		ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);

	if (fd->fd_lease_och != NULL) {
		bool lease_broken;

		/* Usually the lease is not released when the
		 * application crashed, we need to release here. */
		rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
		CDEBUG(rc ?
		       D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
			PFID(&lli->lli_fid), rc, lease_broken);

		fd->fd_lease_och = NULL;
	}

	if (fd->fd_och != NULL) {
		rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
		fd->fd_och = NULL;
		GOTO(out, rc);
	}

	/* Let's see if we have good enough OPEN lock on the file and if
	   we can skip talking to MDS */
	if (file->f_dentry->d_inode) { /* Can this ever be false? */
		int lockmode;
		int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
		struct lustre_handle lockh;
		struct inode *inode = file->f_dentry->d_inode;
		ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};

		/* Map the open mode to the DLM mode an OPEN lock would have
		 * been granted in, and drop this descriptor's use count. */
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_omode & FMODE_WRITE) {
			lockmode = LCK_CW;
			LASSERT(lli->lli_open_fd_write_count);
			lli->lli_open_fd_write_count--;
		} else if (fd->fd_omode & FMODE_EXEC) {
			lockmode = LCK_PR;
			LASSERT(lli->lli_open_fd_exec_count);
			lli->lli_open_fd_exec_count--;
		} else {
			lockmode = LCK_CR;
			LASSERT(lli->lli_open_fd_read_count);
			lli->lli_open_fd_read_count--;
		}
		mutex_unlock(&lli->lli_och_mutex);

		/* No cached OPEN lock: the MDS open handle must really be
		 * closed now. */
		if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
				   LDLM_IBITS, &policy, lockmode,
				   &lockh)) {
			rc = ll_md_real_close(file->f_dentry->d_inode,
					      fd->fd_omode);
		}
	} else {
		CERROR("Releasing a file %p with negative dentry %p. Name %s",
		       file, file->f_dentry, file->f_dentry->d_name.name);
	}

out:
	LUSTRE_FPRIVATE(file) = NULL;
	ll_file_data_put(fd);
	ll_capa_close(inode);

	return rc;
}

/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here. Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 */
int ll_file_release(struct inode *inode, struct file *file)
{
	struct ll_file_data *fd;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
	       inode->i_generation, inode);

#ifdef CONFIG_FS_POSIX_ACL
	/* Remote-client ACL bookkeeping is attached to the root inode only. */
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
	    inode == inode->i_sb->s_root->d_inode) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(fd != NULL);
		if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
			fd->fd_flags &= ~LL_FILE_RMTACL;
			rct_del(&sbi->ll_rct, current_pid());
			et_search_free(&sbi->ll_et, current_pid());
		}
	}
#endif

	if (inode->i_sb->s_root != file->f_dentry)
		ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
	fd = LUSTRE_FPRIVATE(file);
	LASSERT(fd != NULL);

	/* The last ref on @file, maybe not the the owner pid of statahead.
	 * Different processes can open the same dir, "ll_opendir_key" means:
	 * it is me that should stop the statahead thread.
	 */
	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
	    lli->lli_opendir_pid != 0)
		ll_stop_statahead(inode, lli->lli_opendir_key);

	/* Root of the mount: no MDS close RPC needed. */
	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = NULL;
		ll_file_data_put(fd);
		return 0;
	}

	if (!S_ISDIR(inode->i_mode)) {
		lov_read_and_clear_async_rc(lli->lli_clob);
		lli->lli_async_rc = 0;
	}

	rc = ll_md_close(sbi->ll_md_exp, inode, file);

	if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
		libcfs_debug_dumplog();

	return rc;
}

/**
 * Send an IT_OPEN intent to the MDS for \a file, optionally carrying
 * striping info (\a lmm/\a lmmsize), and install the returned lock and
 * inode state.
 */
static int ll_intent_file_open(struct file *file, void *lmm,
			       int lmmsize, struct lookup_intent *itp)
{
	struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
	struct dentry *parent = file->f_dentry->d_parent;
	const char *name = file->f_dentry->d_name.name;
	const int len = file->f_dentry->d_name.len;
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	__u32 opc = LUSTRE_OPC_ANY;
	int rc;

	if (!parent)
		return -ENOENT;

	/* Usually we come here only for NFSD, and we want open lock.
	   But we can also get here with pre 2.6.15 patchless kernels, and in
	   that case that lock is also ok */
	/* We can also get here if there was cached open handle in revalidate_it
	 * but it disappeared while we were getting from there to ll_file_open.
	 * But this means this file was closed and immediately opened which
	 * makes a good candidate for using OPEN lock */
	/* If lmmsize & lmm are not 0, we are just setting stripe info
	 * parameters.
	 * No need for the open lock */
	if (lmm == NULL && lmmsize == 0) {
		itp->it_flags |= MDS_OPEN_LOCK;
		if (itp->it_flags & FMODE_WRITE)
			opc = LUSTRE_OPC_CREATE;
	}

	op_data = ll_prep_md_op_data(NULL, parent->d_inode,
				     file->f_dentry->d_inode, name, len,
				     O_RDWR, opc, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	itp->it_flags |= MDS_OPEN_BY_FID;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
			    0 /*unused */, &req, ll_md_blocking_ast, 0);
	ll_finish_md_op_data(op_data);
	if (rc == -ESTALE) {
		/* reason for keep own exit path - don`t flood log
		 * with messages with -ESTALE errors.
		 */
		if (!it_disposition(itp, DISP_OPEN_OPEN) ||
		    it_open_error(DISP_OPEN_OPEN, itp))
			GOTO(out, rc);
		ll_release_openhandle(file->f_dentry, itp);
		GOTO(out, rc);
	}

	if (it_disposition(itp, DISP_LOOKUP_NEG))
		GOTO(out, rc = -ENOENT);

	if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
		rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
		CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
		GOTO(out, rc);
	}

	rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
	if (!rc && itp->d.lustre.it_lock_mode)
		ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
				 itp, NULL);

out:
	ptlrpc_req_finished(itp->d.lustre.it_data);
	it_clear_disposition(itp, DISP_ENQ_COMPLETE);
	ll_intent_drop_lock(itp);

	return rc;
}

/**
 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
 * not believe attributes if a few ioepoch holders exist. Attributes for
 * previous ioepoch if new one is opened are also skipped by MDS.
 */
void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
{
	if (ioepoch && lli->lli_ioepoch != ioepoch) {
		lli->lli_ioepoch = ioepoch;
		CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
		       ioepoch, PFID(&lli->lli_fid));
	}
}

/**
 * Fill \a och (fh, fid, lease handle, flags) from the MDT reply attached
 * to intent \a it, and register open replay data for recovery.
 */
static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
		       struct obd_client_handle *och)
{
	struct ptlrpc_request *req = it->d.lustre.it_data;
	struct mdt_body *body;

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	och->och_fh = body->handle;
	och->och_fid = body->fid1;
	och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
	och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
	och->och_flags = it->it_flags;

	return md_set_open_replay_data(md_exp, och, req);
}

/**
 * Complete a local open: attach \a fd as the file's private data, init
 * readahead state and record the open mode.  When \a och is non-NULL it
 * is filled from the intent reply and the returned ioepoch is applied.
 */
int ll_local_open(struct file *file, struct lookup_intent *it,
		  struct ll_file_data *fd, struct obd_client_handle *och)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_inode_info *lli = ll_i2info(inode);

	LASSERT(!LUSTRE_FPRIVATE(file));

	LASSERT(fd != NULL);

	if (och) {
		struct ptlrpc_request *req = it->d.lustre.it_data;
		struct mdt_body *body;
		int rc;

		rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
		if (rc != 0)
			return rc;

		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		ll_ioepoch_open(lli, body->ioepoch);
	}

	LUSTRE_FPRIVATE(file) = fd;
	ll_readahead_init(inode, &fd->fd_ras);
	fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
	return 0;
}

/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 */
int ll_file_open(struct inode *inode, struct file *file)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct lookup_intent *it, oit = { .it_op = IT_OPEN,
					  .it_flags = file->f_flags };
	struct obd_client_handle **och_p = NULL;
	__u64 *och_usecount = NULL;
	struct ll_file_data *fd;
	int rc = 0, opendir_set = 0;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
	       inode->i_generation, inode, file->f_flags);

	it = file->private_data; /* XXX: compat macro */
	file->private_data = NULL; /* prevent ll_local_open assertion */

	fd = ll_file_data_get();
	if (fd == NULL)
		GOTO(out_openerr, rc = -ENOMEM);

	fd->fd_file = file;
	if (S_ISDIR(inode->i_mode)) {
		/* Claim statahead ownership of this directory if nobody
		 * else holds it yet. */
		spin_lock(&lli->lli_sa_lock);
		if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
		    lli->lli_opendir_pid == 0) {
			lli->lli_opendir_key = fd;
			lli->lli_opendir_pid = current_pid();
			opendir_set = 1;
		}
		spin_unlock(&lli->lli_sa_lock);
	}

	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = fd;
		return 0;
	}

	if (!it || !it->d.lustre.it_disposition) {
		/* Convert f_flags into access mode. We cannot use file->f_mode,
		 * because everything but O_ACCMODE mask was stripped from
		 * there */
		if ((oit.it_flags + 1) & O_ACCMODE)
			oit.it_flags++;
		if (file->f_flags & O_TRUNC)
			oit.it_flags |= FMODE_WRITE;

		/* kernel only call f_op->open in dentry_open.  filp_open calls
		 * dentry_open after call to open_namei that checks permissions.
		 * Only nfsd_open call dentry_open directly without checking
		 * permissions and because of that this code below is safe. */
		if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
			oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;

		/* We do not want O_EXCL here, presumably we opened the file
		 * already? XXX - NFS implications? */
		oit.it_flags &= ~O_EXCL;

		/* bug20584, if "it_flags" contains O_CREAT, the file will be
		 * created if necessary, then "IT_CREAT" should be set to keep
		 * consistent with it */
		if (oit.it_flags & O_CREAT)
			oit.it_op |= IT_CREAT;

		it = &oit;
	}

restart:
	/* Let's see if we have file open on MDS already. */
	if (it->it_flags & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (it->it_flags & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
	} else {
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;
	}

	mutex_lock(&lli->lli_och_mutex);
	if (*och_p) { /* Open handle is present */
		if (it_disposition(it, DISP_OPEN_OPEN)) {
			/* Well, there's extra open request that we do not need,
			   let's close it somehow. This will decref request.
			 */
			rc = it_open_error(DISP_OPEN_OPEN, it);
			if (rc) {
				mutex_unlock(&lli->lli_och_mutex);
				GOTO(out_openerr, rc);
			}

			ll_release_openhandle(file->f_dentry, it);
		}
		(*och_usecount)++;

		rc = ll_local_open(file, it, fd, NULL);
		if (rc) {
			(*och_usecount)--;
			mutex_unlock(&lli->lli_och_mutex);
			GOTO(out_openerr, rc);
		}
	} else {
		LASSERT(*och_usecount == 0);
		if (!it->d.lustre.it_disposition) {
			/* We cannot just request lock handle now, new ELC code
			   means that one of other OPEN locks for this file
			   could be cancelled, and since blocking ast handler
			   would attempt to grab och_mutex as well, that would
			   result in a deadlock */
			mutex_unlock(&lli->lli_och_mutex);
			it->it_create_mode |= M_CHECK_STALE;
			rc = ll_intent_file_open(file, NULL, 0, it);
			it->it_create_mode &= ~M_CHECK_STALE;
			if (rc)
				GOTO(out_openerr, rc);

			goto restart;
		}
		OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
		if (!*och_p)
			GOTO(out_och_free, rc = -ENOMEM);

		(*och_usecount)++;

		/* md_intent_lock() didn't get a request ref if there was an
		 * open error, so don't do cleanup on the request here
		 * (bug 3430) */
		/* XXX (green): Should not we bail out on any error here, not
		 * just open error?
		 */
		rc = it_open_error(DISP_OPEN_OPEN, it);
		if (rc)
			GOTO(out_och_free, rc);

		LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));

		rc = ll_local_open(file, it, fd, *och_p);
		if (rc)
			GOTO(out_och_free, rc);
	}
	mutex_unlock(&lli->lli_och_mutex);
	fd = NULL;

	/* Must do this outside lli_och_mutex lock to prevent deadlock where
	   different kind of OPEN lock for this same inode gets cancelled
	   by ldlm_cancel_lru */
	if (!S_ISREG(inode->i_mode))
		GOTO(out_och_free, rc);

	ll_capa_open(inode);

	if (!lli->lli_has_smd) {
		if (file->f_flags & O_LOV_DELAY_CREATE ||
		    !(file->f_mode & FMODE_WRITE)) {
			CDEBUG(D_INODE, "object creation was delayed\n");
			GOTO(out_och_free, rc);
		}
	}
	file->f_flags &= ~O_LOV_DELAY_CREATE;
	GOTO(out_och_free, rc);

out_och_free:
	if (rc) {
		if (och_p && *och_p) {
			OBD_FREE(*och_p, sizeof (struct obd_client_handle));
			*och_p = NULL; /* OBD_FREE writes some magic there */
			(*och_usecount)--;
		}
		mutex_unlock(&lli->lli_och_mutex);

out_openerr:
		if (opendir_set != 0)
			ll_stop_statahead(inode, lli->lli_opendir_key);
		if (fd != NULL)
			ll_file_data_put(fd);
	} else {
		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
	}

	if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}

	return rc;
}

/**
 * Blocking AST for lease locks: cancel the lease lock when a conflicting
 * request arrives; nothing to do on the CANCELING callback.
 */
static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
			struct ldlm_lock_desc *desc, void *data, int flag)
{
	int rc;
	struct lustre_handle lockh;

	switch (flag) {
	case LDLM_CB_BLOCKING:
		ldlm_lock2handle(lock, &lockh);
		rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
		if (rc < 0) {
			CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
			return rc;
		}
		break;
	case LDLM_CB_CANCELING:
		/* do nothing */
		break;
	}
	return 0;
}

/**
 * Acquire a
 * lease and open the file.
 */
struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
					fmode_t fmode, __u64 open_flags)
{
	struct lookup_intent it = { .it_op = IT_OPEN };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req;
	struct lustre_handle old_handle = { 0 };
	struct obd_client_handle *och = NULL;
	int rc;
	int rc2;

	/* A lease is either read or write mode, never both. */
	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
		return ERR_PTR(-EINVAL);

	if (file != NULL) {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
		struct obd_client_handle **och_p;
		__u64 *och_usecount;

		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
			return ERR_PTR(-EPERM);

		/* Get the openhandle of the file */
		rc = -EBUSY;
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			mutex_unlock(&lli->lli_och_mutex);
			return ERR_PTR(rc);
		}

		if (fd->fd_och == NULL) {
			if (file->f_mode & FMODE_WRITE) {
				LASSERT(lli->lli_mds_write_och != NULL);
				och_p = &lli->lli_mds_write_och;
				och_usecount = &lli->lli_open_fd_write_count;
			} else {
				LASSERT(lli->lli_mds_read_och != NULL);
				och_p = &lli->lli_mds_read_och;
				och_usecount = &lli->lli_open_fd_read_count;
			}
			/* Steal the cached open handle only when this file
			 * is its single opener. */
			if (*och_usecount == 1) {
				fd->fd_och = *och_p;
				*och_p = NULL;
				*och_usecount = 0;
				rc = 0;
			}
		}
		mutex_unlock(&lli->lli_och_mutex);
		if (rc < 0) /* more than 1 opener */
			return ERR_PTR(rc);

		LASSERT(fd->fd_och != NULL);
		old_handle = fd->fd_och->och_fh;
	}

	OBD_ALLOC_PTR(och);
	if (och == NULL)
		return ERR_PTR(-ENOMEM);

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
					LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		GOTO(out, rc = PTR_ERR(op_data));

	/* To tell the MDT this openhandle is from the same owner */
	op_data->op_handle = old_handle;

	it.it_flags = fmode | open_flags;
	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
			    ll_md_blocking_lease_ast,
	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
	 * it can be cancelled which may mislead applications that the lease is
	 * broken;
	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
	ll_finish_md_op_data(op_data);
	if (req != NULL) {
		ptlrpc_req_finished(req);
		it_clear_disposition(&it, DISP_ENQ_COMPLETE);
	}
	if (rc < 0)
		GOTO(out_release_it, rc);

	if (it_disposition(&it, DISP_LOOKUP_NEG))
		GOTO(out_release_it, rc = -ENOENT);

	rc = it_open_error(DISP_OPEN_OPEN, &it);
	if (rc)
		GOTO(out_release_it, rc);

	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
	ll_och_fill(sbi->ll_md_exp, &it, och);

	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
		GOTO(out_close, rc = -EOPNOTSUPP);

	/* already get lease, handle lease lock */
	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
	if (it.d.lustre.it_lock_mode == 0 ||
	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
		/* open lock must return for lease */
		CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
			PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
			it.d.lustre.it_lock_bits);
		GOTO(out_close, rc = -EPROTO);
	}

	ll_intent_release(&it);
	return och;

out_close:
	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
	if (rc2)
		CERROR("Close openhandle returned %d\n", rc2);

	/* cancel open lock */
	if (it.d.lustre.it_lock_mode != 0) {
		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
					    it.d.lustre.it_lock_mode);
		it.d.lustre.it_lock_mode = 0;
	}
out_release_it:
	ll_intent_release(&it);
out:
	OBD_FREE_PTR(och);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL(ll_lease_open);

/**
 * Release lease and close the file.
 * It will check if the lease has ever broken.
 */
int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
		   bool *lease_broken)
{
	struct ldlm_lock *lock;
	bool cancelled = true;
	int rc;

	/* Check whether the lease lock was already cancelled (broken). */
	lock = ldlm_handle2lock(&och->och_lease_handle);
	if (lock != NULL) {
		lock_res_and_lock(lock);
		cancelled = ldlm_is_cancel(lock);
		unlock_res_and_lock(lock);
		ldlm_lock_put(lock);
	}

	CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
	       PFID(&ll_i2info(inode)->lli_fid), cancelled);

	if (!cancelled)
		ldlm_cli_cancel(&och->och_lease_handle, 0);
	if (lease_broken != NULL)
		*lease_broken = cancelled;

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
				       NULL);
	return rc;
}
EXPORT_SYMBOL(ll_lease_close);

/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
			  struct obd_capa *capa, struct obdo *obdo,
			  __u64 ioepoch, int sync)
{
	struct ptlrpc_request_set *set;
	struct obd_info oinfo = { { { 0 } } };
	int rc;

	LASSERT(lsm != NULL);

	oinfo.oi_md = lsm;
	oinfo.oi_oa = obdo;
	oinfo.oi_oa->o_oi = lsm->lsm_oi;
	oinfo.oi_oa->o_mode = S_IFREG;
	oinfo.oi_oa->o_ioepoch = ioepoch;
	oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
			       OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
			       OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
			       OBD_MD_FLMTIME | OBD_MD_FLCTIME |
			       OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
			       OBD_MD_FLDATAVERSION;
	oinfo.oi_capa = capa;
	if (sync) {
		/* Request a server-side lock so the attributes are stable. */
		oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
		oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
	}

	set = ptlrpc_prep_set();
	if (set == NULL) {
		CERROR("can't allocate ptlrpc set\n");
		rc = -ENOMEM;
	} else {
		rc = obd_getattr_async(exp, &oinfo, set);
		if (rc == 0)
			rc = ptlrpc_set_wait(set);
		ptlrpc_set_destroy(set);
	}
	if (rc == 0)
		oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
					 OBD_MD_FLATIME | OBD_MD_FLMTIME |
					 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
					 OBD_MD_FLDATAVERSION);
	return rc;
}

/**
 * Performs the getattr on the inode and updates its fields.
 * If @sync != 0, perform the getattr under the server-side lock.
951 */ 952int ll_inode_getattr(struct inode *inode, struct obdo *obdo, 953 __u64 ioepoch, int sync) 954{ 955 struct obd_capa *capa = ll_mdscapa_get(inode); 956 struct lov_stripe_md *lsm; 957 int rc; 958 959 lsm = ccc_inode_lsm_get(inode); 960 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode), 961 capa, obdo, ioepoch, sync); 962 capa_put(capa); 963 if (rc == 0) { 964 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; 965 966 obdo_refresh_inode(inode, obdo, obdo->o_valid); 967 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu," 968 " blksize %lu\n", POSTID(oi), i_size_read(inode), 969 (unsigned long long)inode->i_blocks, 970 (unsigned long)ll_inode_blksize(inode)); 971 } 972 ccc_inode_lsm_put(inode, lsm); 973 return rc; 974} 975 976int ll_merge_lvb(const struct lu_env *env, struct inode *inode) 977{ 978 struct ll_inode_info *lli = ll_i2info(inode); 979 struct cl_object *obj = lli->lli_clob; 980 struct cl_attr *attr = ccc_env_thread_attr(env); 981 struct ost_lvb lvb; 982 int rc = 0; 983 984 ll_inode_size_lock(inode); 985 /* merge timestamps the most recently obtained from mds with 986 timestamps obtained from osts */ 987 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime; 988 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime; 989 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime; 990 inode_init_lvb(inode, &lvb); 991 992 cl_object_attr_lock(obj); 993 rc = cl_object_attr_get(env, obj, attr); 994 cl_object_attr_unlock(obj); 995 996 if (rc == 0) { 997 if (lvb.lvb_atime < attr->cat_atime) 998 lvb.lvb_atime = attr->cat_atime; 999 if (lvb.lvb_ctime < attr->cat_ctime) 1000 lvb.lvb_ctime = attr->cat_ctime; 1001 if (lvb.lvb_mtime < attr->cat_mtime) 1002 lvb.lvb_mtime = attr->cat_mtime; 1003 1004 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n", 1005 PFID(&lli->lli_fid), attr->cat_size); 1006 cl_isize_write_nolock(inode, attr->cat_size); 1007 1008 inode->i_blocks = attr->cat_blocks; 1009 1010 LTIME_S(inode->i_mtime) = lvb.lvb_mtime; 1011 LTIME_S(inode->i_atime) = lvb.lvb_atime; 
1012 LTIME_S(inode->i_ctime) = lvb.lvb_ctime; 1013 } 1014 ll_inode_size_unlock(inode); 1015 1016 return rc; 1017} 1018 1019int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, 1020 lstat_t *st) 1021{ 1022 struct obdo obdo = { 0 }; 1023 int rc; 1024 1025 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0); 1026 if (rc == 0) { 1027 st->st_size = obdo.o_size; 1028 st->st_blocks = obdo.o_blocks; 1029 st->st_mtime = obdo.o_mtime; 1030 st->st_atime = obdo.o_atime; 1031 st->st_ctime = obdo.o_ctime; 1032 } 1033 return rc; 1034} 1035 1036void ll_io_init(struct cl_io *io, const struct file *file, int write) 1037{ 1038 struct inode *inode = file->f_dentry->d_inode; 1039 1040 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK; 1041 if (write) { 1042 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND); 1043 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || 1044 file->f_flags & O_DIRECT || 1045 IS_SYNC(inode); 1046 } 1047 io->ci_obj = ll_i2info(inode)->lli_clob; 1048 io->ci_lockreq = CILR_MAYBE; 1049 if (ll_file_nolock(file)) { 1050 io->ci_lockreq = CILR_NEVER; 1051 io->ci_no_srvlock = 1; 1052 } else if (file->f_flags & O_APPEND) { 1053 io->ci_lockreq = CILR_MANDATORY; 1054 } 1055} 1056 1057static ssize_t 1058ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, 1059 struct file *file, enum cl_io_type iot, 1060 loff_t *ppos, size_t count) 1061{ 1062 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode); 1063 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 1064 struct cl_io *io; 1065 ssize_t result; 1066 1067restart: 1068 io = ccc_env_thread_io(env); 1069 ll_io_init(io, file, iot == CIT_WRITE); 1070 1071 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { 1072 struct vvp_io *vio = vvp_env_io(env); 1073 struct ccc_io *cio = ccc_env_io(env); 1074 int write_mutex_locked = 0; 1075 1076 cio->cui_fd = LUSTRE_FPRIVATE(file); 1077 vio->cui_io_subtype = args->via_io_subtype; 1078 1079 switch (vio->cui_io_subtype) { 1080 case 
IO_NORMAL: 1081 cio->cui_iov = args->u.normal.via_iov; 1082 cio->cui_nrsegs = args->u.normal.via_nrsegs; 1083 cio->cui_tot_nrsegs = cio->cui_nrsegs; 1084 cio->cui_iocb = args->u.normal.via_iocb; 1085 if ((iot == CIT_WRITE) && 1086 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { 1087 if (mutex_lock_interruptible(&lli-> 1088 lli_write_mutex)) 1089 GOTO(out, result = -ERESTARTSYS); 1090 write_mutex_locked = 1; 1091 } else if (iot == CIT_READ) { 1092 down_read(&lli->lli_trunc_sem); 1093 } 1094 break; 1095 case IO_SENDFILE: 1096 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor; 1097 vio->u.sendfile.cui_target = args->u.sendfile.via_target; 1098 break; 1099 case IO_SPLICE: 1100 vio->u.splice.cui_pipe = args->u.splice.via_pipe; 1101 vio->u.splice.cui_flags = args->u.splice.via_flags; 1102 break; 1103 default: 1104 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype); 1105 LBUG(); 1106 } 1107 result = cl_io_loop(env, io); 1108 if (write_mutex_locked) 1109 mutex_unlock(&lli->lli_write_mutex); 1110 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ) 1111 up_read(&lli->lli_trunc_sem); 1112 } else { 1113 /* cl_io_rw_init() handled IO */ 1114 result = io->ci_result; 1115 } 1116 1117 if (io->ci_nob > 0) { 1118 result = io->ci_nob; 1119 *ppos = io->u.ci_wr.wr.crw_pos; 1120 } 1121 GOTO(out, result); 1122out: 1123 cl_io_fini(env, io); 1124 /* If any bit been read/written (result != 0), we just return 1125 * short read/write instead of restart io. */ 1126 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) { 1127 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n", 1128 iot == CIT_READ ? 
"read" : "write",
		       file->f_dentry->d_name.name, *ppos, count);
		LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
		goto restart;
	}

	/* Account transferred bytes; a failed write (other than an
	 * interrupted one) is remembered in fd_write_failed. */
	if (iot == CIT_READ) {
		if (result >= 0)
			ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
					   LPROC_LL_READ_BYTES, result);
	} else if (iot == CIT_WRITE) {
		if (result >= 0) {
			ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
					   LPROC_LL_WRITE_BYTES, result);
			fd->fd_write_failed = false;
		} else if (result != -ERESTARTSYS) {
			fd->fd_write_failed = true;
		}
	}

	return result;
}

/* AIO read entry point: wrap the iovec into vvp_io_args and run the
 * generic IO engine. */
static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
{
	struct lu_env *env;
	struct vvp_io_args *args;
	size_t count = 0;
	ssize_t result;
	int refcheck;

	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
	if (result)
		return result;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	args = vvp_env_args(env, IO_NORMAL);
	args->u.normal.via_iov = (struct iovec *)iov;
	args->u.normal.via_nrsegs = nr_segs;
	args->u.normal.via_iocb = iocb;

	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
				    &iocb->ki_pos, count);
	cl_env_put(env, &refcheck);
	return result;
}

/* Synchronous read(): build a one-segment iovec plus a sync kiocb and
 * go through the AIO path. */
static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
			    loff_t *ppos)
{
	struct lu_env *env;
	struct iovec *local_iov;
	struct kiocb *kiocb;
	ssize_t result;
	int refcheck;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	local_iov = &vvp_env_info(env)->vti_local_iov;
	kiocb = &vvp_env_info(env)->vti_kiocb;
	local_iov->iov_base = (void __user *)buf;
	local_iov->iov_len = count;
	init_sync_kiocb(kiocb, file);
	kiocb->ki_pos = *ppos;
	kiocb->ki_nbytes = count;

	result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
	*ppos = kiocb->ki_pos;

	cl_env_put(env, &refcheck);
	return result;
}

/*
 * Write to a file (through the page cache).
 */
static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				 unsigned long nr_segs, loff_t pos)
{
	struct lu_env *env;
	struct vvp_io_args *args;
	size_t count = 0;
	ssize_t result;
	int refcheck;

	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
	if (result)
		return result;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	args = vvp_env_args(env, IO_NORMAL);
	args->u.normal.via_iov = (struct iovec *)iov;
	args->u.normal.via_nrsegs = nr_segs;
	args->u.normal.via_iocb = iocb;

	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
				    &iocb->ki_pos, count);
	cl_env_put(env, &refcheck);
	return result;
}

/* Synchronous write(): mirror of ll_file_read() for the write path. */
static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
			     loff_t *ppos)
{
	struct lu_env *env;
	struct iovec *local_iov;
	struct kiocb *kiocb;
	ssize_t result;
	int refcheck;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	local_iov = &vvp_env_info(env)->vti_local_iov;
	kiocb = &vvp_env_info(env)->vti_kiocb;
	local_iov->iov_base = (void __user *)buf;
	local_iov->iov_len = count;
	init_sync_kiocb(kiocb, file);
	kiocb->ki_pos = *ppos;
	kiocb->ki_nbytes = count;

	result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
	*ppos = kiocb->ki_pos;

	cl_env_put(env, &refcheck);
	return result;
}



/*
 * Send file content (through pagecache) somewhere with helper
 */
static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
				   struct pipe_inode_info *pipe, size_t count,
				   unsigned int flags)
{
	struct lu_env *env;
	struct vvp_io_args *args;
	ssize_t result;
	int refcheck;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	args = vvp_env_args(env, IO_SPLICE);
	args->u.splice.via_pipe = pipe;
	args->u.splice.via_flags = flags;

	result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
	cl_env_put(env, &refcheck);
	return result;
}

/* Re-create the OST objects of this file on the given OST index by
 * cloning the current stripe md and calling obd_create() with
 * OBD_FL_RECREATE_OBJS set. */
static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
			   obd_count ost_idx)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct obd_trans_info oti = { 0 };
	struct obdo *oa = NULL;
	int lsm_size;
	int rc = 0;
	struct lov_stripe_md *lsm = NULL, *lsm2;

	OBDO_ALLOC(oa);
	if (oa == NULL)
		return -ENOMEM;

	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm))
		GOTO(out, rc = -ENOENT);

	lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
		   (lsm->lsm_stripe_count));

	OBD_ALLOC_LARGE(lsm2, lsm_size);
	if (lsm2 == NULL)
		GOTO(out, rc = -ENOMEM);

	/* the target OST index travels in o_nlink */
	oa->o_oi = *oi;
	oa->o_nlink = ost_idx;
	oa->o_flags |= OBD_FL_RECREATE_OBJS;
	oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
	obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
				   OBD_MD_FLMTIME | OBD_MD_FLCTIME);
	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
	memcpy(lsm2, lsm, lsm_size);
	ll_inode_size_lock(inode);
	rc = obd_create(NULL, exp, oa, &lsm2, &oti);
	ll_inode_size_unlock(inode);

	OBD_FREE_LARGE(lsm2, lsm_size);
	GOTO(out, rc);
out:
	ccc_inode_lsm_put(inode, lsm);
	OBDO_FREE(oa);
	return rc;
}

/* LL_IOC_RECREATE_OBJ: recreate objects for a plain object id
 * (admin only). */
static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
{
	struct ll_recreate_obj ucreat;
	struct ost_id oi;

	if (!cfs_capable(CFS_CAP_SYS_ADMIN))
		return -EPERM;

	if 
(copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
			   sizeof(ucreat)))
		return -EFAULT;

	ostid_set_seq_mdt0(&oi);
	ostid_set_id(&oi, ucreat.lrc_id);
	return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
}

/* LL_IOC_RECREATE_FID: recreate objects for a FID (admin only); the
 * OST index is derived from the FID sequence. */
static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
{
	struct lu_fid fid;
	struct ost_id oi;
	obd_count ost_idx;

	if (!cfs_capable(CFS_CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
		return -EFAULT;

	fid_to_ostid(&fid, &oi);
	ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
	return ll_lov_recreate(inode, &oi, ost_idx);
}

/* Set the stripe EA on a file that has no layout yet by replaying an
 * open intent carrying the lov_user_md; returns -EEXIST when the file
 * is already striped. */
int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
			     int flags, struct lov_user_md *lum, int lum_size)
{
	struct lov_stripe_md *lsm = NULL;
	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
	int rc = 0;

	lsm = ccc_inode_lsm_get(inode);
	if (lsm != NULL) {
		ccc_inode_lsm_put(inode, lsm);
		CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
		       inode->i_ino);
		return -EEXIST;
	}

	ll_inode_size_lock(inode);
	rc = ll_intent_file_open(file, lum, lum_size, &oit);
	if (rc)
		GOTO(out, rc);
	rc = oit.d.lustre.it_status;
	if (rc < 0)
		GOTO(out_req_free, rc);

	ll_release_openhandle(file->f_dentry, &oit);

 out:
	ll_inode_size_unlock(inode);
	ll_intent_release(&oit);
	/* lsm is known to be NULL here (a non-NULL lsm returned early) */
	ccc_inode_lsm_put(inode, lsm);
	return rc;
out_req_free:
	ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
	goto out;
}

/* Fetch the LOV EA of @filename from the MDS via getattr-by-name and
 * return it (swabbed to host order if needed) together with the request
 * that owns the buffer; the caller must release *request. */
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
			     struct lov_mds_md **lmmp, int *lmm_size,
			     struct ptlrpc_request **request)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct mdt_body *body;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *req = NULL;
	struct md_op_data *op_data;
	int rc, lmmsize;

	rc = ll_get_max_mdsize(sbi, &lmmsize);
	if (rc)
		return rc;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
				     strlen(filename), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	/* request the EA (file or directory default layout) by name */
	op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc < 0) {
		CDEBUG(D_INFO, "md_getattr_name failed "
		       "on %s: rc %d\n", filename, rc);
		GOTO(out, rc);
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body != NULL); /* checked by mdc_getattr_name */

	lmmsize = body->eadatasize;

	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
	    lmmsize == 0) {
		GOTO(out, rc = -ENODATA);
	}

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
	LASSERT(lmm != NULL);

	if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
	    (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
		GOTO(out, rc = -EPROTO);
	}

	/*
	 * This is coming from the MDS, so is probably in
	 * little endian. We convert it to host endian before
	 * passing it to userspace.
	 */
	/* on a little-endian host the EA is already in host order and
	 * this whole branch compiles away */
	if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
		int stripe_count;

		stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
		if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
			stripe_count = 0;

		/* if function called for directory - we should
		 * avoid swab not existent lsm objects */
		if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
				 stripe_count);
		} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
				 stripe_count);
		}
	}

out:
	*lmmp = lmm;
	*lmm_size = lmmsize;
	*request = req;
	return rc;
}

/* LL_IOC_LOV_SETEA: admin-only path to install a raw stripe EA supplied
 * by user space. */
static int ll_lov_setea(struct inode *inode, struct file *file,
			unsigned long arg)
{
	int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
	struct lov_user_md *lump;
	int lum_size = sizeof(struct lov_user_md) +
		       sizeof(struct lov_user_ost_data);
	int rc;

	if (!cfs_capable(CFS_CAP_SYS_ADMIN))
		return -EPERM;

	OBD_ALLOC_LARGE(lump, lum_size);
	if (lump == NULL)
		return -ENOMEM;

	if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
		OBD_FREE_LARGE(lump, lum_size);
		return -EFAULT;
	}

	rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);

	OBD_FREE_LARGE(lump, lum_size);
	return rc;
}

/* LL_IOC_LOV_SETSTRIPE: copy in a v1 or v3 lov_user_md, set the stripe
 * EA, and on success report the resulting layout back to user space. */
static int ll_lov_setstripe(struct inode *inode, struct file *file,
			    unsigned long arg)
{
	struct lov_user_md_v3 lumv3;
	struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
	struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
	struct lov_user_md_v3 *lumv3p 
= (struct lov_user_md_v3 *)arg;
	int lum_size, rc;
	int flags = FMODE_WRITE;

	/* first try with v1 which is smaller than v3 */
	lum_size = sizeof(struct lov_user_md_v1);
	if (copy_from_user(lumv1, lumv1p, lum_size))
		return -EFAULT;

	if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
		lum_size = sizeof(struct lov_user_md_v3);
		if (copy_from_user(&lumv3, lumv3p, lum_size))
			return -EFAULT;
	}

	rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
	if (rc == 0) {
		struct lov_stripe_md *lsm;
		__u32 gen;

		/* NOTE(review): put_user() return value is ignored here */
		put_user(0, &lumv1p->lmm_stripe_count);

		ll_layout_refresh(inode, &gen);
		lsm = ccc_inode_lsm_get(inode);
		rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
				   0, lsm, (void *)arg);
		ccc_inode_lsm_put(inode, lsm);
	}
	return rc;
}

/* LL_IOC_LOV_GETSTRIPE: return the file's layout to user space, or
 * -ENODATA when the file has no stripes. */
static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
{
	struct lov_stripe_md *lsm;
	int rc = -ENODATA;

	lsm = ccc_inode_lsm_get(inode);
	if (lsm != NULL)
		rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
				   lsm, (void *)arg);
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}

/* Take a group (gid-based) lock on the file.  The lock is recorded in
 * the file descriptor; racing setters are resolved under lli_lock. */
int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;
	int rc;

	if (ll_file_nolock(file))
		return -EOPNOTSUPP;

	spin_lock(&lli->lli_lock);
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		CWARN("group lock already existed with gid %lu\n",
		      fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock == NULL);
	spin_unlock(&lli->lli_lock);

	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
	if (rc)
		return rc;

	spin_lock(&lli->lli_lock);
	/* re-check under lli_lock: another thread may have installed its
	 * group lock while ours was being enqueued */
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		spin_unlock(&lli->lli_lock);
		CERROR("another thread just won the race\n");
		cl_put_grouplock(&grouplock);
		return -EINVAL;
	}

	fd->fd_flags |= LL_FILE_GROUP_LOCKED;
	fd->fd_grouplock = grouplock;
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
	return 0;
}

/* Release the group lock with gid @arg previously taken on this file
 * descriptor by ll_get_grouplock(). */
int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;

	spin_lock(&lli->lli_lock);
	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
		spin_unlock(&lli->lli_lock);
		CWARN("no group lock held\n");
		return -EINVAL;
	}
	LASSERT(fd->fd_grouplock.cg_lock != NULL);

	if (fd->fd_grouplock.cg_gid != arg) {
		CWARN("group lock %lu doesn't match current id %lu\n",
		      arg, fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
		return -EINVAL;
	}

	/* detach the lock from the fd under lli_lock, drop it outside */
	grouplock = fd->fd_grouplock;
	memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
	fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
	spin_unlock(&lli->lli_lock);

	cl_put_grouplock(&grouplock);
	CDEBUG(D_INFO, "group lock %lu released\n", arg);
	return 0;
}

/**
 * Close inode open handle
 *
 * \param dentry [in]	dentry which contains the inode
 * \param it     [in,out] intent which contains open info and result
 *
 * \retval 0     success
 * \retval <0    failure
 */
int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
{
	struct inode *inode = dentry->d_inode;
	struct obd_client_handle *och;
	int rc;

	LASSERT(inode);

	/* Root ? Do nothing. */
	if (dentry->d_inode->i_sb->s_root == dentry)
		return 0;

	/* No open handle to close? 
Move away */
	if (!it_disposition(it, DISP_OPEN_OPEN))
		return 0;

	LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);

	OBD_ALLOC(och, sizeof(*och));
	if (!och)
		GOTO(out, rc = -ENOMEM);

	ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
				       inode, och, NULL);
out:
	/* this one is in place of ll_file_open */
	if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
	}
	return rc;
}

/**
 * Get size for inode for which FIEMAP mapping is requested.
 * Make the FIEMAP get_info call and returns the result.
 */
int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
		 int num_bytes)
{
	struct obd_export *exp = ll_i2dtexp(inode);
	struct lov_stripe_md *lsm = NULL;
	struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
	int vallen = num_bytes;
	int rc;

	/* Checks for fiemap flags */
	if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
		/* hand the unsupported flag bits back to the caller */
		fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
		return -EBADR;
	}

	/* Check for FIEMAP_FLAG_SYNC */
	if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
		rc = filemap_fdatawrite(inode->i_mapping);
		if (rc)
			return rc;
	}

	lsm = ccc_inode_lsm_get(inode);
	if (lsm == NULL)
		return -ENOENT;

	/* If the stripe_count > 1 and the application does not understand
	 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1716 */ 1717 if (lsm->lsm_stripe_count > 1 && 1718 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) 1719 GOTO(out, rc = -EOPNOTSUPP); 1720 1721 fm_key.oa.o_oi = lsm->lsm_oi; 1722 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; 1723 1724 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE); 1725 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid); 1726 /* If filesize is 0, then there would be no objects for mapping */ 1727 if (fm_key.oa.o_size == 0) { 1728 fiemap->fm_mapped_extents = 0; 1729 GOTO(out, rc = 0); 1730 } 1731 1732 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap)); 1733 1734 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen, 1735 fiemap, lsm); 1736 if (rc) 1737 CERROR("obd_get_info failed: rc = %d\n", rc); 1738 1739out: 1740 ccc_inode_lsm_put(inode, lsm); 1741 return rc; 1742} 1743 1744int ll_fid2path(struct inode *inode, void *arg) 1745{ 1746 struct obd_export *exp = ll_i2mdexp(inode); 1747 struct getinfo_fid2path *gfout, *gfin; 1748 int outsize, rc; 1749 1750 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) && 1751 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) 1752 return -EPERM; 1753 1754 /* Need to get the buflen */ 1755 OBD_ALLOC_PTR(gfin); 1756 if (gfin == NULL) 1757 return -ENOMEM; 1758 if (copy_from_user(gfin, arg, sizeof(*gfin))) { 1759 OBD_FREE_PTR(gfin); 1760 return -EFAULT; 1761 } 1762 1763 outsize = sizeof(*gfout) + gfin->gf_pathlen; 1764 OBD_ALLOC(gfout, outsize); 1765 if (gfout == NULL) { 1766 OBD_FREE_PTR(gfin); 1767 return -ENOMEM; 1768 } 1769 memcpy(gfout, gfin, sizeof(*gfout)); 1770 OBD_FREE_PTR(gfin); 1771 1772 /* Call mdc_iocontrol */ 1773 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL); 1774 if (rc) 1775 GOTO(gf_free, rc); 1776 1777 if (copy_to_user(arg, gfout, outsize)) 1778 rc = -EFAULT; 1779 1780gf_free: 1781 OBD_FREE(gfout, outsize); 1782 return rc; 1783} 1784 1785static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg) 1786{ 1787 struct ll_user_fiemap *fiemap_s; 1788 size_t num_bytes, 
ret_bytes; 1789 unsigned int extent_count; 1790 int rc = 0; 1791 1792 /* Get the extent count so we can calculate the size of 1793 * required fiemap buffer */ 1794 if (get_user(extent_count, 1795 &((struct ll_user_fiemap __user *)arg)->fm_extent_count)) 1796 return -EFAULT; 1797 num_bytes = sizeof(*fiemap_s) + (extent_count * 1798 sizeof(struct ll_fiemap_extent)); 1799 1800 OBD_ALLOC_LARGE(fiemap_s, num_bytes); 1801 if (fiemap_s == NULL) 1802 return -ENOMEM; 1803 1804 /* get the fiemap value */ 1805 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg, 1806 sizeof(*fiemap_s))) 1807 GOTO(error, rc = -EFAULT); 1808 1809 /* If fm_extent_count is non-zero, read the first extent since 1810 * it is used to calculate end_offset and device from previous 1811 * fiemap call. */ 1812 if (extent_count) { 1813 if (copy_from_user(&fiemap_s->fm_extents[0], 1814 (char __user *)arg + sizeof(*fiemap_s), 1815 sizeof(struct ll_fiemap_extent))) 1816 GOTO(error, rc = -EFAULT); 1817 } 1818 1819 rc = ll_do_fiemap(inode, fiemap_s, num_bytes); 1820 if (rc) 1821 GOTO(error, rc); 1822 1823 ret_bytes = sizeof(struct ll_user_fiemap); 1824 1825 if (extent_count != 0) 1826 ret_bytes += (fiemap_s->fm_mapped_extents * 1827 sizeof(struct ll_fiemap_extent)); 1828 1829 if (copy_to_user((void *)arg, fiemap_s, ret_bytes)) 1830 rc = -EFAULT; 1831 1832error: 1833 OBD_FREE_LARGE(fiemap_s, num_bytes); 1834 return rc; 1835} 1836 1837/* 1838 * Read the data_version for inode. 1839 * 1840 * This value is computed using stripe object version on OST. 1841 * Version is computed using server side locking. 1842 * 1843 * @param extent_lock Take extent lock. Not needed if a process is already 1844 * holding the OST object group locks. 
 */
int ll_data_version(struct inode *inode, __u64 *data_version,
		    int extent_lock)
{
	struct lov_stripe_md *lsm = NULL;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obdo *obdo = NULL;
	int rc;

	/* If no stripe, we consider version is 0. */
	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm)) {
		*data_version = 0;
		CDEBUG(D_INODE, "No object for inode\n");
		GOTO(out, rc = 0);
	}

	OBD_ALLOC_PTR(obdo);
	if (obdo == NULL)
		GOTO(out, rc = -ENOMEM);

	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
	if (rc == 0) {
		/* older servers may not return a data version at all */
		if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
			rc = -EOPNOTSUPP;
		else
			*data_version = obdo->o_data_version;
	}

	OBD_FREE_PTR(obdo);
out:
	ccc_inode_lsm_put(inode, lsm);
	return rc;
}

/*
 * Trigger a HSM release request for the provided inode.
 */
int ll_hsm_release(struct inode *inode)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct obd_client_handle *och = NULL;
	__u64 data_version = 0;
	int rc;


	CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
	       ll_get_fsname(inode->i_sb, NULL, 0),
	       PFID(&ll_i2info(inode)->lli_fid));

	/* take a write lease so the release is exclusive with other opens */
	och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
	if (IS_ERR(och))
		GOTO(out, rc = PTR_ERR(och));

	/* Grab latest data_version and [am]time values */
	rc = ll_data_version(inode, &data_version, 1);
	if (rc != 0)
		GOTO(out, rc);

	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		GOTO(out, rc = PTR_ERR(env));

	ll_merge_lvb(env, inode);
	cl_env_nested_put(&nest, env);

	/* Release the file.
	 * NB: lease lock handle is released in mdc_hsm_release_pack() because
	 * we still need it to pack l_remote_handle to MDT. 
*/
	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
				       &data_version);
	/* och is consumed by the close above */
	och = NULL;


out:
	if (och != NULL && !IS_ERR(och)) /* close the file */
		ll_lease_close(och, inode, NULL);

	return rc;
}

/* Per-call state saved while swapping two layouts; members *1/*2 follow
 * the FID ordering chosen in ll_swap_layouts(). */
struct ll_swap_stack {
	struct iattr ia1, ia2;
	__u64 dv1, dv2;
	struct inode *inode1, *inode2;
	bool check_dv1, check_dv2;
};

/* LL_IOC_LOV_SWAP_LAYOUTS: atomically exchange the layouts of two
 * files, optionally checking data versions and preserving times. */
static int ll_swap_layouts(struct file *file1, struct file *file2,
			   struct lustre_swap_layouts *lsl)
{
	struct mdc_swap_layouts msl;
	struct md_op_data *op_data;
	__u32 gid;
	__u64 dv;
	struct ll_swap_stack *llss = NULL;
	int rc;

	OBD_ALLOC_PTR(llss);
	if (llss == NULL)
		return -ENOMEM;

	llss->inode1 = file1->f_dentry->d_inode;
	llss->inode2 = file2->f_dentry->d_inode;

	if (!S_ISREG(llss->inode2->i_mode))
		GOTO(free, rc = -EINVAL);

	if (inode_permission(llss->inode1, MAY_WRITE) ||
	    inode_permission(llss->inode2, MAY_WRITE))
		GOTO(free, rc = -EPERM);

	if (llss->inode2->i_sb != llss->inode1->i_sb)
		GOTO(free, rc = -EXDEV);

	/* we use 2 bool because it is easier to swap than 2 bits */
	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
		llss->check_dv1 = true;

	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
		llss->check_dv2 = true;

	/* we cannot use lsl->sl_dvX directly because we may swap them */
	llss->dv1 = lsl->sl_dv1;
	llss->dv2 = lsl->sl_dv2;

	rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
	if (rc == 0) /* same file, done! 
*/
		GOTO(free, rc = 0);

	/* order the two files by FID so locks are always taken in the
	 * same order, avoiding deadlock between concurrent swaps */
	if (rc < 0) { /* sequentialize it */
		swap(llss->inode1, llss->inode2);
		swap(file1, file2);
		swap(llss->dv1, llss->dv2);
		swap(llss->check_dv1, llss->check_dv2);
	}

	gid = lsl->sl_gid;
	if (gid != 0) { /* application asks to flush dirty cache */
		rc = ll_get_grouplock(llss->inode1, file1, gid);
		if (rc < 0)
			GOTO(free, rc);

		rc = ll_get_grouplock(llss->inode2, file2, gid);
		if (rc < 0) {
			ll_put_grouplock(llss->inode1, file1, gid);
			GOTO(free, rc);
		}
	}

	/* to be able to restore mtime and atime after swap
	 * we need to first save them */
	if (lsl->sl_flags &
	    (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_mtime = llss->inode1->i_mtime;
		llss->ia1.ia_atime = llss->inode1->i_atime;
		llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
		llss->ia2.ia_mtime = llss->inode2->i_mtime;
		llss->ia2.ia_atime = llss->inode2->i_atime;
		llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
	}

	/* ultimate check, before swapping the layouts we check if
	 * dataversion has changed (if requested) */
	if (llss->check_dv1) {
		rc = ll_data_version(llss->inode1, &dv, 0);
		if (rc)
			GOTO(putgl, rc);
		if (dv != llss->dv1)
			GOTO(putgl, rc = -EAGAIN);
	}

	if (llss->check_dv2) {
		rc = ll_data_version(llss->inode2, &dv, 0);
		if (rc)
			GOTO(putgl, rc);
		if (dv != llss->dv2)
			GOTO(putgl, rc = -EAGAIN);
	}

	/* struct md_op_data is used to send the swap args to the mdt
	 * only flags is missing, so we use struct mdc_swap_layouts
	 * through the md_op_data->op_data */
	/* flags from user space have to be converted before they are sent to
	 * the server; no flag is sent today, they are only used on the client */
	msl.msl_flags = 0;
	rc = -ENOMEM;
	op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
				     0, 
LUSTRE_OPC_ANY, &msl);
	if (IS_ERR(op_data))
		GOTO(free, rc = PTR_ERR(op_data));

	rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
			   sizeof(*op_data), op_data, NULL);
	ll_finish_md_op_data(op_data);

putgl:
	if (gid != 0) {
		ll_put_grouplock(llss->inode2, file2, gid);
		ll_put_grouplock(llss->inode1, file1, gid);
	}

	/* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
	if (rc != 0)
		GOTO(free, rc);

	/* clear useless flags */
	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
		llss->ia1.ia_valid &= ~ATTR_MTIME;
		llss->ia2.ia_valid &= ~ATTR_MTIME;
	}

	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_valid &= ~ATTR_ATIME;
		llss->ia2.ia_valid &= ~ATTR_ATIME;
	}

	/* update time if requested */
	rc = 0;
	if (llss->ia2.ia_valid != 0) {
		/* the times saved from inode2 are applied to inode1 (and
		 * vice versa below) — they follow the swapped layouts */
		mutex_lock(&llss->inode1->i_mutex);
		rc = ll_setattr(file1->f_dentry, &llss->ia2);
		mutex_unlock(&llss->inode1->i_mutex);
	}

	if (llss->ia1.ia_valid != 0) {
		int rc1;

		mutex_lock(&llss->inode2->i_mutex);
		rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
		mutex_unlock(&llss->inode2->i_mutex);
		if (rc == 0)
			rc = rc1;
	}

free:
	if (llss != NULL)
		OBD_FREE_PTR(llss);

	return rc;
}

/* LL_IOC_HSM_STATE_SET: update the HSM state flags of a file on the MDT. */
static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
{
	struct md_op_data *op_data;
	int rc;

	/* Non-root users are forbidden to set or clear flags which are
	 * NOT defined in HSM_USER_MASK. 
*/
	if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
	    !cfs_capable(CFS_CAP_SYS_ADMIN))
		return -EPERM;

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, hss);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
			   sizeof(*op_data), op_data, NULL);

	ll_finish_md_op_data(op_data);

	return rc;
}

/* Import a file from the HSM backend: flag it archived/released on the
 * MDT, then apply the user-supplied attributes (mode, owner, size,
 * times) via ll_setattr_raw(). */
static int ll_hsm_import(struct inode *inode, struct file *file,
			 struct hsm_user_import *hui)
{
	struct hsm_state_set *hss = NULL;
	struct iattr *attr = NULL;
	int rc;


	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* set HSM flags */
	OBD_ALLOC_PTR(hss);
	if (hss == NULL)
		GOTO(out, rc = -ENOMEM);

	hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
	hss->hss_archive_id = hui->hui_archive_id;
	hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
	rc = ll_hsm_state_set(inode, hss);
	if (rc != 0)
		GOTO(out, rc);

	OBD_ALLOC_PTR(attr);
	if (attr == NULL)
		GOTO(out, rc = -ENOMEM);

	/* only permission bits are taken from hui_mode; force S_IFREG */
	attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
	attr->ia_mode |= S_IFREG;
	attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
	attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
	attr->ia_size = hui->hui_size;
	attr->ia_mtime.tv_sec = hui->hui_mtime;
	attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
	attr->ia_atime.tv_sec = hui->hui_atime;
	attr->ia_atime.tv_nsec = hui->hui_atime_ns;

	attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
			 ATTR_UID | ATTR_GID |
			 ATTR_MTIME | ATTR_MTIME_SET |
			 ATTR_ATIME | ATTR_ATIME_SET;

	rc = ll_setattr_raw(file->f_dentry, attr, true);
	if (rc == -ENODATA)
		rc = 0;

out:
	if (hss != NULL)
		OBD_FREE_PTR(hss);

	if (attr != NULL)
		OBD_FREE_PTR(attr);

	return 
rc;
}

/* Main ioctl dispatcher for regular files. */
long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	int flags, rc;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
	       inode->i_generation, inode, cmd);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);

	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
		return -ENOTTY;

	switch(cmd) {
	case LL_IOC_GETFLAGS:
		/* Get the current value of the file flags */
		return put_user(fd->fd_flags, (int *)arg);
	case LL_IOC_SETFLAGS:
	case LL_IOC_CLRFLAGS:
		/* Set or clear specific file flags */
		/* XXX This probably needs checks to ensure the flags are
		 * not abused, and to handle any flag side effects.
		 */
		if (get_user(flags, (int *) arg))
			return -EFAULT;

		if (cmd == LL_IOC_SETFLAGS) {
			/* lockless IO is only permitted with O_DIRECT */
			if ((flags & LL_FILE_IGNORE_LOCK) &&
			    !(file->f_flags & O_DIRECT)) {
				CERROR("%s: unable to disable locking on "
				       "non-O_DIRECT file\n", current->comm);
				return -EINVAL;
			}

			fd->fd_flags |= flags;
		} else {
			fd->fd_flags &= ~flags;
		}
		return 0;
	case LL_IOC_LOV_SETSTRIPE:
		return ll_lov_setstripe(inode, file, arg);
	case LL_IOC_LOV_SETEA:
		return ll_lov_setea(inode, file, arg);
	case LL_IOC_LOV_SWAP_LAYOUTS: {
		struct file *file2;
		struct lustre_swap_layouts lsl;

		if (copy_from_user(&lsl, (char *)arg,
				   sizeof(struct lustre_swap_layouts)))
			return -EFAULT;

		/* both files must be opened for writing */
		if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
			return -EPERM;

		file2 = fget(lsl.sl_fd);
		if (file2 == NULL)
			return -EBADF;

		rc = -EPERM;
		if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
			rc = ll_swap_layouts(file, file2, 
&lsl); 2230 fput(file2); 2231 return rc; 2232 } 2233 case LL_IOC_LOV_GETSTRIPE: 2234 return ll_lov_getstripe(inode, arg); 2235 case LL_IOC_RECREATE_OBJ: 2236 return ll_lov_recreate_obj(inode, arg); 2237 case LL_IOC_RECREATE_FID: 2238 return ll_lov_recreate_fid(inode, arg); 2239 case FSFILT_IOC_FIEMAP: 2240 return ll_ioctl_fiemap(inode, arg); 2241 case FSFILT_IOC_GETFLAGS: 2242 case FSFILT_IOC_SETFLAGS: 2243 return ll_iocontrol(inode, file, cmd, arg); 2244 case FSFILT_IOC_GETVERSION_OLD: 2245 case FSFILT_IOC_GETVERSION: 2246 return put_user(inode->i_generation, (int *)arg); 2247 case LL_IOC_GROUP_LOCK: 2248 return ll_get_grouplock(inode, file, arg); 2249 case LL_IOC_GROUP_UNLOCK: 2250 return ll_put_grouplock(inode, file, arg); 2251 case IOC_OBD_STATFS: 2252 return ll_obd_statfs(inode, (void *)arg); 2253 2254 /* We need to special case any other ioctls we want to handle, 2255 * to send them to the MDS/OST as appropriate and to properly 2256 * network encode the arg field. 2257 case FSFILT_IOC_SETVERSION_OLD: 2258 case FSFILT_IOC_SETVERSION: 2259 */ 2260 case LL_IOC_FLUSHCTX: 2261 return ll_flush_ctx(inode); 2262 case LL_IOC_PATH2FID: { 2263 if (copy_to_user((void *)arg, ll_inode2fid(inode), 2264 sizeof(struct lu_fid))) 2265 return -EFAULT; 2266 2267 return 0; 2268 } 2269 case OBD_IOC_FID2PATH: 2270 return ll_fid2path(inode, (void *)arg); 2271 case LL_IOC_DATA_VERSION: { 2272 struct ioc_data_version idv; 2273 int rc; 2274 2275 if (copy_from_user(&idv, (char *)arg, sizeof(idv))) 2276 return -EFAULT; 2277 2278 rc = ll_data_version(inode, &idv.idv_version, 2279 !(idv.idv_flags & LL_DV_NOFLUSH)); 2280 2281 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv))) 2282 return -EFAULT; 2283 2284 return rc; 2285 } 2286 2287 case LL_IOC_GET_MDTIDX: { 2288 int mdtidx; 2289 2290 mdtidx = ll_get_mdt_idx(inode); 2291 if (mdtidx < 0) 2292 return mdtidx; 2293 2294 if (put_user((int)mdtidx, (int*)arg)) 2295 return -EFAULT; 2296 2297 return 0; 2298 } 2299 case OBD_IOC_GETDTNAME: 
2300 case OBD_IOC_GETMDNAME: 2301 return ll_get_obd_name(inode, cmd, arg); 2302 case LL_IOC_HSM_STATE_GET: { 2303 struct md_op_data *op_data; 2304 struct hsm_user_state *hus; 2305 int rc; 2306 2307 OBD_ALLOC_PTR(hus); 2308 if (hus == NULL) 2309 return -ENOMEM; 2310 2311 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2312 LUSTRE_OPC_ANY, hus); 2313 if (IS_ERR(op_data)) { 2314 OBD_FREE_PTR(hus); 2315 return PTR_ERR(op_data); 2316 } 2317 2318 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), 2319 op_data, NULL); 2320 2321 if (copy_to_user((void *)arg, hus, sizeof(*hus))) 2322 rc = -EFAULT; 2323 2324 ll_finish_md_op_data(op_data); 2325 OBD_FREE_PTR(hus); 2326 return rc; 2327 } 2328 case LL_IOC_HSM_STATE_SET: { 2329 struct hsm_state_set *hss; 2330 int rc; 2331 2332 OBD_ALLOC_PTR(hss); 2333 if (hss == NULL) 2334 return -ENOMEM; 2335 2336 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) { 2337 OBD_FREE_PTR(hss); 2338 return -EFAULT; 2339 } 2340 2341 rc = ll_hsm_state_set(inode, hss); 2342 2343 OBD_FREE_PTR(hss); 2344 return rc; 2345 } 2346 case LL_IOC_HSM_ACTION: { 2347 struct md_op_data *op_data; 2348 struct hsm_current_action *hca; 2349 int rc; 2350 2351 OBD_ALLOC_PTR(hca); 2352 if (hca == NULL) 2353 return -ENOMEM; 2354 2355 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2356 LUSTRE_OPC_ANY, hca); 2357 if (IS_ERR(op_data)) { 2358 OBD_FREE_PTR(hca); 2359 return PTR_ERR(op_data); 2360 } 2361 2362 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), 2363 op_data, NULL); 2364 2365 if (copy_to_user((char *)arg, hca, sizeof(*hca))) 2366 rc = -EFAULT; 2367 2368 ll_finish_md_op_data(op_data); 2369 OBD_FREE_PTR(hca); 2370 return rc; 2371 } 2372 case LL_IOC_SET_LEASE: { 2373 struct ll_inode_info *lli = ll_i2info(inode); 2374 struct obd_client_handle *och = NULL; 2375 bool lease_broken; 2376 fmode_t mode = 0; 2377 2378 switch (arg) { 2379 case F_WRLCK: 2380 if (!(file->f_mode & FMODE_WRITE)) 2381 return -EPERM; 2382 mode = 
FMODE_WRITE; 2383 break; 2384 case F_RDLCK: 2385 if (!(file->f_mode & FMODE_READ)) 2386 return -EPERM; 2387 mode = FMODE_READ; 2388 break; 2389 case F_UNLCK: 2390 mutex_lock(&lli->lli_och_mutex); 2391 if (fd->fd_lease_och != NULL) { 2392 och = fd->fd_lease_och; 2393 fd->fd_lease_och = NULL; 2394 } 2395 mutex_unlock(&lli->lli_och_mutex); 2396 2397 if (och != NULL) { 2398 mode = och->och_flags & 2399 (FMODE_READ|FMODE_WRITE); 2400 rc = ll_lease_close(och, inode, &lease_broken); 2401 if (rc == 0 && lease_broken) 2402 mode = 0; 2403 } else { 2404 rc = -ENOLCK; 2405 } 2406 2407 /* return the type of lease or error */ 2408 return rc < 0 ? rc : (int)mode; 2409 default: 2410 return -EINVAL; 2411 } 2412 2413 CDEBUG(D_INODE, "Set lease with mode %d\n", mode); 2414 2415 /* apply for lease */ 2416 och = ll_lease_open(inode, file, mode, 0); 2417 if (IS_ERR(och)) 2418 return PTR_ERR(och); 2419 2420 rc = 0; 2421 mutex_lock(&lli->lli_och_mutex); 2422 if (fd->fd_lease_och == NULL) { 2423 fd->fd_lease_och = och; 2424 och = NULL; 2425 } 2426 mutex_unlock(&lli->lli_och_mutex); 2427 if (och != NULL) { 2428 /* impossible now that only excl is supported for now */ 2429 ll_lease_close(och, inode, &lease_broken); 2430 rc = -EBUSY; 2431 } 2432 return rc; 2433 } 2434 case LL_IOC_GET_LEASE: { 2435 struct ll_inode_info *lli = ll_i2info(inode); 2436 struct ldlm_lock *lock = NULL; 2437 2438 rc = 0; 2439 mutex_lock(&lli->lli_och_mutex); 2440 if (fd->fd_lease_och != NULL) { 2441 struct obd_client_handle *och = fd->fd_lease_och; 2442 2443 lock = ldlm_handle2lock(&och->och_lease_handle); 2444 if (lock != NULL) { 2445 lock_res_and_lock(lock); 2446 if (!ldlm_is_cancel(lock)) 2447 rc = och->och_flags & 2448 (FMODE_READ | FMODE_WRITE); 2449 unlock_res_and_lock(lock); 2450 ldlm_lock_put(lock); 2451 } 2452 } 2453 mutex_unlock(&lli->lli_och_mutex); 2454 return rc; 2455 } 2456 case LL_IOC_HSM_IMPORT: { 2457 struct hsm_user_import *hui; 2458 2459 OBD_ALLOC_PTR(hui); 2460 if (hui == NULL) 2461 return -ENOMEM; 
2462 2463 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) { 2464 OBD_FREE_PTR(hui); 2465 return -EFAULT; 2466 } 2467 2468 rc = ll_hsm_import(inode, file, hui); 2469 2470 OBD_FREE_PTR(hui); 2471 return rc; 2472 } 2473 default: { 2474 int err; 2475 2476 if (LLIOC_STOP == 2477 ll_iocontrol_call(inode, file, cmd, arg, &err)) 2478 return err; 2479 2480 return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, 2481 (void *)arg); 2482 } 2483 } 2484} 2485 2486 2487loff_t ll_file_seek(struct file *file, loff_t offset, int origin) 2488{ 2489 struct inode *inode = file->f_dentry->d_inode; 2490 loff_t retval, eof = 0; 2491 2492 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) : 2493 (origin == SEEK_CUR) ? file->f_pos : 0); 2494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n", 2495 inode->i_ino, inode->i_generation, inode, retval, retval, 2496 origin); 2497 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); 2498 2499 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) { 2500 retval = ll_glimpse_size(inode); 2501 if (retval != 0) 2502 return retval; 2503 eof = i_size_read(inode); 2504 } 2505 2506 retval = generic_file_llseek_size(file, offset, origin, 2507 ll_file_maxbytes(inode), eof); 2508 return retval; 2509} 2510 2511int ll_flush(struct file *file, fl_owner_t id) 2512{ 2513 struct inode *inode = file->f_dentry->d_inode; 2514 struct ll_inode_info *lli = ll_i2info(inode); 2515 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 2516 int rc, err; 2517 2518 LASSERT(!S_ISDIR(inode->i_mode)); 2519 2520 /* catch async errors that were recorded back when async writeback 2521 * failed for pages in this mapping. */ 2522 rc = lli->lli_async_rc; 2523 lli->lli_async_rc = 0; 2524 err = lov_read_and_clear_async_rc(lli->lli_clob); 2525 if (rc == 0) 2526 rc = err; 2527 2528 /* The application has been told write failure already. 2529 * Do not report failure again. */ 2530 if (fd->fd_write_failed) 2531 return 0; 2532 return rc ? 
-EIO : 0;
}

/**
 * Called to make sure a portion of file has been written out.
 * If \a mode is not CL_FSYNC_LOCAL, OST_SYNC RPCs are sent to the OSTs.
 * (The original comment referenced a nonexistent @local_only parameter;
 * the flush behavior is selected via \a mode.)
 *
 * \param inode         inode whose dirty pages are to be written
 * \param start         first byte of the range
 * \param end           last byte of the range
 * \param mode          one of CL_FSYNC_{NONE,LOCAL,DISCARD,ALL}; any other
 *                      value is rejected with -EINVAL
 * \param ignore_layout non-zero to run the sync even during a layout change
 *
 * \retval number of pages written (fi_nr_written) on success
 * \retval negative errno on failure
 */
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
		       enum cl_fsync_mode mode, int ignore_layout)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct cl_io *io;
	struct obd_capa *capa = NULL;
	struct cl_fsync_io *fio;
	int result;

	/* validate mode before doing any setup */
	if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
	    mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
		return -EINVAL;

	/* nested env: this may be called from within another cl_io */
	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		return PTR_ERR(env);

	/* capability for OSS writes; released via capa_put() below */
	capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);

	io = ccc_env_thread_io(env);
	io->ci_obj = cl_i2info(inode)->lli_clob;
	io->ci_ignore_layout = ignore_layout;

	/* initialize parameters for sync */
	fio = &io->u.ci_fsync;
	fio->fi_capa = capa;
	fio->fi_start = start;
	fio->fi_end = end;
	fio->fi_fid = ll_inode2fid(inode);
	fio->fi_mode = mode;
	fio->fi_nr_written = 0;

	if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
		result = cl_io_loop(env, io);
	else
		result = io->ci_result;
	/* on success report the page count accumulated by the fsync io */
	if (result == 0)
		result = fio->fi_nr_written;
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);

	capa_put(capa);

	return result;
}

/*
 * When dentry is provided (the 'else' case), *file->f_dentry may be
 * null and dentry must be used directly rather than pulled from
 * *file->f_dentry as is done otherwise.
2592 */ 2593 2594int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) 2595{ 2596 struct dentry *dentry = file->f_dentry; 2597 struct inode *inode = dentry->d_inode; 2598 struct ll_inode_info *lli = ll_i2info(inode); 2599 struct ptlrpc_request *req; 2600 struct obd_capa *oc; 2601 int rc, err; 2602 2603 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, 2604 inode->i_generation, inode); 2605 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); 2606 2607 rc = filemap_write_and_wait_range(inode->i_mapping, start, end); 2608 mutex_lock(&inode->i_mutex); 2609 2610 /* catch async errors that were recorded back when async writeback 2611 * failed for pages in this mapping. */ 2612 if (!S_ISDIR(inode->i_mode)) { 2613 err = lli->lli_async_rc; 2614 lli->lli_async_rc = 0; 2615 if (rc == 0) 2616 rc = err; 2617 err = lov_read_and_clear_async_rc(lli->lli_clob); 2618 if (rc == 0) 2619 rc = err; 2620 } 2621 2622 oc = ll_mdscapa_get(inode); 2623 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, 2624 &req); 2625 capa_put(oc); 2626 if (!rc) 2627 rc = err; 2628 if (!err) 2629 ptlrpc_req_finished(req); 2630 2631 if (datasync && S_ISREG(inode->i_mode)) { 2632 struct ll_file_data *fd = LUSTRE_FPRIVATE(file); 2633 2634 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, 2635 CL_FSYNC_ALL, 0); 2636 if (rc == 0 && err < 0) 2637 rc = err; 2638 if (rc < 0) 2639 fd->fd_write_failed = true; 2640 else 2641 fd->fd_write_failed = false; 2642 } 2643 2644 mutex_unlock(&inode->i_mutex); 2645 return rc; 2646} 2647 2648int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) 2649{ 2650 struct inode *inode = file->f_dentry->d_inode; 2651 struct ll_sb_info *sbi = ll_i2sbi(inode); 2652 struct ldlm_enqueue_info einfo = { 2653 .ei_type = LDLM_FLOCK, 2654 .ei_cb_cp = ldlm_flock_completion_ast, 2655 .ei_cbdata = file_lock, 2656 }; 2657 struct md_op_data *op_data; 2658 struct lustre_handle lockh = {0}; 2659 ldlm_policy_data_t flock = {{0}}; 2660 
int flags = 0; 2661 int rc; 2662 int rc2 = 0; 2663 2664 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", 2665 inode->i_ino, file_lock); 2666 2667 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); 2668 2669 if (file_lock->fl_flags & FL_FLOCK) { 2670 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK)); 2671 /* flocks are whole-file locks */ 2672 flock.l_flock.end = OFFSET_MAX; 2673 /* For flocks owner is determined by the local file desctiptor*/ 2674 flock.l_flock.owner = (unsigned long)file_lock->fl_file; 2675 } else if (file_lock->fl_flags & FL_POSIX) { 2676 flock.l_flock.owner = (unsigned long)file_lock->fl_owner; 2677 flock.l_flock.start = file_lock->fl_start; 2678 flock.l_flock.end = file_lock->fl_end; 2679 } else { 2680 return -EINVAL; 2681 } 2682 flock.l_flock.pid = file_lock->fl_pid; 2683 2684 /* Somewhat ugly workaround for svc lockd. 2685 * lockd installs custom fl_lmops->lm_compare_owner that checks 2686 * for the fl_owner to be the same (which it always is on local node 2687 * I guess between lockd processes) and then compares pid. 2688 * As such we assign pid to the owner field to make it all work, 2689 * conflict with normal locks is unlikely since pid space and 2690 * pointer space for current->files are not intersecting */ 2691 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner) 2692 flock.l_flock.owner = (unsigned long)file_lock->fl_pid; 2693 2694 switch (file_lock->fl_type) { 2695 case F_RDLCK: 2696 einfo.ei_mode = LCK_PR; 2697 break; 2698 case F_UNLCK: 2699 /* An unlock request may or may not have any relation to 2700 * existing locks so we may not be able to pass a lock handle 2701 * via a normal ldlm_lock_cancel() request. The request may even 2702 * unlock a byte range in the middle of an existing lock. In 2703 * order to process an unlock request we need all of the same 2704 * information that is given with a normal read or write record 2705 * lock request. 
To avoid creating another ldlm unlock (cancel) 2706 * message we'll treat a LCK_NL flock request as an unlock. */ 2707 einfo.ei_mode = LCK_NL; 2708 break; 2709 case F_WRLCK: 2710 einfo.ei_mode = LCK_PW; 2711 break; 2712 default: 2713 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", 2714 file_lock->fl_type); 2715 return -ENOTSUPP; 2716 } 2717 2718 switch (cmd) { 2719 case F_SETLKW: 2720#ifdef F_SETLKW64 2721 case F_SETLKW64: 2722#endif 2723 flags = 0; 2724 break; 2725 case F_SETLK: 2726#ifdef F_SETLK64 2727 case F_SETLK64: 2728#endif 2729 flags = LDLM_FL_BLOCK_NOWAIT; 2730 break; 2731 case F_GETLK: 2732#ifdef F_GETLK64 2733 case F_GETLK64: 2734#endif 2735 flags = LDLM_FL_TEST_LOCK; 2736 /* Save the old mode so that if the mode in the lock changes we 2737 * can decrement the appropriate reader or writer refcount. */ 2738 file_lock->fl_type = einfo.ei_mode; 2739 break; 2740 default: 2741 CERROR("unknown fcntl lock command: %d\n", cmd); 2742 return -EINVAL; 2743 } 2744 2745 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, 2746 LUSTRE_OPC_ANY, NULL); 2747 if (IS_ERR(op_data)) 2748 return PTR_ERR(op_data); 2749 2750 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, " 2751 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid, 2752 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); 2753 2754 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, 2755 op_data, &lockh, &flock, 0, NULL /* req */, flags); 2756 2757 if ((file_lock->fl_flags & FL_FLOCK) && 2758 (rc == 0 || file_lock->fl_type == F_UNLCK)) 2759 rc2 = flock_lock_file_wait(file, file_lock); 2760 if ((file_lock->fl_flags & FL_POSIX) && 2761 (rc == 0 || file_lock->fl_type == F_UNLCK) && 2762 !(flags & LDLM_FL_TEST_LOCK)) 2763 rc2 = posix_lock_file_wait(file, file_lock); 2764 2765 if (rc2 && file_lock->fl_type != F_UNLCK) { 2766 einfo.ei_mode = LCK_NL; 2767 md_enqueue(sbi->ll_md_exp, &einfo, NULL, 2768 op_data, &lockh, &flock, 0, NULL /* req */, flags); 2769 rc = rc2; 2770 } 2771 2772 
ll_finish_md_op_data(op_data); 2773 2774 return rc; 2775} 2776 2777int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock) 2778{ 2779 return -ENOSYS; 2780} 2781 2782/** 2783 * test if some locks matching bits and l_req_mode are acquired 2784 * - bits can be in different locks 2785 * - if found clear the common lock bits in *bits 2786 * - the bits not found, are kept in *bits 2787 * \param inode [IN] 2788 * \param bits [IN] searched lock bits [IN] 2789 * \param l_req_mode [IN] searched lock mode 2790 * \retval boolean, true iff all bits are found 2791 */ 2792int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode) 2793{ 2794 struct lustre_handle lockh; 2795 ldlm_policy_data_t policy; 2796 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ? 2797 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode; 2798 struct lu_fid *fid; 2799 __u64 flags; 2800 int i; 2801 2802 if (!inode) 2803 return 0; 2804 2805 fid = &ll_i2info(inode)->lli_fid; 2806 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid), 2807 ldlm_lockname[mode]); 2808 2809 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; 2810 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) { 2811 policy.l_inodebits.bits = *bits & (1 << i); 2812 if (policy.l_inodebits.bits == 0) 2813 continue; 2814 2815 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, 2816 &policy, mode, &lockh)) { 2817 struct ldlm_lock *lock; 2818 2819 lock = ldlm_handle2lock(&lockh); 2820 if (lock) { 2821 *bits &= 2822 ~(lock->l_policy_data.l_inodebits.bits); 2823 LDLM_LOCK_PUT(lock); 2824 } else { 2825 *bits &= ~policy.l_inodebits.bits; 2826 } 2827 } 2828 } 2829 return *bits == 0; 2830} 2831 2832ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, 2833 struct lustre_handle *lockh, __u64 flags, 2834 ldlm_mode_t mode) 2835{ 2836 ldlm_policy_data_t policy = { .l_inodebits = {bits}}; 2837 struct lu_fid *fid; 2838 ldlm_mode_t rc; 2839 2840 fid = &ll_i2info(inode)->lli_fid; 
	CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));

	rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
			   fid, LDLM_IBITS, &policy, mode, lockh);

	return rc;
}

/* Post-process the result of a revalidation RPC.
 * -ENOENT means the object was unlinked on the MDS: drop the local nlink
 * and, for anything that is neither a regular file nor a directory,
 * swallow the error (nothing further to validate).  Regular files and
 * directories keep returning -ENOENT so callers can react. */
static int ll_inode_revalidate_fini(struct inode *inode, int rc)
{
	/* Already unlinked. Just update nlink and return success */
	if (rc == -ENOENT) {
		clear_nlink(inode);
		/* This path cannot be hit for regular files unless in
		 * case of obscure races, so no need to validate size.
		 */
		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
			return 0;
	} else if (rc != 0) {
		CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       PFID(ll_inode2fid(inode)), rc);
	}

	return rc;
}

/* Revalidate the attributes/locks named by \a ibits for \a dentry's inode
 * against the MDS.  Two paths: an intent-lock getattr-by-fid when the
 * server supports OBD_CONNECT_ATTRFID, otherwise a plain md_getattr when
 * no matching ibits lock is cached.  Function continues on the following
 * chunk. */
int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
			     __u64 ibits)
{
	struct inode *inode = dentry->d_inode;
	struct ptlrpc_request *req = NULL;
	struct obd_export *exp;
	int rc = 0;

	LASSERT(inode != NULL);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
	       inode->i_ino, inode->i_generation, inode, dentry->d_name.name);

	exp = ll_i2mdexp(inode);

	/* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
	 * But under CMD case, it caused some lock issues, should be fixed
	 * with new CMD ibits lock. See bug 12718 */
	if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
		struct lookup_intent oit = { .it_op = IT_GETATTR };
		struct md_op_data *op_data;

		if (ibits == MDS_INODELOCK_LOOKUP)
			oit.it_op = IT_LOOKUP;

		/* Call getattr by fid, so do not provide name at all.
*/ 2894 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode, 2895 dentry->d_inode, NULL, 0, 0, 2896 LUSTRE_OPC_ANY, NULL); 2897 if (IS_ERR(op_data)) 2898 return PTR_ERR(op_data); 2899 2900 oit.it_create_mode |= M_CHECK_STALE; 2901 rc = md_intent_lock(exp, op_data, NULL, 0, 2902 /* we are not interested in name 2903 based lookup */ 2904 &oit, 0, &req, 2905 ll_md_blocking_ast, 0); 2906 ll_finish_md_op_data(op_data); 2907 oit.it_create_mode &= ~M_CHECK_STALE; 2908 if (rc < 0) { 2909 rc = ll_inode_revalidate_fini(inode, rc); 2910 GOTO (out, rc); 2911 } 2912 2913 rc = ll_revalidate_it_finish(req, &oit, dentry); 2914 if (rc != 0) { 2915 ll_intent_release(&oit); 2916 GOTO(out, rc); 2917 } 2918 2919 /* Unlinked? Unhash dentry, so it is not picked up later by 2920 do_lookup() -> ll_revalidate_it(). We cannot use d_drop 2921 here to preserve get_cwd functionality on 2.6. 2922 Bug 10503 */ 2923 if (!dentry->d_inode->i_nlink) 2924 d_lustre_invalidate(dentry, 0); 2925 2926 ll_lookup_finish_locks(&oit, dentry); 2927 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) { 2928 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); 2929 obd_valid valid = OBD_MD_FLGETATTR; 2930 struct md_op_data *op_data; 2931 int ealen = 0; 2932 2933 if (S_ISREG(inode->i_mode)) { 2934 rc = ll_get_max_mdsize(sbi, &ealen); 2935 if (rc) 2936 return rc; 2937 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; 2938 } 2939 2940 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 2941 0, ealen, LUSTRE_OPC_ANY, 2942 NULL); 2943 if (IS_ERR(op_data)) 2944 return PTR_ERR(op_data); 2945 2946 op_data->op_valid = valid; 2947 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one 2948 * capa for this inode. Because we only keep capas of dirs 2949 * fresh. 
*/ 2950 rc = md_getattr(sbi->ll_md_exp, op_data, &req); 2951 ll_finish_md_op_data(op_data); 2952 if (rc) { 2953 rc = ll_inode_revalidate_fini(inode, rc); 2954 return rc; 2955 } 2956 2957 rc = ll_prep_inode(&inode, req, NULL, NULL); 2958 } 2959out: 2960 ptlrpc_req_finished(req); 2961 return rc; 2962} 2963 2964int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it, 2965 __u64 ibits) 2966{ 2967 struct inode *inode = dentry->d_inode; 2968 int rc; 2969 2970 rc = __ll_inode_revalidate_it(dentry, it, ibits); 2971 if (rc != 0) 2972 return rc; 2973 2974 /* if object isn't regular file, don't validate size */ 2975 if (!S_ISREG(inode->i_mode)) { 2976 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime; 2977 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime; 2978 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime; 2979 } else { 2980 /* In case of restore, the MDT has the right size and has 2981 * already send it back without granting the layout lock, 2982 * inode is up-to-date so glimpse is useless. 
2983 * Also to glimpse we need the layout, in case of a running 2984 * restore the MDT holds the layout lock so the glimpse will 2985 * block up to the end of restore (getattr will block) 2986 */ 2987 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING)) 2988 rc = ll_glimpse_size(inode); 2989 } 2990 return rc; 2991} 2992 2993int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, 2994 struct lookup_intent *it, struct kstat *stat) 2995{ 2996 struct inode *inode = de->d_inode; 2997 struct ll_sb_info *sbi = ll_i2sbi(inode); 2998 struct ll_inode_info *lli = ll_i2info(inode); 2999 int res = 0; 3000 3001 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE | 3002 MDS_INODELOCK_LOOKUP); 3003 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); 3004 3005 if (res) 3006 return res; 3007 3008 stat->dev = inode->i_sb->s_dev; 3009 if (ll_need_32bit_api(sbi)) 3010 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1); 3011 else 3012 stat->ino = inode->i_ino; 3013 stat->mode = inode->i_mode; 3014 stat->nlink = inode->i_nlink; 3015 stat->uid = inode->i_uid; 3016 stat->gid = inode->i_gid; 3017 stat->rdev = inode->i_rdev; 3018 stat->atime = inode->i_atime; 3019 stat->mtime = inode->i_mtime; 3020 stat->ctime = inode->i_ctime; 3021 stat->blksize = 1 << inode->i_blkbits; 3022 3023 stat->size = i_size_read(inode); 3024 stat->blocks = inode->i_blocks; 3025 3026 return 0; 3027} 3028int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) 3029{ 3030 struct lookup_intent it = { .it_op = IT_GETATTR }; 3031 3032 return ll_getattr_it(mnt, de, &it, stat); 3033} 3034 3035int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 3036 __u64 start, __u64 len) 3037{ 3038 int rc; 3039 size_t num_bytes; 3040 struct ll_user_fiemap *fiemap; 3041 unsigned int extent_count = fieinfo->fi_extents_max; 3042 3043 num_bytes = sizeof(*fiemap) + (extent_count * 3044 sizeof(struct ll_fiemap_extent)); 3045 OBD_ALLOC_LARGE(fiemap, num_bytes); 3046 3047 if (fiemap == NULL) 3048 return 
-ENOMEM; 3049 3050 fiemap->fm_flags = fieinfo->fi_flags; 3051 fiemap->fm_extent_count = fieinfo->fi_extents_max; 3052 fiemap->fm_start = start; 3053 fiemap->fm_length = len; 3054 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start, 3055 sizeof(struct ll_fiemap_extent)); 3056 3057 rc = ll_do_fiemap(inode, fiemap, num_bytes); 3058 3059 fieinfo->fi_flags = fiemap->fm_flags; 3060 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents; 3061 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0], 3062 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent)); 3063 3064 OBD_FREE_LARGE(fiemap, num_bytes); 3065 return rc; 3066} 3067 3068struct posix_acl * ll_get_acl(struct inode *inode, int type) 3069{ 3070 struct ll_inode_info *lli = ll_i2info(inode); 3071 struct posix_acl *acl = NULL; 3072 3073 spin_lock(&lli->lli_lock); 3074 /* VFS' acl_permission_check->check_acl will release the refcount */ 3075 acl = posix_acl_dup(lli->lli_posix_acl); 3076 spin_unlock(&lli->lli_lock); 3077 3078 return acl; 3079} 3080 3081 3082int ll_inode_permission(struct inode *inode, int mask) 3083{ 3084 int rc = 0; 3085 3086#ifdef MAY_NOT_BLOCK 3087 if (mask & MAY_NOT_BLOCK) 3088 return -ECHILD; 3089#endif 3090 3091 /* as root inode are NOT getting validated in lookup operation, 3092 * need to do it before permission check. 
*/

	if (inode == inode->i_sb->s_root->d_inode) {
		struct lookup_intent it = { .it_op = IT_LOOKUP };

		rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
					      MDS_INODELOCK_LOOKUP);
		if (rc)
			return rc;
	}

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
	       inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);

	/* remote-client mounts defer permission checks to the MDS */
	if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
		return lustre_check_remote_perm(inode, mask);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
	rc = generic_permission(inode, mask);

	return rc;
}

/* -o localflock - only provides locally consistent flock locks */
struct file_operations ll_file_operations = {
	.read = ll_file_read,
	.aio_read = ll_file_aio_read,
	.write = ll_file_write,
	.aio_write = ll_file_aio_write,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush
};

/* default: cluster-coherent flock/posix locks via the MDS */
struct file_operations ll_file_operations_flock = {
	.read = ll_file_read,
	.aio_read = ll_file_aio_read,
	.write = ll_file_write,
	.aio_write = ll_file_aio_write,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_flock,
	.lock = ll_file_flock
};

/* These are for -o noflock - to return ENOSYS on flock calls */
struct file_operations ll_file_operations_noflock = {
	.read = ll_file_read,
	.aio_read = ll_file_aio_read,
	.write = ll_file_write,
	.aio_write = ll_file_aio_write,
	.unlocked_ioctl = ll_file_ioctl,
	.open = ll_file_open,
	.release = ll_file_release,
	.mmap = ll_file_mmap,
	.llseek = ll_file_seek,
	.splice_read = ll_file_splice_read,
	.fsync = ll_fsync,
	.flush = ll_flush,
	.flock = ll_file_noflock,
	.lock = ll_file_noflock
};

struct inode_operations ll_file_inode_operations = {
	.setattr = ll_setattr,
	.getattr = ll_getattr,
	.permission = ll_inode_permission,
	.setxattr = ll_setxattr,
	.getxattr = ll_getxattr,
	.listxattr = ll_listxattr,
	.removexattr = ll_removexattr,
	.fiemap = ll_fiemap,
	.get_acl = ll_get_acl,
};

/* dynamic ioctl number support routines */
/* registry of dynamically registered ioctl handlers, protected by ioc_sem:
 * readers (dispatch) take it shared, (un)register take it exclusive */
static struct llioc_ctl_data {
	struct rw_semaphore ioc_sem;
	struct list_head ioc_head;
} llioc = {
	__RWSEM_INITIALIZER(llioc.ioc_sem),
	LIST_HEAD_INIT(llioc.ioc_head)
};


/* one registered handler: callback plus the ioctl numbers it serves
 * (iocd_cmd is a trailing variable-length array) */
struct llioc_data {
	struct list_head iocd_list;
	unsigned int iocd_size;
	llioc_callback_t iocd_cb;
	unsigned int iocd_count;
	unsigned int iocd_cmd[0];
};

/* Register \a cb for the \a count ioctl numbers in \a cmd.
 * Returns an opaque cookie for ll_iocontrol_unregister(), or NULL on
 * invalid arguments or allocation failure. */
void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
{
	unsigned int size;
	struct llioc_data *in_data = NULL;

	if (cb == NULL || cmd == NULL ||
	    count > LLIOC_MAX_CMD || count < 0)
		return NULL;

	size = sizeof(*in_data) + count * sizeof(unsigned int);
	OBD_ALLOC(in_data, size);
	if (in_data == NULL)
		return NULL;

	memset(in_data, 0, sizeof(*in_data));
	in_data->iocd_size = size;
	in_data->iocd_cb = cb;
	in_data->iocd_count = count;
	memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);

	down_write(&llioc.ioc_sem);
	list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
	up_write(&llioc.ioc_sem);

	return in_data;
}

/* Remove the handler identified by \a magic (the cookie returned from
 * ll_iocontrol_register) from the registry and free it. */
void ll_iocontrol_unregister(void *magic)
{
	struct llioc_data *tmp;

	if (magic == NULL)
		return;

	down_write(&llioc.ioc_sem);
	list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
		if (tmp == magic) {
			unsigned int size = tmp->iocd_size;

			list_del(&tmp->iocd_list);
			/* drop the sem before freeing; entry is already
			 * unlinked so no one else can reach it */
			up_write(&llioc.ioc_sem);

			OBD_FREE(tmp, size);
			return;
		}
	}
	up_write(&llioc.ioc_sem);

	CWARN("didn't find iocontrol register block with magic: %p\n", magic);
}

EXPORT_SYMBOL(ll_iocontrol_register);
EXPORT_SYMBOL(ll_iocontrol_unregister);

/* Dispatch \a cmd to the first registered handler that claims it.
 * Returns LLIOC_STOP when a handler consumed the ioctl (its result is
 * stored through \a rcp), LLIOC_CONT otherwise. */
enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
			unsigned int cmd, unsigned long arg, int *rcp)
{
	enum llioc_iter ret = LLIOC_CONT;
	struct llioc_data *data;
	int rc = -EINVAL, i;

	down_read(&llioc.ioc_sem);
	list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
		for (i = 0; i < data->iocd_count; i++) {
			if (cmd != data->iocd_cmd[i])
				continue;

			ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
			break;
		}

		if (ret == LLIOC_STOP)
			break;
	}
	up_read(&llioc.ioc_sem);

	if (rcp)
		*rcp = rc;
	return ret;
}

/* Apply a layout configuration to the inode's cl_object.
 * Continues on the following chunk. */
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_env_nest nest;
	struct lu_env *env;
	int result;

	if (lli->lli_clob == NULL)
		return 0;

	env = cl_env_nested_get(&nest);
	if (IS_ERR(env))
		return PTR_ERR(env);

	result = cl_conf_set(env, lli->lli_clob, conf);
	cl_env_nested_put(&nest, env);

	if (conf->coc_opc == OBJECT_CONF_SET) {
		struct ldlm_lock *lock = conf->coc_lock;

		LASSERT(lock != NULL);
		LASSERT(ldlm_has_layout(lock));
		if (result == 0) {
			/* it can only be allowed to match after layout is
			 * applied to inode otherwise false layout would be
			 * seen. Applying layout shoud happen before dropping
			 * the intent lock.
 */
			ldlm_lock_allow_match(lock);
		}
	}
	return result;
}

/**
 * Fetch the file layout from the MDT with a getxattr RPC and install it as
 * the LVB data of @lock, if the lock does not already carry a ready LVB.
 *
 * If the layout lock was granted right away, the layout arrives in the DLM
 * LVB of the reply and there is nothing to do here.  Only when the lock was
 * blocked and later granted via completion AST do we need this explicit
 * fetch, because the completion-AST path has no buffer large enough for the
 * layout.
 *
 * \param inode  inode whose layout is wanted (must have a sane FID)
 * \param lock   granted LAYOUT lock; on success its l_lvb_data/l_lvb_len
 *               are (re)set to a freshly allocated copy of the layout.
 *               Ownership of that buffer passes to the lock; it is freed
 *               by the LDLM when the lock is destroyed.
 *
 * \retval 0 on success (including an empty layout), negative errno on error.
 */
static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)

{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obd_capa *oc;
	struct ptlrpc_request *req;
	struct mdt_body *body;
	void *lvbdata;
	void *lmm;
	int lmmsize;
	int rc;

	CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
	       PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
	       lock->l_lvb_data, lock->l_lvb_len);

	/* nothing to do if the lock already carries a ready LVB */
	if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
		return 0;

	/* if layout lock was granted right away, the layout is returned
	 * within DLM_LVB of dlm reply; otherwise if the lock was ever
	 * blocked and then granted via completion ast, we have to fetch
	 * layout here. Please note that we can't use the LVB buffer in
	 * completion AST because it doesn't have a large enough buffer */
	oc = ll_mdscapa_get(inode);
	rc = ll_get_max_mdsize(sbi, &lmmsize);
	if (rc == 0)
		rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
				OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
				lmmsize, 0, &req);
	capa_put(oc);
	if (rc < 0)
		return rc;

	/* server must not claim more EA data than the buffer we asked for */
	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	if (body == NULL || body->eadatasize > lmmsize)
		GOTO(out, rc = -EPROTO);

	lmmsize = body->eadatasize;
	if (lmmsize == 0) /* empty layout */
		GOTO(out, rc = 0);

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
	if (lmm == NULL)
		GOTO(out, rc = -EFAULT);

	/* copy the layout out of the request before it is freed; the copy
	 * becomes the lock's LVB and outlives the RPC */
	OBD_ALLOC_LARGE(lvbdata, lmmsize);
	if (lvbdata == NULL)
		GOTO(out, rc = -ENOMEM);

	memcpy(lvbdata, lmm, lmmsize);
	lock_res_and_lock(lock);
	/* replace a stale LVB (e.g. from an earlier generation) if present */
	if (lock->l_lvb_data != NULL)
		OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);

	lock->l_lvb_data = lvbdata;
	lock->l_lvb_len = lmmsize;
	unlock_res_and_lock(lock);

out:
	/* req is valid here: every path reaching 'out' follows a successful
	 * md_getxattr() which set it */
	ptlrpc_req_finished(req);
	return rc;
}

/**
 * Apply the layout to the inode. Layout lock is held and will be released
 * in this function.
 *
 * The caller transfers both its ldlm reference (mode @mode on @lockh) and
 * implicitly one handle2lock reference to this function; both are dropped
 * at 'out' regardless of success or failure.
 *
 * \param lockh  handle of the granted LAYOUT lock
 * \param mode   mode the lock is held in (matched or enqueued)
 * \param inode  inode to (re)configure
 * \param gen    out: layout generation on success
 * \param reconf true to force re-fetching and re-applying the layout even
 *               if the LVB is not ready; false to only use a ready LVB
 *
 * \retval 0 on success, -ENODATA if no layout is available and !reconf,
 *         -EAGAIN if IO is still using the old layout, other negative
 *         errno on error.
 */
static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
				struct inode *inode, __u32 *gen, bool reconf)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info    *sbi = ll_i2sbi(inode);
	struct ldlm_lock *lock;
	struct lustre_md md = { NULL };
	struct cl_object_conf conf;
	int rc = 0;
	bool lvb_ready;
	bool wait_layout = false;

	LASSERT(lustre_handle_is_used(lockh));

	lock = ldlm_handle2lock(lockh);
	LASSERT(lock != NULL);
	LASSERT(ldlm_has_layout(lock));

	LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
		   inode, PFID(&lli->lli_fid), reconf);

	/* in case this is a caching lock and reinstate with new inode */
	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);

	lock_res_and_lock(lock);
	lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
	unlock_res_and_lock(lock);
	/* checking lvb_ready is racy but this is okay. The worst case is
	 * that multiple processes may configure the file at the same time. */
	if (lvb_ready || !reconf) {
		rc = -ENODATA;
		if (lvb_ready) {
			/* layout_gen must be valid if the layout lock is not
			 * cancelled and the stripe has already been set */
			*gen = lli->lli_layout_gen;
			rc = 0;
		}
		GOTO(out, rc);
	}

	rc = ll_layout_fetch(inode, lock);
	if (rc < 0)
		GOTO(out, rc);

	/* for layout lock, lmm is returned in lock's lvb.
	 * lvb_data is immutable if the lock is held so it's safe to access it
	 * without res lock. See the description in ldlm_lock_decref_internal()
	 * for the condition to free lvb_data of layout lock */
	if (lock->l_lvb_data != NULL) {
		rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
				  lock->l_lvb_data, lock->l_lvb_len);
		if (rc >= 0) {
			/* an empty layout (no lsm) still gets a well-defined
			 * generation */
			*gen = LL_LAYOUT_GEN_EMPTY;
			if (md.lsm != NULL)
				*gen = md.lsm->lsm_layout_gen;
			rc = 0;
		} else {
			CERROR("%s: file "DFID" unpackmd error: %d\n",
				ll_get_fsname(inode->i_sb, NULL, 0),
				PFID(&lli->lli_fid), rc);
		}
	}
	if (rc < 0)
		GOTO(out, rc);

	/* set layout to file. Unlikely this will fail as old layout was
	 * surely eliminated */
	memset(&conf, 0, sizeof(conf));
	conf.coc_opc = OBJECT_CONF_SET;
	conf.coc_inode = inode;
	conf.coc_lock = lock;
	conf.u.coc_md = &md;
	rc = ll_layout_conf(inode, &conf);

	if (md.lsm != NULL)
		obd_free_memmd(sbi->ll_dt_exp, &md.lsm);

	/* refresh layout failed, need to wait */
	wait_layout = rc == -EBUSY;

out:
	/* drop the reference taken by ldlm_handle2lock() above, then the
	 * caller's mode reference; order matters for LDLM accounting */
	LDLM_LOCK_PUT(lock);
	ldlm_lock_decref(lockh, mode);

	/* wait for IO to complete if it's still being used. */
	if (wait_layout) {
		CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
			ll_get_fsname(inode->i_sb, NULL, 0),
			inode, PFID(&lli->lli_fid));

		memset(&conf, 0, sizeof(conf));
		conf.coc_opc = OBJECT_CONF_WAIT;
		conf.coc_inode = inode;
		rc = ll_layout_conf(inode, &conf);
		if (rc == 0)
			rc = -EAGAIN;	/* tell the caller to retry */

		CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
			PFID(&lli->lli_fid), rc);
	}
	return rc;
}

/**
 * This function checks if there exists a LAYOUT lock on the client side,
 * or enqueues it if it doesn't have one in cache.
 *
 * This function will not hold the layout lock, so it may be revoked any time
 * after this function returns. Any operation that depends on the layout
 * should be redone in that case.
 *
 * This function should be called before lov_io_init() to get an up-to-date
 * layout version; the caller should save the version number, and after IO
 * is finished this function should be called again to verify that the layout
 * was not changed during IO time.
 *
 * \param inode  regular file with a sane FID
 * \param gen    out: current layout generation (always written; set to the
 *               cached generation even when layout locking is disabled)
 *
 * \retval 0 on success, negative errno on failure.
 */
int ll_layout_refresh(struct inode *inode, __u32 *gen)
{
	struct ll_inode_info  *lli = ll_i2info(inode);
	struct ll_sb_info     *sbi = ll_i2sbi(inode);
	struct md_op_data     *op_data;
	struct lookup_intent   it;
	struct lustre_handle   lockh;
	ldlm_mode_t	       mode;
	struct ldlm_enqueue_info einfo = {
		.ei_type = LDLM_IBITS,
		.ei_mode = LCK_CR,
		.ei_cb_bl = ll_md_blocking_ast,
		.ei_cb_cp = ldlm_completion_ast,
	};
	int rc;

	*gen = lli->lli_layout_gen;
	if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
		return 0;

	/* sanity checks */
	LASSERT(fid_is_sane(ll_inode2fid(inode)));
	LASSERT(S_ISREG(inode->i_mode));

	/* mostly layout lock is caching on the local side, so try to match
	 * it before grabbing layout lock mutex. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		/* reconf=false: only use the lock if its LVB is ready */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
		if (rc == 0)
			return 0;

		/* better hold lli_layout_mutex to try again otherwise
		 * it will have starvation problem. */
	}

	/* take layout lock mutex to enqueue layout lock exclusively. */
	mutex_lock(&lli->lli_layout_mutex);

again:
	/* try again. Maybe somebody else has done this. */
	mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
			       LCK_CR | LCK_CW | LCK_PR | LCK_PW);
	if (mode != 0) { /* hit cached lock */
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;

		mutex_unlock(&lli->lli_layout_mutex);
		return rc;
	}

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
			0, 0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_layout_mutex);
		return PTR_ERR(op_data);
	}

	/* have to enqueue one */
	memset(&it, 0, sizeof(it));
	it.it_op = IT_LAYOUT;
	lockh.cookie = 0ULL;

	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
			ll_get_fsname(inode->i_sb, NULL, 0), inode,
			PFID(&lli->lli_fid));

	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
			NULL, 0, NULL, 0);
	/* release the intent's RPC reply before touching the intent further;
	 * the layout itself lives in the lock's LVB, not in the reply */
	if (it.d.lustre.it_data != NULL)
		ptlrpc_req_finished(it.d.lustre.it_data);
	it.d.lustre.it_data = NULL;

	ll_finish_md_op_data(op_data);

	/* take over the lock reference from the intent so that
	 * ll_intent_drop_lock() below does not release it; it is released
	 * by ll_layout_lock_set() instead */
	mode = it.d.lustre.it_lock_mode;
	it.d.lustre.it_lock_mode = 0;
	ll_intent_drop_lock(&it);

	if (rc == 0) {
		/* set lock data in case this is a new lock */
		ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
		rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
		if (rc == -EAGAIN)
			goto again;
	}
	mutex_unlock(&lli->lli_layout_mutex);

	return rc;
}

/**
 * Send an HSM restore request for @inode to the MDT.
 *
 * Builds a single-item HUA_RESTORE hsm_user_request covering the whole file
 * (length = -1, i.e. to EOF) and submits it via obd_iocontrol().
 *
 * NOTE(review): relies on OBD_ALLOC returning zeroed memory so that
 * hui_extent.offset and the remaining request fields start at 0 — confirm.
 *
 * \retval 0 on success, negative errno on failure.
 */
int ll_layout_restore(struct inode *inode)
{
	struct hsm_user_request	*hur;
	int			 len, rc;

	len = sizeof(struct hsm_user_request) +
	      sizeof(struct hsm_user_item);
	OBD_ALLOC(hur, len);
	if (hur == NULL)
		return -ENOMEM;

	hur->hur_request.hr_action = HUA_RESTORE;
	hur->hur_request.hr_archive_id = 0;
	hur->hur_request.hr_flags = 0;
	memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
	       sizeof(hur->hur_user_item[0].hui_fid));
	/* length = -1 means "restore up to end of file" */
	hur->hur_user_item[0].hui_extent.length = -1;
	hur->hur_request.hr_itemcount = 1;
	rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
			   len, hur, NULL);
	OBD_FREE(hur, len);
	return rc;
}