filemap_xip.c revision 870f481793b585323fbda3e87c54efc116f46351
1/* 2 * linux/mm/filemap_xip.c 3 * 4 * Copyright (C) 2005 IBM Corporation 5 * Author: Carsten Otte <cotte@de.ibm.com> 6 * 7 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds 8 * 9 */ 10 11#include <linux/fs.h> 12#include <linux/pagemap.h> 13#include <linux/module.h> 14#include <linux/uio.h> 15#include <linux/rmap.h> 16#include <asm/tlbflush.h> 17#include "filemap.h" 18 19/* 20 * This is a file read routine for execute in place files, and uses 21 * the mapping->a_ops->get_xip_page() function for the actual low-level 22 * stuff. 23 * 24 * Note the struct file* is not used at all. It may be NULL. 25 */ 26static void 27do_xip_mapping_read(struct address_space *mapping, 28 struct file_ra_state *_ra, 29 struct file *filp, 30 loff_t *ppos, 31 read_descriptor_t *desc, 32 read_actor_t actor) 33{ 34 struct inode *inode = mapping->host; 35 unsigned long index, end_index, offset; 36 loff_t isize; 37 38 BUG_ON(!mapping->a_ops->get_xip_page); 39 40 index = *ppos >> PAGE_CACHE_SHIFT; 41 offset = *ppos & ~PAGE_CACHE_MASK; 42 43 isize = i_size_read(inode); 44 if (!isize) 45 goto out; 46 47 end_index = (isize - 1) >> PAGE_CACHE_SHIFT; 48 for (;;) { 49 struct page *page; 50 unsigned long nr, ret; 51 52 /* nr is the maximum number of bytes to copy from this page */ 53 nr = PAGE_CACHE_SIZE; 54 if (index >= end_index) { 55 if (index > end_index) 56 goto out; 57 nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; 58 if (nr <= offset) { 59 goto out; 60 } 61 } 62 nr = nr - offset; 63 64 page = mapping->a_ops->get_xip_page(mapping, 65 index*(PAGE_SIZE/512), 0); 66 if (!page) 67 goto no_xip_page; 68 if (unlikely(IS_ERR(page))) { 69 if (PTR_ERR(page) == -ENODATA) { 70 /* sparse */ 71 page = ZERO_PAGE(0); 72 } else { 73 desc->error = PTR_ERR(page); 74 goto out; 75 } 76 } 77 78 /* If users can be writing to this page using arbitrary 79 * virtual addresses, take care about potential aliasing 80 * before reading the page on the kernel side. 81 */ 82 if (mapping_writably_mapped(mapping)) 83 flush_dcache_page(page); 84 85 /* 86 * Ok, we have the page, so now we can copy it to user space... 87 * 88 * The actor routine returns how many bytes were actually used.. 89 * NOTE! This may not be the same as how much of a user buffer 90 * we filled up (we may be padding etc), so we can only update 91 * "pos" here (the actor routine has to update the user buffer 92 * pointers and the remaining count). 93 */ 94 ret = actor(desc, page, offset, nr); 95 offset += ret; 96 index += offset >> PAGE_CACHE_SHIFT; 97 offset &= ~PAGE_CACHE_MASK; 98 99 if (ret == nr && desc->count) 100 continue; 101 goto out; 102 103no_xip_page: 104 /* Did not get the page. Report it */ 105 desc->error = -EIO; 106 goto out; 107 } 108 109out: 110 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; 111 if (filp) 112 file_accessed(filp); 113} 114 115ssize_t 116xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 117{ 118 read_descriptor_t desc; 119 120 if (!access_ok(VERIFY_WRITE, buf, len)) 121 return -EFAULT; 122 123 desc.written = 0; 124 desc.arg.buf = buf; 125 desc.count = len; 126 desc.error = 0; 127 128 do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp, 129 ppos, &desc, file_read_actor); 130 131 if (desc.written) 132 return desc.written; 133 else 134 return desc.error; 135} 136EXPORT_SYMBOL_GPL(xip_file_read); 137 138ssize_t 139xip_file_sendfile(struct file *in_file, loff_t *ppos, 140 size_t count, read_actor_t actor, void *target) 141{ 142 read_descriptor_t desc; 143 144 if (!count) 145 return 0; 146 147 desc.written = 0; 148 desc.count = count; 149 desc.arg.data = target; 150 desc.error = 0; 151 152 do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file, 153 ppos, &desc, actor); 154 if (desc.written) 155 return desc.written; 156 return desc.error; 157} 158EXPORT_SYMBOL_GPL(xip_file_sendfile); 159 160/* 161 * __xip_unmap is invoked from xip_unmap and 162 * xip_write 163 * 164 * This function walks all vmas of the address_space and unmaps the 165 * ZERO_PAGE when found at pgoff. Should it go in rmap.c? 166 */ 167static void 168__xip_unmap (struct address_space * mapping, 169 unsigned long pgoff) 170{ 171 struct vm_area_struct *vma; 172 struct mm_struct *mm; 173 struct prio_tree_iter iter; 174 unsigned long address; 175 pte_t *pte; 176 pte_t pteval; 177 spinlock_t *ptl; 178 struct page *page; 179 180 spin_lock(&mapping->i_mmap_lock); 181 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 182 mm = vma->vm_mm; 183 address = vma->vm_start + 184 ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 185 BUG_ON(address < vma->vm_start || address >= vma->vm_end); 186 page = ZERO_PAGE(address); 187 pte = page_check_address(page, mm, address, &ptl); 188 if (pte) { 189 /* Nuke the page table entry. */ 190 flush_cache_page(vma, address, pte_pfn(*pte)); 191 pteval = ptep_clear_flush(vma, address, pte); 192 page_remove_rmap(page); 193 dec_mm_counter(mm, file_rss); 194 BUG_ON(pte_dirty(pteval)); 195 pte_unmap_unlock(pte, ptl); 196 page_cache_release(page); 197 } 198 } 199 spin_unlock(&mapping->i_mmap_lock); 200} 201 202/* 203 * xip_nopage() is invoked via the vma operations vector for a 204 * mapped memory region to read in file data during a page fault. 205 * 206 * This function is derived from filemap_nopage, but used for execute in place 207 */ 208static struct page * 209xip_file_nopage(struct vm_area_struct * area, 210 unsigned long address, 211 int *type) 212{ 213 struct file *file = area->vm_file; 214 struct address_space *mapping = file->f_mapping; 215 struct inode *inode = mapping->host; 216 struct page *page; 217 unsigned long size, pgoff, endoff; 218 219 pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) 220 + area->vm_pgoff; 221 endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) 222 + area->vm_pgoff; 223 224 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 225 if (pgoff >= size) { 226 return NULL; 227 } 228 229 page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); 230 if (!IS_ERR(page)) { 231 goto out; 232 } 233 if (PTR_ERR(page) != -ENODATA) 234 return NULL; 235 236 /* sparse block */ 237 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && 238 (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && 239 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { 240 /* maybe shared writable, allocate new block */ 241 page = mapping->a_ops->get_xip_page (mapping, 242 pgoff*(PAGE_SIZE/512), 1); 243 if (IS_ERR(page)) 244 return NULL; 245 /* unmap page at pgoff from all other vmas */ 246 __xip_unmap(mapping, pgoff); 247 } else { 248 /* not shared and writable, use ZERO_PAGE() */ 249 page = ZERO_PAGE(address); 250 } 251 252out: 253 page_cache_get(page); 254 return page; 255} 256 257static struct vm_operations_struct xip_file_vm_ops = { 258 .nopage = xip_file_nopage, 259}; 260 261int xip_file_mmap(struct file * file, struct vm_area_struct * vma) 262{ 263 BUG_ON(!file->f_mapping->a_ops->get_xip_page); 264 265 file_accessed(file); 266 vma->vm_ops = &xip_file_vm_ops; 267 return 0; 268} 269EXPORT_SYMBOL_GPL(xip_file_mmap); 270 271static ssize_t 272__xip_file_write(struct file *filp, const char __user *buf, 273 size_t count, loff_t pos, loff_t *ppos) 274{ 275 struct address_space * mapping = filp->f_mapping; 276 struct address_space_operations *a_ops = mapping->a_ops; 277 struct inode *inode = mapping->host; 278 long status = 0; 279 struct page *page; 280 size_t bytes; 281 ssize_t written = 0; 282 283 BUG_ON(!mapping->a_ops->get_xip_page); 284 285 do { 286 unsigned long index; 287 unsigned long offset; 288 size_t copied; 289 290 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 291 index = pos >> PAGE_CACHE_SHIFT; 292 bytes = PAGE_CACHE_SIZE - offset; 293 if (bytes > count) 294 bytes = count; 295 296 /* 297 * Bring in the user page that we will copy from _first_. 298 * Otherwise there's a nasty deadlock on copying from the 299 * same page as we're writing to, without it being marked 300 * up-to-date. 301 */ 302 fault_in_pages_readable(buf, bytes); 303 304 page = a_ops->get_xip_page(mapping, 305 index*(PAGE_SIZE/512), 0); 306 if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) { 307 /* we allocate a new page unmap it */ 308 page = a_ops->get_xip_page(mapping, 309 index*(PAGE_SIZE/512), 1); 310 if (!IS_ERR(page)) 311 /* unmap page at pgoff from all other vmas */ 312 __xip_unmap(mapping, index); 313 } 314 315 if (IS_ERR(page)) { 316 status = PTR_ERR(page); 317 break; 318 } 319 320 copied = filemap_copy_from_user(page, offset, buf, bytes); 321 flush_dcache_page(page); 322 if (likely(copied > 0)) { 323 status = copied; 324 325 if (status >= 0) { 326 written += status; 327 count -= status; 328 pos += status; 329 buf += status; 330 } 331 } 332 if (unlikely(copied != bytes)) 333 if (status >= 0) 334 status = -EFAULT; 335 if (status < 0) 336 break; 337 } while (count); 338 *ppos = pos; 339 /* 340 * No need to use i_size_read() here, the i_size 341 * cannot change under us because we hold i_mutex. 342 */ 343 if (pos > inode->i_size) { 344 i_size_write(inode, pos); 345 mark_inode_dirty(inode); 346 } 347 348 return written ? written : status; 349} 350 351ssize_t 352xip_file_write(struct file *filp, const char __user *buf, size_t len, 353 loff_t *ppos) 354{ 355 struct address_space *mapping = filp->f_mapping; 356 struct inode *inode = mapping->host; 357 size_t count; 358 loff_t pos; 359 ssize_t ret; 360 361 mutex_lock(&inode->i_mutex); 362 363 if (!access_ok(VERIFY_READ, buf, len)) { 364 ret=-EFAULT; 365 goto out_up; 366 } 367 368 pos = *ppos; 369 count = len; 370 371 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 372 373 /* We can write back this queue in page reclaim */ 374 current->backing_dev_info = mapping->backing_dev_info; 375 376 ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode)); 377 if (ret) 378 goto out_backing; 379 if (count == 0) 380 goto out_backing; 381 382 ret = remove_suid(filp->f_dentry); 383 if (ret) 384 goto out_backing; 385 386 file_update_time(filp); 387 388 ret = __xip_file_write (filp, buf, count, pos, ppos); 389 390 out_backing: 391 current->backing_dev_info = NULL; 392 out_up: 393 mutex_unlock(&inode->i_mutex); 394 return ret; 395} 396EXPORT_SYMBOL_GPL(xip_file_write); 397 398/* 399 * truncate a page used for execute in place 400 * functionality is analog to block_truncate_page but does use get_xip_page 401 * to get the page instead of page cache 402 */ 403int 404xip_truncate_page(struct address_space *mapping, loff_t from) 405{ 406 pgoff_t index = from >> PAGE_CACHE_SHIFT; 407 unsigned offset = from & (PAGE_CACHE_SIZE-1); 408 unsigned blocksize; 409 unsigned length; 410 struct page *page; 411 void *kaddr; 412 413 BUG_ON(!mapping->a_ops->get_xip_page); 414 415 blocksize = 1 << mapping->host->i_blkbits; 416 length = offset & (blocksize - 1); 417 418 /* Block boundary? Nothing to do */ 419 if (!length) 420 return 0; 421 422 length = blocksize - length; 423 424 page = mapping->a_ops->get_xip_page(mapping, 425 index*(PAGE_SIZE/512), 0); 426 if (!page) 427 return -ENOMEM; 428 if (unlikely(IS_ERR(page))) { 429 if (PTR_ERR(page) == -ENODATA) 430 /* Hole? No need to truncate */ 431 return 0; 432 else 433 return PTR_ERR(page); 434 } 435 kaddr = kmap_atomic(page, KM_USER0); 436 memset(kaddr + offset, 0, length); 437 kunmap_atomic(kaddr, KM_USER0); 438 439 flush_dcache_page(page); 440 return 0; 441} 442EXPORT_SYMBOL_GPL(xip_truncate_page); 443