/* file.c revision 8b0e330b7720a206339887044fa275bf537a5264 */
/*
 *  linux/fs/file.c
 *
 *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
 *
 *  Manage the dynamic fd arrays in the process files_struct.
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

/*
 * Per-cpu node used to defer freeing of fdtables whose sets/arrays were
 * vmalloc()ed (vfree() may not be called from RCU callback context).
 */
struct fdtable_defer {
	spinlock_t lock;		/* protects 'next' list */
	struct work_struct wq;		/* runs free_fdtable_work() */
	struct timer_list timer;	/* retry scheduling the work item */
	struct fdtable *next;		/* singly-linked list of dead fdtables */
};

/*
 * We use this list to defer free fdtables that have vmalloced
 * sets/arrays. By keeping a per-cpu list, we avoid having to embed
 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
 * this per-task structure.
 */
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);

/*
 * Allocate an fd array, using kmalloc or vmalloc.
 * Note: the array isn't cleared at allocation time.
 */
struct file ** alloc_fd_array(int num)
{
	struct file **new_fds;
	int size = num * sizeof(struct file *);

	if (size <= PAGE_SIZE)
		new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
	else
		new_fds = (struct file **) vmalloc(size);
	return new_fds;
}

/*
 * Free an fd array previously returned by alloc_fd_array().
 * 'num' must be the element count it was allocated with, so the
 * kmalloc/vmalloc decision can be replayed here.
 */
void free_fd_array(struct file **array, int num)
{
	int size = num * sizeof(struct file *);

	if (!array) {
		printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num);
		return;
	}

	if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */
		return;
	else if (size <= PAGE_SIZE)
		kfree(array);
	else
		vfree(array);
}

/* Release everything hanging off an fdtable, then the fdtable itself. */
static void __free_fdtable(struct fdtable *fdt)
{
	free_fdset(fdt->open_fds, fdt->max_fdset);
	free_fdset(fdt->close_on_exec, fdt->max_fdset);
	free_fd_array(fdt->fd, fdt->max_fds);
	kfree(fdt);
}

/*
 * Timer callback: retry handing the deferred-free list to the workqueue.
 * Armed by free_fdtable_rcu() when schedule_work() found the work item
 * already pending.
 */
static void fdtable_timer(unsigned long data)
{
	struct fdtable_defer *fddef = (struct fdtable_defer *)data;

	spin_lock(&fddef->lock);
	/*
	 * If someone already emptied the queue return.
	 */
	if (!fddef->next)
		goto out;
	if (!schedule_work(&fddef->wq))
		mod_timer(&fddef->timer, 5);
out:
	spin_unlock(&fddef->lock);
}

/*
 * Workqueue handler: detach the whole deferred list under the lock,
 * then free each fdtable in process context (where vfree() is legal).
 */
static void free_fdtable_work(struct fdtable_defer *f)
{
	struct fdtable *fdt;

	spin_lock_bh(&f->lock);
	fdt = f->next;
	f->next = NULL;
	spin_unlock_bh(&f->lock);
	while(fdt) {
		struct fdtable *next = fdt->next;
		__free_fdtable(fdt);
		fdt = next;
	}
}

/*
 * RCU callback queued by free_fdtable(). Three cases:
 *  - fdtable embedded in a dying files_struct: free the files_struct;
 *  - fdtable fully embedded (default sizes): nothing to free;
 *  - otherwise free directly if everything was kmalloc'ed, else push
 *    onto the per-cpu defer list (vfree can't run in this context).
 */
static void free_fdtable_rcu(struct rcu_head *rcu)
{
	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
	int fdset_size, fdarray_size;
	struct fdtable_defer *fddef;

	BUG_ON(!fdt);
	fdset_size = fdt->max_fdset / 8;
	fdarray_size = fdt->max_fds * sizeof(struct file *);

	if (fdt->free_files) {
		/*
		 * This fdtable was embedded in the files structure
		 * and the files structure itself was getting destroyed.
		 * It is now safe to free the files structure.
		 */
		kmem_cache_free(files_cachep, fdt->free_files);
		return;
	}
	if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
		fdt->max_fds <= NR_OPEN_DEFAULT) {
		/*
		 * The fdtable was embedded
		 */
		return;
	}
	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
		/* Everything was kmalloc'ed: safe to kfree right here. */
		kfree(fdt->open_fds);
		kfree(fdt->close_on_exec);
		kfree(fdt->fd);
		kfree(fdt);
	} else {
		fddef = &get_cpu_var(fdtable_defer_list);
		spin_lock(&fddef->lock);
		fdt->next = fddef->next;
		fddef->next = fdt;
		/*
		 * vmallocs are handled from the workqueue context.
		 * If the per-cpu workqueue is running, then we
		 * defer work scheduling through a timer.
		 */
		if (!schedule_work(&fddef->wq))
			mod_timer(&fddef->timer, 5);
		spin_unlock(&fddef->lock);
		put_cpu_var(fdtable_defer_list);
	}
}

/*
 * Queue an fdtable for RCU-deferred freeing, unless it is fully embedded
 * in its files_struct (in which case there is nothing separate to free).
 */
void free_fdtable(struct fdtable *fdt)
{
	if (fdt->free_files ||
		fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
		fdt->max_fds > NR_OPEN_DEFAULT)
		call_rcu(&fdt->rcu, free_fdtable_rcu);
}

/*
 * Expand the fdset in the files_struct.  Called with the files spinlock
 * held for write.  Copies 'fdt' into the larger 'nfdt' and zeroes the
 * newly exposed tail of each set/array.
 */
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
{
	int i;
	int count;

	BUG_ON(nfdt->max_fdset < fdt->max_fdset);
	BUG_ON(nfdt->max_fds < fdt->max_fds);
	/* Copy the existing tables and install the new pointers */

	i = fdt->max_fdset / (sizeof(unsigned long) * 8);
	count = (nfdt->max_fdset - fdt->max_fdset) / 8;

	/*
	 * Don't copy the entire array if the current fdset is
	 * not yet initialised.
	 */
	if (i) {
		memcpy (nfdt->open_fds, fdt->open_fds,
						fdt->max_fdset/8);
		memcpy (nfdt->close_on_exec, fdt->close_on_exec,
						fdt->max_fdset/8);
		memset (&nfdt->open_fds->fds_bits[i], 0, count);
		memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
	}

	/* Don't copy/clear the array if we are creating a new
	   fd array for fork() */
	if (fdt->max_fds) {
		memcpy(nfdt->fd, fdt->fd,
			fdt->max_fds * sizeof(struct file *));
		/* clear the remainder of the array */
		memset(&nfdt->fd[fdt->max_fds], 0,
		       (nfdt->max_fds - fdt->max_fds) *
					sizeof(struct file *));
	}
}

/*
 * Allocate an fdset array, using kmalloc or vmalloc.
 * Note: the array isn't cleared at allocation time.
 */
fd_set * alloc_fdset(int num)
{
	fd_set *new_fdset;
	int size = num / 8;	/* 'num' is a bit count; size is in bytes */

	if (size <= PAGE_SIZE)
		new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
	else
		new_fdset = (fd_set *) vmalloc(size);
	return new_fdset;
}

/* Free an fdset; 'num' is the bit count it was allocated with. */
void free_fdset(fd_set *array, int num)
{
	if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
		return;
	else if (num <= 8 * PAGE_SIZE)	/* i.e. byte size <= PAGE_SIZE: kmalloc'ed */
		kfree(array);
	else
		vfree(array);
}

/*
 * Allocate a new fdtable big enough to cover fd 'nr': the fdsets are
 * sized in one step (power of two, capped at NR_OPEN); the fd array is
 * grown in the traditional easy steps.  Returns NULL on any allocation
 * failure, with all partial allocations released.
 */
static struct fdtable *alloc_fdtable(int nr)
{
	struct fdtable *fdt = NULL;
	int nfds = 0;
	fd_set *new_openset = NULL, *new_execset = NULL;
	struct file **new_fds;

	fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
	if (!fdt)
		goto out;

	nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1));
	if (nfds > NR_OPEN)
		nfds = NR_OPEN;

	new_openset = alloc_fdset(nfds);
	new_execset = alloc_fdset(nfds);
	if (!new_openset || !new_execset)
		goto out;
	fdt->open_fds = new_openset;
	fdt->close_on_exec = new_execset;
	fdt->max_fdset = nfds;

	nfds = NR_OPEN_DEFAULT;
	/*
	 * Expand to the max in easy steps, and keep expanding it until
	 * we have enough for the requested fd array size.
	 */
	do {
#if NR_OPEN_DEFAULT < 256
		if (nfds < 256)
			nfds = 256;
		else
#endif
		if (nfds < (PAGE_SIZE / sizeof(struct file *)))
			nfds = PAGE_SIZE / sizeof(struct file *);
		else {
			nfds = nfds * 2;
			if (nfds > NR_OPEN)
				nfds = NR_OPEN;
		}
	} while (nfds <= nr);
	new_fds = alloc_fd_array(nfds);
	if (!new_fds)
		goto out2;
	fdt->fd = new_fds;
	fdt->max_fds = nfds;
	fdt->free_files = NULL;
	return fdt;
out2:
	/* fd array failed: tell free_fdset() the real fdset size */
	nfds = fdt->max_fdset;
out:
	/* free_fdset() tolerates NULL and embedded-size values */
	free_fdset(new_openset, nfds);
	free_fdset(new_execset, nfds);
	kfree(fdt);
	return NULL;
}

/*
 * Expands the file descriptor table - it will allocate a new fdtable and
 * both fd array and fdset. It is expected to be called with the
 * files_lock held.  Note: the lock is dropped around the allocation, so
 * a racing expansion by another task is re-checked afterwards.
 */
static int expand_fdtable(struct files_struct *files, int nr)
	__releases(files->file_lock)
	__acquires(files->file_lock)
{
	int error = 0;
	struct fdtable *fdt;
	struct fdtable *nfdt = NULL;

	spin_unlock(&files->file_lock);
	nfdt = alloc_fdtable(nr);
	if (!nfdt) {
		error = -ENOMEM;
		spin_lock(&files->file_lock);
		goto out;
	}

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	/*
	 * Check again since another task may have expanded the
	 * fd table while we dropped the lock
	 */
	if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
		copy_fdtable(nfdt, fdt);
	} else {
		/* Somebody expanded while we dropped file_lock */
		spin_unlock(&files->file_lock);
		__free_fdtable(nfdt);
		spin_lock(&files->file_lock);
		goto out;
	}
	rcu_assign_pointer(files->fdt, nfdt);
	free_fdtable(fdt);
out:
	return error;
}

/*
 * Expand files.
 * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked.
 * Should be called with the files->file_lock spinlock held for write.
336 */ 337int expand_files(struct files_struct *files, int nr) 338{ 339 int err, expand = 0; 340 struct fdtable *fdt; 341 342 fdt = files_fdtable(files); 343 if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { 344 if (fdt->max_fdset >= NR_OPEN || 345 fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { 346 err = -EMFILE; 347 goto out; 348 } 349 expand = 1; 350 if ((err = expand_fdtable(files, nr))) 351 goto out; 352 } 353 err = expand; 354out: 355 return err; 356} 357 358static void __devinit fdtable_defer_list_init(int cpu) 359{ 360 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 361 spin_lock_init(&fddef->lock); 362 INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef); 363 init_timer(&fddef->timer); 364 fddef->timer.data = (unsigned long)fddef; 365 fddef->timer.function = fdtable_timer; 366 fddef->next = NULL; 367} 368 369void __init files_defer_init(void) 370{ 371 int i; 372 for_each_possible_cpu(i) 373 fdtable_defer_list_init(i); 374} 375