ipath_file_ops.c revision d21c95c569c462da20d491b75d0a45bd70ddc1bf
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/jiffies.h>
#include <asm/pgtable.h>

#include "ipath_kernel.h"
#include "ipath_common.h"
#include "ipath_user_sdma.h"

static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
			   loff_t *);
static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
			    unsigned long, loff_t);
static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
static int ipath_mmap(struct file *, struct vm_area_struct *);

static const struct file_operations ipath_file_ops = {
	.owner = THIS_MODULE,
	.write = ipath_write,
	.aio_write = ipath_writev,
	.open = ipath_open,
	.release = ipath_close,
	.poll = ipath_poll,
	.mmap = ipath_mmap
};
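/*
 * Entry-point summary: open()/release() assign and tear down a port,
 * poll() waits for receive activity, and mmap() exposes the receive
 * queues and chip PIO buffers mapped below.  write() and aio_write()
 * carry the command and SDMA paths (ipath_write()/ipath_writev() are
 * defined later in this file).
 */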
/*
 * Convert kernel virtual addresses to physical addresses so they don't
 * potentially conflict with the chip addresses used as mmap offsets.
 * It doesn't really matter what mmap offset we use as long as we can
 * interpret it correctly.
 */
static u64 cvt_kvaddr(void *p)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(p);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

static int ipath_get_base_info(struct file *fp,
			       void __user *ubase, size_t ubase_size)
{
	struct ipath_portdata *pd = port_fp(fp);
	int ret = 0;
	struct ipath_base_info *kinfo = NULL;
	struct ipath_devdata *dd = pd->port_dd;
	unsigned subport_cnt;
	int shared, master;
	size_t sz;

	subport_cnt = pd->port_subport_cnt;
	if (!subport_cnt) {
		shared = 0;
		master = 0;
		subport_cnt = 1;
	} else {
		shared = 1;
		master = !subport_fp(fp);
	}

	sz = sizeof(*kinfo);
	/* If port sharing is not requested, allow the old size structure */
	if (!shared)
		sz -= 7 * sizeof(u64);
	if (ubase_size < sz) {
		ipath_cdbg(PROC,
			   "Base size %zu, need %zu (version mismatch?)\n",
			   ubase_size, sz);
		ret = -EINVAL;
		goto bail;
	}

	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
	if (kinfo == NULL) {
		ret = -ENOMEM;
		goto bail;
	}

	ret = dd->ipath_f_get_base_info(pd, kinfo);
	if (ret < 0)
		goto bail;

	kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
	kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
	kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
	kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
	/*
	 * have to mmap whole thing
	 */
	kinfo->spi_rcv_egrbuftotlen =
		pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
		pd->port_rcvegrbuf_chunks;
	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
	if (master)
		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
	/*
	 * for this use, may be ipath_cfgports summed over all chips that
	 * are configured and present
	 */
	kinfo->spi_nports = dd->ipath_cfgports;
	/* unit (chip/board) our port is on */
	kinfo->spi_unit = dd->ipath_unit;
	/* for now, only a single page */
	kinfo->spi_tid_maxsize = PAGE_SIZE;

	/*
	 * Doing this per port, and based on the skip value, etc.  This has
	 * to be the actual buffer size, since the protocol code treats it
	 * as an array.
	 *
	 * These have to be set to user addresses in the user code via mmap.
	 * These values are used on return to user code for the mmap target
	 * addresses only.  For 32 bit, same 44 bit address problem, so use
	 * the physical address, not virtual.  Before 2.6.11, using the
	 * page_address() macro worked, but in 2.6.11, even that returns the
	 * full 64 bit address (upper bits all 1's).  So far, using the
	 * physical addresses (or chip offsets, for chip mapping) works, but
	 * no doubt some future kernel release will change that, and we'll be
	 * on to yet another method of dealing with this.
	 */
	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
		(void *) dd->ipath_statusp -
		(void *) dd->ipath_pioavailregs_dma;
	if (!shared) {
		kinfo->spi_piocnt = pd->port_piocnt;
		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
			dd->ipath_ureg_align * pd->port_port;
	} else if (master) {
		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
				    (pd->port_piocnt % subport_cnt);
		/* Master's PIO buffers are after all the slave's */
		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
			dd->ipath_palign *
			(pd->port_piocnt - kinfo->spi_piocnt);
	} else {
		unsigned slave = subport_fp(fp) - 1;

		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
			dd->ipath_palign * kinfo->spi_piocnt * slave;
	}

	if (shared) {
		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
			dd->ipath_ureg_align * pd->port_port;
		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;

		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
			PAGE_SIZE * subport_fp(fp));

		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
			pd->port_rcvhdrq_size * subport_fp(fp));
		kinfo->spi_rcvhdr_tailaddr = 0;
		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
			subport_fp(fp));

		kinfo->spi_subport_uregbase =
			cvt_kvaddr(pd->subport_uregbase);
		kinfo->spi_subport_rcvegrbuf =
			cvt_kvaddr(pd->subport_rcvegrbuf);
		kinfo->spi_subport_rcvhdr_base =
			cvt_kvaddr(pd->subport_rcvhdr_base);
		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
			   kinfo->spi_port, kinfo->spi_runtime_flags,
			   (unsigned long long) kinfo->spi_subport_uregbase,
			   (unsigned long long) kinfo->spi_subport_rcvegrbuf,
			   (unsigned long long) kinfo->spi_subport_rcvhdr_base);
	}

	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
		dd->ipath_palign;
	kinfo->spi_pioalign = dd->ipath_palign;

	kinfo->spi_qpair = IPATH_KD_QP;
	/*
	 * user mode PIO buffers are always 2KB, even when 4KB can
	 * be received, and sent via the kernel; this is ibmaxlen
	 * for 2K MTU.
	 */
	kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
	kinfo->spi_port = pd->port_port;
	kinfo->spi_subport = subport_fp(fp);
	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
	kinfo->spi_hw_version = dd->ipath_revision;

	if (master) {
		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
	}

	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
	if (copy_to_user(ubase, kinfo, sz))
		ret = -EFAULT;

bail:
	kfree(kinfo);
	return ret;
}
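/*
 * Example of the PIO split computed above for a shared port: with
 * port_piocnt == 18 and subport_cnt == 4, each slave gets 18 / 4 == 4
 * buffers, the master gets 4 + (18 % 4) == 6, and the master's
 * buffers sit last, at piobufs + palign * (18 - 6).
 */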
/**
 * ipath_tid_update - update a port TID
 * @pd: the port
 * @fp: the ipath device file
 * @ti: the TID information
 *
 * The new implementation as of Oct 2004 is that the driver assigns
 * the tid and returns it to the caller.  To make it easier to
 * catch bugs, and to reduce search time, we keep a cursor for
 * each port, walking the shadow tid array to find one that's not
 * in use.
 *
 * For now, if we can't allocate the full list, we fail, although
 * in the long run, we'll allocate as many as we can, and the
 * caller will deal with that by trying the remaining pages later.
 * That means that when we fail, we have to mark the tids as not in
 * use again, in our shadow copy.
 *
 * It's up to the caller to free the tids when they are done.
 * We'll unlock the pages as they free them.
 *
 * Also, right now we are locking one page at a time, but since
 * the intended use of this routine is for a single group of
 * virtually contiguous pages, that should change to improve
 * performance.
 */
static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
			    const struct ipath_tid_info *ti)
{
	int ret = 0, ntids;
	u32 tid, porttid, cnt, i, tidcnt, tidoff;
	u16 *tidlist;
	struct ipath_devdata *dd = pd->port_dd;
	u64 physaddr;
	unsigned long vaddr;
	u64 __iomem *tidbase;
	unsigned long tidmap[8];
	struct page **pagep = NULL;
	unsigned subport = subport_fp(fp);

	if (!dd->ipath_pageshadow) {
		ret = -ENOMEM;
		goto done;
	}

	cnt = ti->tidcnt;
	if (!cnt) {
		ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
			  (unsigned long long) ti->tidlist);
		/*
		 * Should we treat as success?  likely a bug
		 */
		ret = -EFAULT;
		goto done;
	}
	porttid = pd->port_port * dd->ipath_rcvtidcnt;
	if (!pd->port_subport_cnt) {
		tidcnt = dd->ipath_rcvtidcnt;
		tid = pd->port_tidcursor;
		tidoff = 0;
	} else if (!subport) {
		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
		tidoff = dd->ipath_rcvtidcnt - tidcnt;
		porttid += tidoff;
		tid = tidcursor_fp(fp);
	} else {
		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
		tidoff = tidcnt * (subport - 1);
		porttid += tidoff;
		tid = tidcursor_fp(fp);
	}
	if (cnt > tidcnt) {
		/* make sure it all fits in port_tid_pg_list */
		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
		cnt = tidcnt;
	}
	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];

	memset(tidmap, 0, sizeof(tidmap));
	/* before decrement; chip actual # */
	ntids = tidcnt;
	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
				   dd->ipath_rcvtidbase +
				   porttid * sizeof(*tidbase));

	ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
		   pd->port_port, cnt, tid, tidbase);

	/* virtual address of first page in transfer */
	vaddr = ti->tidvaddr;
	if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
		       cnt * PAGE_SIZE)) {
		ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
			  (void *)vaddr, cnt);
		ret = -EFAULT;
		goto done;
	}
	ret = ipath_get_user_pages(vaddr, cnt, pagep);
	if (ret) {
		if (ret == -EBUSY) {
			ipath_dbg("Failed to lock addr %p, %u pages "
				  "(already locked)\n",
				  (void *) vaddr, cnt);
			/*
			 * for now, continue, and see what happens but with
			 * the new implementation, this should never happen,
			 * unless perhaps the user has mpin'ed the pages
			 * themselves (something we need to test)
			 */
			ret = 0;
		} else {
			dev_info(&dd->pcidev->dev,
				 "Failed to lock addr %p, %u pages: "
				 "errno %d\n", (void *) vaddr, cnt, -ret);
			goto done;
		}
	}
	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
		for (; ntids--; tid++) {
			if (tid == tidcnt)
				tid = 0;
			if (!dd->ipath_pageshadow[porttid + tid])
				break;
		}
		if (ntids < 0) {
			/*
			 * oops, wrapped all the way through their TIDs,
			 * and didn't have enough free; see comments at
			 * start of routine
			 */
			ipath_dbg("Not enough free TIDs for %u pages "
				  "(index %d), failing\n", cnt, i);
			i--;	/* last tidlist[i] not filled in */
			ret = -ENOMEM;
			break;
		}
		tidlist[i] = tid + tidoff;
		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
			   "vaddr %lx\n", i, tid + tidoff, vaddr);
		/* we "know" system pages and TID pages are same size */
		dd->ipath_pageshadow[porttid + tid] = pagep[i];
		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
			dd->pcidev, pagep[i], 0, PAGE_SIZE,
			PCI_DMA_FROMDEVICE);
		/*
		 * don't need atomic or it's overhead
		 */
		__set_bit(tid, tidmap);
		physaddr = dd->ipath_physshadow[porttid + tid];
		ipath_stats.sps_pagelocks++;
		ipath_cdbg(VERBOSE,
			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
			   tid, vaddr, (unsigned long long) physaddr,
			   pagep[i]);
		dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
				    physaddr);
		/*
		 * don't check this tid in ipath_portshadow, since we
		 * just filled it in; start with the next one.
		 */
		tid++;
	}

	if (ret) {
		u32 limit;
	cleanup:
		/* jump here if copy out of updated info failed... */
		ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
			  -ret, i, cnt);
		/* same code that's in ipath_free_tid() */
		limit = sizeof(tidmap) * BITS_PER_BYTE;
		if (limit > tidcnt)
			/* just in case size changes in future */
			limit = tidcnt;
		tid = find_first_bit((const unsigned long *)tidmap, limit);
		for (; tid < limit; tid++) {
			if (!test_bit(tid, tidmap))
				continue;
			if (dd->ipath_pageshadow[porttid + tid]) {
				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
					   tid);
				dd->ipath_f_put_tid(dd, &tidbase[tid],
						    RCVHQ_RCV_TYPE_EXPECTED,
						    dd->ipath_tidinvalid);
				pci_unmap_page(dd->pcidev,
					dd->ipath_physshadow[porttid + tid],
					PAGE_SIZE, PCI_DMA_FROMDEVICE);
				dd->ipath_pageshadow[porttid + tid] = NULL;
				ipath_stats.sps_pageunlocks++;
			}
		}
		ipath_release_user_pages(pagep, cnt);
	} else {
		/*
		 * Copy the updated array, with ipath_tid's filled in, back
		 * to user.  Since we did the copy in already, this "should
		 * never fail".  If it does, we have to clean up...
		 */
		if (copy_to_user((void __user *)
				 (unsigned long) ti->tidlist,
				 tidlist, cnt * sizeof(*tidlist))) {
			ret = -EFAULT;
			goto cleanup;
		}
		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
				 tidmap, sizeof tidmap)) {
			ret = -EFAULT;
			goto cleanup;
		}
		if (tid == tidcnt)
			tid = 0;
		if (!pd->port_subport_cnt)
			pd->port_tidcursor = tid;
		else
			tidcursor_fp(fp) = tid;
	}

done:
	if (ret)
		ipath_dbg("Failed to map %u TID pages, failing with %d\n",
			  ti->tidcnt, -ret);
	return ret;
}

/**
 * ipath_tid_free - free a port TID
 * @pd: the port
 * @subport: the subport
 * @ti: the TID info
 *
 * right now we are unlocking one page at a time, but since
 * the intended use of this routine is for a single group of
 * virtually contiguous pages, that should change to improve
 * performance.  We check that the TID is in range for this port
 * but otherwise don't check validity; if user has an error and
 * frees the wrong tid, it's only their own data that can thereby
 * be corrupted.  We do check that the TID was in use, for sanity.
 * We always use our idea of the saved address, not the address that
 * they pass in to us.
 */
static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
			  const struct ipath_tid_info *ti)
{
	int ret = 0;
	u32 tid, porttid, cnt, limit, tidcnt;
	struct ipath_devdata *dd = pd->port_dd;
	u64 __iomem *tidbase;
	unsigned long tidmap[8];

	if (!dd->ipath_pageshadow) {
		ret = -ENOMEM;
		goto done;
	}

	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
			   sizeof tidmap)) {
		ret = -EFAULT;
		goto done;
	}

	porttid = pd->port_port * dd->ipath_rcvtidcnt;
	if (!pd->port_subport_cnt)
		tidcnt = dd->ipath_rcvtidcnt;
	else if (!subport) {
		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
		porttid += dd->ipath_rcvtidcnt - tidcnt;
	} else {
		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
		porttid += tidcnt * (subport - 1);
	}
	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
				   dd->ipath_rcvtidbase +
				   porttid * sizeof(*tidbase));

	limit = sizeof(tidmap) * BITS_PER_BYTE;
	if (limit > tidcnt)
		/* just in case size changes in future */
		limit = tidcnt;
	tid = find_first_bit(tidmap, limit);
	ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
		   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
		   limit, tid, porttid);
	for (cnt = 0; tid < limit; tid++) {
		/*
		 * small optimization; if we detect a run of 3 or so without
		 * any set, use find_first_bit again.  That's mainly to
		 * accelerate the case where we wrapped, so we have some at
		 * the beginning, and some at the end, and a big gap
		 * in the middle.
		 */
		if (!test_bit(tid, tidmap))
			continue;
		cnt++;
		if (dd->ipath_pageshadow[porttid + tid]) {
			struct page *p;
			p = dd->ipath_pageshadow[porttid + tid];
			dd->ipath_pageshadow[porttid + tid] = NULL;
			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
				   pid_nr(pd->port_pid), tid);
			dd->ipath_f_put_tid(dd, &tidbase[tid],
					    RCVHQ_RCV_TYPE_EXPECTED,
					    dd->ipath_tidinvalid);
			pci_unmap_page(dd->pcidev,
				dd->ipath_physshadow[porttid + tid],
				PAGE_SIZE, PCI_DMA_FROMDEVICE);
			ipath_release_user_pages(&p, 1);
			ipath_stats.sps_pageunlocks++;
		} else
			ipath_dbg("Unused tid %u, ignoring\n", tid);
	}
	if (cnt != ti->tidcnt)
		ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
			  ti->tidcnt, cnt);
done:
	if (ret)
		ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
			  ti->tidcnt, -ret);
	return ret;
}
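/*
 * Both routines above describe the affected TID slots with tidmap, a
 * bitmap of 8 unsigned longs (512 bits on 64-bit kernels).  Since the
 * map may hold more bits than the chip has TIDs, the scan limit is
 * clamped to tidcnt before walking it.
 */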
/**
 * ipath_set_part_key - set a partition key
 * @pd: the port
 * @key: the key
 *
 * We can have up to 4 active at a time (other than the default, which is
 * always allowed).  This is somewhat tricky, since multiple ports may set
 * the same key, so we reference count them, and clean up at exit.  All 4
 * partition keys are packed into a single infinipath register.  It's an
 * error for a process to set the same pkey multiple times.  We provide no
 * mechanism to de-allocate a pkey at this time, we may eventually need to
 * do that.  I've used the atomic operations, and no locking, and only make
 * a single pass through what's available.  This should be more than
 * adequate for some time.  I'll think about spinlocks or the like if and as
 * it's necessary.
 */
static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
{
	struct ipath_devdata *dd = pd->port_dd;
	int i, any = 0, pidx = -1;
	u16 lkey = key & 0x7FFF;
	int ret;

	if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
		/* nothing to do; this key always valid */
		ret = 0;
		goto bail;
	}

	ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
		   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
		   pd->port_port, key, dd->ipath_pkeys[0],
		   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
		   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
		   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
		   atomic_read(&dd->ipath_pkeyrefs[3]));

	if (!lkey) {
		ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
			   pd->port_port);
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Set the full membership bit, because it has to be
	 * set in the register or the packet, and it seems
	 * cleaner to set in the register than to force all
	 * callers to set it.  (see bug 4331)
	 */
	key |= 0x8000;

	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i] && pidx == -1)
			pidx = i;
		if (pd->port_pkeys[i] == key) {
			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
				   "(%x) more than once\n",
				   pd->port_port, key);
			ret = -EEXIST;
			goto bail;
		}
	}
	if (pidx == -1) {
		ipath_dbg("All pkeys for port %u already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i]) {
			any++;
			continue;
		}
		if (dd->ipath_pkeys[i] == key) {
			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];

			if (atomic_inc_return(pkrefs) > 1) {
				pd->port_pkeys[pidx] = key;
				ipath_cdbg(VERBOSE, "p%u set key %x "
					   "matches #%d, count now %d\n",
					   pd->port_port, key, i,
					   atomic_read(pkrefs));
				ret = 0;
				goto bail;
			} else {
				/*
				 * lost race, decrement count, catch below
				 */
				atomic_dec(pkrefs);
				ipath_cdbg(VERBOSE, "Lost race, count was "
					   "0, after dec, it's %d\n",
					   atomic_read(pkrefs));
				any++;
			}
		}
		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
			/*
			 * It makes no sense to have both the limited and
			 * full membership PKEY set at the same time since
			 * the unlimited one will disable the limited one.
			 */
			ret = -EEXIST;
			goto bail;
		}
	}
	if (!any) {
		ipath_dbg("port %u, all pkeys already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i] &&
		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
			u64 pkey;

			/* for ipathstats, etc. */
			ipath_stats.sps_pkeys[i] = lkey;
			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
			pkey =
				(u64) dd->ipath_pkeys[0] |
				((u64) dd->ipath_pkeys[1] << 16) |
				((u64) dd->ipath_pkeys[2] << 32) |
				((u64) dd->ipath_pkeys[3] << 48);
			ipath_cdbg(PROC, "p%u set key %x in #%d, "
				   "portidx %d, new pkey reg %llx\n",
				   pd->port_port, key, i, pidx,
				   (unsigned long long) pkey);
			ipath_write_kreg(
				dd, dd->ipath_kregs->kr_partitionkey, pkey);

			ret = 0;
			goto bail;
		}
	}
	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
		  "can't set %x\n", pd->port_port, key);
	ret = -EBUSY;

bail:
	return ret;
}

/**
 * ipath_manage_rcvq - manage a port's receive queue
 * @pd: the port
 * @subport: the subport
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the port, for use in queue
 * overflow conditions.  start_stop==1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
			     int start_stop)
{
	struct ipath_devdata *dd = pd->port_dd;

	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
		   start_stop ? "en" : "dis", dd->ipath_unit,
		   pd->port_port, subport);
	if (subport)
		goto bail;
	/* atomically clear receive enable port. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call.  The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.  This could cause a
		 * problem if software was broken, and did the enable w/o
		 * the disable, but eventually the in-memory copy will be
		 * updated and correct itself, even in the face of software
		 * bugs.
		 */
		if (pd->port_rcvhdrtail_kvaddr)
			ipath_clear_rcvhdrtail(pd);
		set_bit(dd->ipath_r_portenable_shift + pd->port_port,
			&dd->ipath_rcvctrl);
	} else
		clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
			  &dd->ipath_rcvctrl);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);
	/* now be sure chip saw it before we return */
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	if (start_stop) {
		/*
		 * And try to be sure that tail reg update has happened too.
		 * This should in theory interlock with the RXE changes to
		 * the tail register.  Don't assign it to the tail register
		 * in memory copy, since we could overwrite an update by the
		 * chip if we did.
		 */
		ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
	}
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

static void ipath_clean_part_key(struct ipath_portdata *pd,
				 struct ipath_devdata *dd)
{
	int i, j, pchanged = 0;
	u64 oldpkey;

	/* for debugging only */
	oldpkey = (u64) dd->ipath_pkeys[0] |
		((u64) dd->ipath_pkeys[1] << 16) |
		((u64) dd->ipath_pkeys[2] << 32) |
		((u64) dd->ipath_pkeys[3] << 48);

	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i])
			continue;
		ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
			   pd->port_pkeys[i]);
		for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
			/* check for match independent of the global bit */
			if ((dd->ipath_pkeys[j] & 0x7fff) !=
			    (pd->port_pkeys[i] & 0x7fff))
				continue;
			if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
				ipath_cdbg(VERBOSE, "p%u clear key "
					   "%x matches #%d\n",
					   pd->port_port,
					   pd->port_pkeys[i], j);
				ipath_stats.sps_pkeys[j] =
					dd->ipath_pkeys[j] = 0;
				pchanged++;
			} else
				ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
					   "but ref still %d\n", pd->port_port,
					   pd->port_pkeys[i], j,
					   atomic_read(&dd->ipath_pkeyrefs[j]));
			break;
		}
		pd->port_pkeys[i] = 0;
	}
	if (pchanged) {
		u64 pkey = (u64) dd->ipath_pkeys[0] |
			((u64) dd->ipath_pkeys[1] << 16) |
			((u64) dd->ipath_pkeys[2] << 32) |
			((u64) dd->ipath_pkeys[3] << 48);
		ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
			   "new pkey reg %llx\n", pd->port_port,
			   (unsigned long long) oldpkey,
			   (unsigned long long) pkey);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
				 pkey);
	}
}
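/*
 * Worked example of the pkey register packing used above: the four
 * 16-bit pkeys share one 64-bit register, index 0 in the low bits.
 * With ipath_pkeys[] = { 0xFFFF, 0x8001, 0, 0 }, the value written to
 * kr_partitionkey is 0x000000008001FFFF.
 */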
/*
 * Initialize the port data with the receive buffer sizes
 * so this can be done while the master port is locked.
 * Otherwise, there is a race with a slave opening the port
 * and seeing these fields uninitialized.
 */
static void init_user_egr_sizes(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned egrperchunk, egrcnt, size;

	/*
	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
	 * physically contiguous memory, advance through it until used up
	 * and then allocate more.  Of course, we need memory to store those
	 * extra pointers, now.  Started out with 256KB, but under heavy
	 * memory pressure (creating large files and then copying them over
	 * NFS while doing lots of MPI jobs), we hit some allocation
	 * failures, even though we can sleep...  (2.6.10) Still get
	 * failures at 64K.  32K is the lowest we can go without wasting
	 * additional memory.
	 */
	size = 0x8000;
	egrperchunk = size / dd->ipath_rcvegrbufsize;
	egrcnt = dd->ipath_rcvegrcnt;
	pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
	pd->port_rcvegrbufs_perchunk = egrperchunk;
	pd->port_rcvegrbuf_size = size;
}

/**
 * ipath_create_user_egr - allocate eager TID buffers
 * @pd: the port to allocate TID buffers for
 *
 * This routine is now quite different for user and kernel, because
 * the kernel uses skb's, for accelerated network performance.
 * This is the user port version.
 *
 * Allocate the eager TID buffers and program them into infinipath.
 * They are no longer completely contiguous, we do multiple allocation
 * calls.
 */
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	int ret;
	gfp_t gfp_flags;

	/*
	 * GFP_USER, but without GFP_FS, so buffer cache can be
	 * coalesced (we hope); otherwise, even at order 4,
	 * heavy filesystem activity makes these fail, and we can
	 * use compound pages.
	 */
	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

	egrcnt = dd->ipath_rcvegrcnt;
	/* TID number offset for this port */
	egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
	egrsize = dd->ipath_rcvegrbufsize;
	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);

	chunk = pd->port_rcvegrbuf_chunks;
	egrperchunk = pd->port_rcvegrbufs_perchunk;
	size = pd->port_rcvegrbuf_size;
	pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
				     GFP_KERNEL);
	if (!pd->port_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail;
	}
	pd->port_rcvegrbuf_phys =
		kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
			GFP_KERNEL);
	if (!pd->port_rcvegrbuf_phys) {
		ret = -ENOMEM;
		goto bail_rcvegrbuf;
	}
	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {

		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
			gfp_flags);

		if (!pd->port_rcvegrbuf[e]) {
			ret = -ENOMEM;
			goto bail_rcvegrbuf_phys;
		}
	}

	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];

	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
		unsigned i;

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->ipath_f_put_tid(dd, e + egroff +
					    (u64 __iomem *)
					    ((char __iomem *)
					     dd->ipath_kregbase +
					     dd->ipath_rcvegrbase),
					    RCVHQ_RCV_TYPE_EAGER, pa);
			pa += egrsize;
		}
		cond_resched();	/* don't hog the cpu */
	}

	ret = 0;
	goto bail;

bail_rcvegrbuf_phys:
	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
		     pd->port_rcvegrbuf[e]; e++) {
		dma_free_coherent(&dd->pcidev->dev, size,
				  pd->port_rcvegrbuf[e],
				  pd->port_rcvegrbuf_phys[e]);

	}
	kfree(pd->port_rcvegrbuf_phys);
	pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	kfree(pd->port_rcvegrbuf);
	pd->port_rcvegrbuf = NULL;
bail:
	return ret;
}


/* common code for the mappings on dma_alloc_coherent mem */
static int ipath_mmap_mem(struct vm_area_struct *vma,
			  struct ipath_portdata *pd, unsigned len,
			  int write_ok, void *kvaddr, char *what)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned long pfn;
	int ret;

	if ((vma->vm_end - vma->vm_start) > len) {
		dev_info(&dd->pcidev->dev,
			 "FAIL on %s: len %lx > %x\n", what,
			 vma->vm_end - vma->vm_start, len);
		ret = -EFAULT;
		goto bail;
	}

	if (!write_ok) {
		if (vma->vm_flags & VM_WRITE) {
			dev_info(&dd->pcidev->dev,
				 "%s must be mapped readonly\n", what);
			ret = -EPERM;
			goto bail;
		}

		/* don't allow them to later change with mprotect */
		vma->vm_flags &= ~VM_MAYWRITE;
	}

	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
	ret = remap_pfn_range(vma, vma->vm_start, pfn,
			      len, vma->vm_page_prot);
	if (ret)
		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
			 "bytes r%c failed: %d\n", what, pd->port_port,
			 pfn, len, write_ok?'w':'o', ret);
	else
		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
			   "r%c\n", what, pd->port_port, pfn, len,
			   write_ok?'w':'o');
bail:
	return ret;
}

static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
		     u64 ureg)
{
	unsigned long phys;
	int ret;

	/*
	 * This is real hardware, so use io_remap.  This is the mechanism
	 * for the user process to update the head registers for their port
	 * in the chip.
	 */
	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
		ret = -EFAULT;
	} else {
		phys = dd->ipath_physaddr + ureg;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 phys >> PAGE_SHIFT,
					 vma->vm_end - vma->vm_start,
					 vma->vm_page_prot);
	}
	return ret;
}

static int mmap_piobufs(struct vm_area_struct *vma,
			struct ipath_devdata *dd,
			struct ipath_portdata *pd,
			unsigned piobufs, unsigned piocnt)
{
	unsigned long phys;
	int ret;

	/*
	 * When we map the PIO buffers in the chip, we want to map them as
	 * writeonly, no read possible.  This prevents access to previous
	 * process data, and catches users who might try to read the i/o
	 * space due to a bug.
	 */
	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
			 "reqlen %lx > PAGE\n",
			 vma->vm_end - vma->vm_start);
		ret = -EINVAL;
		goto bail;
	}

	phys = dd->ipath_physaddr + piobufs;

#if defined(__powerpc__)
	/* There isn't a generic way to specify writethrough mappings */
	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
#endif

	/*
	 * don't allow them to later change to readable with mprotect (for
	 * when not initially mapped readable, as is normally the case)
	 */
	vma->vm_flags &= ~VM_MAYREAD;
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;

	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
				 vma->vm_end - vma->vm_start,
				 vma->vm_page_prot);
bail:
	return ret;
}

static int mmap_rcvegrbufs(struct vm_area_struct *vma,
			   struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned long start, size;
	size_t total_size, i;
	unsigned long pfn;
	int ret;

	size = pd->port_rcvegrbuf_size;
	total_size = pd->port_rcvegrbuf_chunks * size;
	if ((vma->vm_end - vma->vm_start) > total_size) {
		dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
			 "reqlen %lx > actual %lx\n",
			 vma->vm_end - vma->vm_start,
			 (unsigned long) total_size);
		ret = -EINVAL;
		goto bail;
	}

	if (vma->vm_flags & VM_WRITE) {
		dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
			 "writable (flags=%lx)\n", vma->vm_flags);
		ret = -EPERM;
		goto bail;
	}
	/* don't allow them to later change to writeable with mprotect */
	vma->vm_flags &= ~VM_MAYWRITE;

	start = vma->vm_start;

	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
		ret = remap_pfn_range(vma, start, pfn, size,
				      vma->vm_page_prot);
		if (ret < 0)
			goto bail;
	}
	ret = 0;

bail:
	return ret;
}

/*
 * ipath_file_vma_fault - handle a VMA page fault.
 */
static int ipath_file_vma_fault(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;
	get_page(page);
	vmf->page = page;

	return 0;
}

static struct vm_operations_struct ipath_file_vm_ops = {
	.fault = ipath_file_vma_fault,
};
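/*
 * The fault handler above relies on mmap_kvaddr() below storing the
 * kernel virtual page frame of the vmalloc'ed region in vm_pgoff;
 * shifting pgoff back up by PAGE_SHIFT yields an address that
 * vmalloc_to_page() can translate back to a struct page.
 */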
static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
		       struct ipath_portdata *pd, unsigned subport)
{
	unsigned long len;
	struct ipath_devdata *dd;
	void *addr;
	size_t size;
	int ret = 0;

	/* If the port is not shared, all addresses should be physical */
	if (!pd->port_subport_cnt)
		goto bail;

	dd = pd->port_dd;
	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;

	/*
	 * Each process has all the subport uregbase, rcvhdrq, and
	 * rcvegrbufs mmapped - as an array for all the processes,
	 * and also separately for this process.
	 */
	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
		addr = pd->subport_uregbase;
		size = PAGE_SIZE * pd->port_subport_cnt;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
		addr = pd->subport_rcvhdr_base;
		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
		addr = pd->subport_rcvegrbuf;
		size *= pd->port_subport_cnt;
	} else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
					PAGE_SIZE * subport)) {
		addr = pd->subport_uregbase + PAGE_SIZE * subport;
		size = PAGE_SIZE;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
					pd->port_rcvhdrq_size * subport)) {
		addr = pd->subport_rcvhdr_base +
			pd->port_rcvhdrq_size * subport;
		size = pd->port_rcvhdrq_size;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
					size * subport)) {
		addr = pd->subport_rcvegrbuf + size * subport;
		/* rcvegrbufs are read-only on the slave */
		if (vma->vm_flags & VM_WRITE) {
			dev_info(&dd->pcidev->dev,
				 "Can't map eager buffers as "
				 "writable (flags=%lx)\n", vma->vm_flags);
			ret = -EPERM;
			goto bail;
		}
		/*
		 * Don't allow permission to later change to writeable
		 * with mprotect.
		 */
		vma->vm_flags &= ~VM_MAYWRITE;
	} else {
		goto bail;
	}
	len = vma->vm_end - vma->vm_start;
	if (len > size) {
		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
		ret = -EINVAL;
		goto bail;
	}

	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
	vma->vm_ops = &ipath_file_vm_ops;
	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
	ret = 1;

bail:
	return ret;
}

/**
 * ipath_mmap - mmap various structures into user space
 * @fp: the file pointer
 * @vma: the VM area
 *
 * We use this to have a shared buffer between the kernel and the user code
 * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
 * buffers in the chip.  We have the open and close entries so we can bump
 * the ref count and keep the driver from being unloaded while still mapped.
 */
static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct ipath_portdata *pd;
	struct ipath_devdata *dd;
	u64 pgaddr, ureg;
	unsigned piobufs, piocnt;
	int ret;

	pd = port_fp(fp);
	if (!pd) {
		ret = -EINVAL;
		goto bail;
	}
	dd = pd->port_dd;

	/*
	 * This is the ipath_do_user_init() code, mapping the shared buffers
	 * into the user process.  The address referred to by vm_pgoff is the
	 * file offset passed via mmap().  For shared ports, this is the
	 * kernel vmalloc() address of the pages to share with the master.
	 * For non-shared or master ports, this is a physical address.
	 * We only do one mmap for each space mapped.
	 */
	pgaddr = vma->vm_pgoff << PAGE_SHIFT;

	/*
	 * Check for 0 in case one of the allocations failed, but user
	 * called mmap anyway.
	 */
	if (!pgaddr) {
		ret = -EINVAL;
		goto bail;
	}

	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
		   (unsigned long long) pgaddr, vma->vm_start,
		   vma->vm_end - vma->vm_start, dd->ipath_unit,
		   pd->port_port, subport_fp(fp));

	/*
	 * Physical addresses must fit in 40 bits for our hardware.
	 * Check for kernel virtual addresses first, anything else must
	 * match a HW or memory address.
	 */
	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
	if (ret) {
		if (ret > 0)
			ret = 0;
		goto bail;
	}

	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
	if (!pd->port_subport_cnt) {
		/* port is not shared */
		piocnt = pd->port_piocnt;
		piobufs = pd->port_piobufs;
	} else if (!subport_fp(fp)) {
		/* caller is the master */
		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
			 (pd->port_piocnt % pd->port_subport_cnt);
		piobufs = pd->port_piobufs +
			dd->ipath_palign * (pd->port_piocnt - piocnt);
	} else {
		unsigned slave = subport_fp(fp) - 1;

		/* caller is a slave */
		piocnt = pd->port_piocnt / pd->port_subport_cnt;
		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
	}

	if (pgaddr == ureg)
		ret = mmap_ureg(vma, dd, ureg);
	else if (pgaddr == piobufs)
		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
	else if (pgaddr == dd->ipath_pioavailregs_phys)
		/* in-memory copy of pioavail registers */
		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
				     (void *) dd->ipath_pioavailregs_dma,
				     "pioavail registers");
	else if (pgaddr == pd->port_rcvegr_phys)
		ret = mmap_rcvegrbufs(vma, pd);
	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
		/*
		 * The rcvhdrq itself; readonly except on HT (so have
		 * to allow writable mapping), multiple pages, contiguous
		 * from an i/o perspective.
		 */
		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
				     pd->port_rcvhdrq,
				     "rcvhdrq");
	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
		/* in-memory copy of rcvhdrq tail register */
		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
				     pd->port_rcvhdrtail_kvaddr,
				     "rcvhdrq tail");
	else
		ret = -EINVAL;

	vma->vm_private_data = NULL;

	if (ret < 0)
		dev_info(&dd->pcidev->dev,
			 "Failure %d on off %llx len %lx\n",
			 -ret, (unsigned long long)pgaddr,
			 vma->vm_end - vma->vm_start);
bail:
	return ret;
}

static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
{
	unsigned pollflag = 0;

	if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
	    pd->port_hdrqfull != pd->port_hdrqfull_poll) {
		pollflag |= POLLIN | POLLRDNORM;
		pd->port_hdrqfull_poll = pd->port_hdrqfull;
	}

	return pollflag;
}
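/*
 * The remaining poll helpers follow the pattern above: report
 * POLLIN | POLLRDNORM when a counter (overflow, urgent) or the rcvhdrq
 * head/tail changed since the last poll, otherwise set a WAITING flag
 * and sleep on port_wait; the wmb() is there so the flag is visible
 * before the interrupt handler checks it.
 */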
static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
				      struct file *fp,
				      struct poll_table_struct *pt)
{
	unsigned pollflag = 0;
	struct ipath_devdata *dd;

	dd = pd->port_dd;

	/* variable access in ipath_poll_hdrqfull() needs this */
	rmb();
	pollflag = ipath_poll_hdrqfull(pd);

	if (pd->port_urgent != pd->port_urgent_poll) {
		pollflag |= POLLIN | POLLRDNORM;
		pd->port_urgent_poll = pd->port_urgent;
	}

	if (!pollflag) {
		/* this saves a spin_lock/unlock in interrupt handler... */
		set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
		/* flush waiting flag so don't miss an event... */
		wmb();
		poll_wait(fp, &pd->port_wait, pt);
	}

	return pollflag;
}

static unsigned int ipath_poll_next(struct ipath_portdata *pd,
				    struct file *fp,
				    struct poll_table_struct *pt)
{
	u32 head;
	u32 tail;
	unsigned pollflag = 0;
	struct ipath_devdata *dd;

	dd = pd->port_dd;

	/* variable access in ipath_poll_hdrqfull() needs this */
	rmb();
	pollflag = ipath_poll_hdrqfull(pd);

	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
	if (pd->port_rcvhdrtail_kvaddr)
		tail = ipath_get_rcvhdrtail(pd);
	else
		tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);

	if (head != tail)
		pollflag |= POLLIN | POLLRDNORM;
	else {
		/* this saves a spin_lock/unlock in interrupt handler */
		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
		/* flush waiting flag so we don't miss an event */
		wmb();

		set_bit(pd->port_port + dd->ipath_r_intravail_shift,
			&dd->ipath_rcvctrl);

		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl);

		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
			ipath_write_ureg(dd, ur_rcvhdrhead,
					 dd->ipath_rhdrhead_intr_off | head,
					 pd->port_port);

		poll_wait(fp, &pd->port_wait, pt);
	}

	return pollflag;
}

static unsigned int ipath_poll(struct file *fp,
			       struct poll_table_struct *pt)
{
	struct ipath_portdata *pd;
	unsigned pollflag;

	pd = port_fp(fp);
	if (!pd)
		pollflag = 0;
	else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
		pollflag = ipath_poll_urgent(pd, fp, pt);
	else
		pollflag = ipath_poll_next(pd, fp, pt);

	return pollflag;
}

static int ipath_supports_subports(int user_swmajor, int user_swminor)
{
	/* no subport implementation prior to software version 1.3 */
	return (user_swmajor > 1) || (user_swminor >= 3);
}

static int ipath_compatible_subports(int user_swmajor, int user_swminor)
{
	/* this code is written long-hand for clarity */
	if (IPATH_USER_SWMAJOR != user_swmajor) {
		/* no promise of compatibility if major mismatch */
		return 0;
	}
	if (IPATH_USER_SWMAJOR == 1) {
		switch (IPATH_USER_SWMINOR) {
		case 0:
		case 1:
		case 2:
			/* no subport implementation so cannot be compatible */
			return 0;
		case 3:
			/* 3 is only compatible with itself */
			return user_swminor == 3;
		default:
			/* >= 4 are compatible (or are expected to be) */
			return user_swminor >= 4;
		}
	}
	/* make no promises yet for future major versions */
	return 0;
}
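/*
 * Version rules above, summarized: a 1.3 driver shares ports only with
 * 1.3 libraries, a 1.x driver with x >= 4 shares with any 1.y library
 * where y >= 4, minors 0-2 never share, and a major-version mismatch
 * is never compatible.
 */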
static int init_subports(struct ipath_devdata *dd,
			 struct ipath_portdata *pd,
			 const struct ipath_user_info *uinfo)
{
	int ret = 0;
	unsigned num_subports;
	size_t size;

	/*
	 * If the user is requesting zero subports,
	 * skip the subport allocation.
	 */
	if (uinfo->spu_subport_cnt <= 0)
		goto bail;

	/* Self-consistency check for ipath_compatible_subports() */
	if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
	    !ipath_compatible_subports(IPATH_USER_SWMAJOR,
				       IPATH_USER_SWMINOR)) {
		dev_info(&dd->pcidev->dev,
			 "Inconsistent ipath_compatible_subports()\n");
		goto bail;
	}

	/* Check for subport compatibility */
	if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
				       uinfo->spu_userversion & 0xffff)) {
		dev_info(&dd->pcidev->dev,
			 "Mismatched user version (%d.%d) and driver "
			 "version (%d.%d) while port sharing. Ensure "
			 "that driver and library are from the same "
			 "release.\n",
			 (int) (uinfo->spu_userversion >> 16),
			 (int) (uinfo->spu_userversion & 0xffff),
			 IPATH_USER_SWMAJOR,
			 IPATH_USER_SWMINOR);
		goto bail;
	}
	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
		ret = -EINVAL;
		goto bail;
	}

	num_subports = uinfo->spu_subport_cnt;
	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
	if (!pd->subport_uregbase) {
		ret = -ENOMEM;
		goto bail;
	}
	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
		     sizeof(u32), PAGE_SIZE) * num_subports;
	pd->subport_rcvhdr_base = vmalloc(size);
	if (!pd->subport_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
					pd->port_rcvegrbuf_size *
					num_subports);
	if (!pd->subport_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	pd->port_subport_cnt = uinfo->spu_subport_cnt;
	pd->port_subport_id = uinfo->spu_subport_id;
	pd->active_slaves = 1;
	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
	memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
	memset(pd->subport_rcvhdr_base, 0, size);
	memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
				pd->port_rcvegrbuf_size *
				num_subports);
	goto bail;

bail_rhdr:
	vfree(pd->subport_rcvhdr_base);
bail_ureg:
	vfree(pd->subport_uregbase);
	pd->subport_uregbase = NULL;
bail:
	return ret;
}

static int try_alloc_port(struct ipath_devdata *dd, int port,
			  struct file *fp,
			  const struct ipath_user_info *uinfo)
{
	struct ipath_portdata *pd;
	int ret;

	if (!(pd = dd->ipath_pd[port])) {
		void *ptmp;

		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);

		/*
		 * Allocate memory for use in ipath_tid_update() just once
		 * at open, not per call.  Reduces cost of expected send
		 * setup.
		 */
		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
			       dd->ipath_rcvtidcnt * sizeof(struct page **),
			       GFP_KERNEL);
		if (!pd || !ptmp) {
			ipath_dev_err(dd, "Unable to allocate portdata "
				      "memory, failing open\n");
			ret = -ENOMEM;
			kfree(pd);
			kfree(ptmp);
			goto bail;
		}
		dd->ipath_pd[port] = pd;
		dd->ipath_pd[port]->port_port = port;
		dd->ipath_pd[port]->port_dd = dd;
		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
	}
	if (!pd->port_cnt) {
		pd->userversion = uinfo->spu_userversion;
		init_user_egr_sizes(pd);
		if ((ret = init_subports(dd, pd, uinfo)) != 0)
			goto bail;
		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
			   current->comm, current->pid, dd->ipath_unit,
			   port);
		pd->port_cnt = 1;
		port_fp(fp) = pd;
		pd->port_pid = get_pid(task_pid(current));
		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
		ipath_stats.sps_ports++;
		ret = 0;
	} else
		ret = -EBUSY;

bail:
	return ret;
}

static inline int usable(struct ipath_devdata *dd)
{
	return dd &&
		(dd->ipath_flags & IPATH_PRESENT) &&
		dd->ipath_kregbase &&
		dd->ipath_lid &&
		!(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
				     | IPATH_LINKUNK));
}

static int find_free_port(int unit, struct file *fp,
			  const struct ipath_user_info *uinfo)
{
	struct ipath_devdata *dd = ipath_lookup(unit);
	int ret, i;

	if (!dd) {
		ret = -ENODEV;
		goto bail;
	}

	if (!usable(dd)) {
		ret = -ENETDOWN;
		goto bail;
	}

	for (i = 1; i < dd->ipath_cfgports; i++) {
		ret = try_alloc_port(dd, i, fp, uinfo);
		if (ret != -EBUSY)
			goto bail;
	}
	ret = -EBUSY;

bail:
	return ret;
}
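/*
 * find_free_port() serves opens on a specific unit's device node (in
 * ipath_assign_port() below, minor number n > 0 selects unit n - 1);
 * find_best_unit() below serves the "any unit" node, preferring a chip
 * near the caller's CPU affinity and otherwise scanning all units.
 */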
static int find_best_unit(struct file *fp,
			  const struct ipath_user_info *uinfo)
{
	int ret = 0, i, prefunit = -1, devmax;
	int maxofallports, npresent, nup;
	int ndev;

	devmax = ipath_count_units(&npresent, &nup, &maxofallports);

	/*
	 * This code is present to allow a knowledgeable person to
	 * specify the layout of processes to processors before opening
	 * this driver, and then we'll assign the process to the "closest"
	 * InfiniPath chip to that processor (we assume reasonable connectivity,
	 * for now).  This code assumes that if affinity has been set
	 * before this point, that at most one cpu is set; for now this
	 * is reasonable.  I check for both cpus_empty() and cpus_full(),
	 * in case some kernel variant sets none of the bits when no
	 * affinity is set.  2.6.11 and 12 kernels have all present
	 * cpus set.  Some day we'll have to fix it up further to handle
	 * a cpu subset.  This algorithm fails for two HT chips connected
	 * in tunnel fashion.  Eventually this needs real topology
	 * information.  There may be some issues with dual core numbering
	 * as well.  This needs more work prior to release.
	 */
	if (!cpus_empty(current->cpus_allowed) &&
	    !cpus_full(current->cpus_allowed)) {
		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
		for (i = 0; i < ncpus; i++)
			if (cpu_isset(i, current->cpus_allowed)) {
				ipath_cdbg(PROC, "%s[%u] affinity set for "
					   "cpu %d/%d\n", current->comm,
					   current->pid, i, ncpus);
				curcpu = i;
				nset++;
			}
		if (curcpu != -1 && nset != ncpus) {
			if (npresent) {
				prefunit = curcpu / (ncpus / npresent);
				ipath_cdbg(PROC, "%s[%u] %d chips, %d cpus, "
					   "%d cpus/chip, select unit %d\n",
					   current->comm, current->pid,
					   npresent, ncpus, ncpus / npresent,
					   prefunit);
			}
		}
	}

	/*
	 * user ports start at 1, kernel port is 0
	 * For now, we do round-robin access across all chips
	 */

	if (prefunit != -1)
		devmax = prefunit + 1;
recheck:
	for (i = 1; i < maxofallports; i++) {
		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
		     ndev++) {
			struct ipath_devdata *dd = ipath_lookup(ndev);

			if (!usable(dd))
				continue; /* can't use this unit */
			if (i >= dd->ipath_cfgports)
				/*
				 * Maxed out on users of this unit. Try
				 * next.
				 */
				continue;
			ret = try_alloc_port(dd, i, fp, uinfo);
			if (!ret)
				goto done;
		}
	}

	if (npresent) {
		if (nup == 0) {
			ret = -ENETDOWN;
			ipath_dbg("No ports available (none initialized "
				  "and ready)\n");
		} else {
			if (prefunit > 0) {
				/* if started above 0, retry from 0 */
				ipath_cdbg(PROC,
					   "%s[%u] no ports on prefunit "
					   "%d, clear and re-check\n",
					   current->comm, current->pid,
					   prefunit);
				devmax = ipath_count_units(NULL, NULL,
							   NULL);
				prefunit = -1;
				goto recheck;
			}
			ret = -EBUSY;
			ipath_dbg("No ports available\n");
		}
	} else {
		ret = -ENXIO;
		ipath_dbg("No boards found\n");
	}

done:
	return ret;
}

static int find_shared_port(struct file *fp,
			    const struct ipath_user_info *uinfo)
{
	int devmax, ndev, i;
	int ret = 0;

	devmax = ipath_count_units(NULL, NULL, NULL);

	for (ndev = 0; ndev < devmax; ndev++) {
		struct ipath_devdata *dd = ipath_lookup(ndev);

		if (!usable(dd))
			continue;
		for (i = 1; i < dd->ipath_cfgports; i++) {
			struct ipath_portdata *pd = dd->ipath_pd[i];

			/* Skip ports which are not yet open */
			if (!pd || !pd->port_cnt)
				continue;
			/* Skip port if it doesn't match the requested one */
			if (pd->port_subport_id != uinfo->spu_subport_id)
				continue;
			/* Verify the sharing process matches the master */
			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
			    pd->userversion != uinfo->spu_userversion ||
			    pd->port_cnt >= pd->port_subport_cnt) {
				ret = -EINVAL;
				goto done;
			}
			port_fp(fp) = pd;
			subport_fp(fp) = pd->port_cnt++;
			pd->port_subpid[subport_fp(fp)] =
				get_pid(task_pid(current));
			tidcursor_fp(fp) = 0;
			pd->active_slaves |= 1 << subport_fp(fp);
			ipath_cdbg(PROC,
				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
				   current->comm, current->pid,
				   subport_fp(fp),
				   pd->port_comm, pid_nr(pd->port_pid),
				   dd->ipath_unit, pd->port_port);
			ret = 1;
			goto done;
		}
	}

done:
	return ret;
}
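/*
 * find_shared_port() returns 1 when the caller was attached as a slave
 * to an existing master, 0 when no port matched the requested subport
 * ID (so the caller allocates its own port), and a negative errno when
 * a port matched but the subport count, user version, or slave count
 * disagreed with the master.
 */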
/* No BKL needed here */
static int ipath_open(struct inode *in, struct file *fp)
{
	/* The real work is performed later in ipath_assign_port() */
	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
	return fp->private_data ? 0 : -ENOMEM;
}

/* Get port early, so can set affinity prior to memory allocation */
static int ipath_assign_port(struct file *fp,
			     const struct ipath_user_info *uinfo)
{
	int ret;
	int i_minor;
	unsigned swmajor, swminor;

	/* Check to be sure we haven't already initialized this file */
	if (port_fp(fp)) {
		ret = -EINVAL;
		goto done;
	}

	/* for now, if major version is different, bail */
	swmajor = uinfo->spu_userversion >> 16;
	if (swmajor != IPATH_USER_SWMAJOR) {
		ipath_dbg("User major version %d not same as driver "
			  "major %d\n", uinfo->spu_userversion >> 16,
			  IPATH_USER_SWMAJOR);
		ret = -ENODEV;
		goto done;
	}

	swminor = uinfo->spu_userversion & 0xffff;
	if (swminor != IPATH_USER_SWMINOR)
		ipath_dbg("User minor version %d not same as driver "
			  "minor %d\n", swminor, IPATH_USER_SWMINOR);

	mutex_lock(&ipath_mutex);

	if (ipath_compatible_subports(swmajor, swminor) &&
	    uinfo->spu_subport_cnt &&
	    (ret = find_shared_port(fp, uinfo))) {
		if (ret > 0)
			ret = 0;
		goto done_chk_sdma;
	}

	i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
		   (long)fp->f_path.dentry->d_inode->i_rdev, i_minor);

	if (i_minor)
		ret = find_free_port(i_minor - 1, fp, uinfo);
	else
		ret = find_best_unit(fp, uinfo);

done_chk_sdma:
	if (!ret) {
		struct ipath_filedata *fd = fp->private_data;
		const struct ipath_portdata *pd = fd->pd;
		const struct ipath_devdata *dd = pd->port_dd;

		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
						      dd->ipath_unit,
						      pd->port_port,
						      fd->subport);

		if (!fd->pq)
			ret = -ENOMEM;
	}

	mutex_unlock(&ipath_mutex);

done:
	return ret;
}
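/*
 * Port assignment above and teardown in ipath_close() below both run
 * under ipath_mutex, so a port cannot be claimed and freed
 * concurrently; the per-fd SDMA queue is created once a port or shared
 * subport has been assigned.
 */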

static int ipath_do_user_init(struct file *fp,
			      const struct ipath_user_info *uinfo)
{
	int ret;
	struct ipath_portdata *pd = port_fp(fp);
	struct ipath_devdata *dd;
	u32 head32;

	/* Subports don't need to initialize anything since master did it. */
	if (subport_fp(fp)) {
		ret = wait_event_interruptible(pd->port_wait,
			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
		goto done;
	}

	dd = pd->port_dd;

	if (uinfo->spu_rcvhdrsize) {
		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
		if (ret)
			goto done;
	}

	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */

	/* some ports may get extra buffers, calculate that here */
	if (pd->port_port <= dd->ipath_ports_extrabuf)
		pd->port_piocnt = dd->ipath_pbufsport + 1;
	else
		pd->port_piocnt = dd->ipath_pbufsport;

	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
	if (pd->port_port <= dd->ipath_ports_extrabuf)
		pd->port_pio_base = (dd->ipath_pbufsport + 1)
			* (pd->port_port - 1);
	else
		pd->port_pio_base = dd->ipath_ports_extrabuf +
			dd->ipath_pbufsport * (pd->port_port - 1);
	pd->port_piobufs = dd->ipath_piobufbase +
		pd->port_pio_base * dd->ipath_palign;
	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
		   " first pio %u\n", pd->port_port, pd->port_piobufs,
		   pd->port_piocnt, pd->port_pio_base);
	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);

	/*
	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID array
	 * for the time being.  If pd->port_port exceeds what the chip
	 * supports, we will someday need extra logic here to handle the
	 * overflow through port 0.
	 */
	ret = ipath_create_rcvhdrq(dd, pd);
	if (!ret)
		ret = ipath_create_user_egr(pd);
	if (ret)
		goto done;

	/*
	 * set the eager head register for this port to the current values
	 * of the tail pointers, since we don't know if they were
	 * updated on last use of the port.
	 */
	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
	pd->port_lastrcvhdrqtail = -1;
	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
		   pd->port_port, head32);
	pd->port_tidcursor = 0; /* start at beginning after open */

	/* initialize poll variables... */
	pd->port_urgent = 0;
	pd->port_urgent_poll = 0;
	pd->port_hdrqfull_poll = pd->port_hdrqfull;

	/*
	 * Now enable the port for receive.
	 * Some chips are set to DMA the tail register to memory whenever
	 * it changes (and whenever the update bit transitions from 0 to
	 * 1), so for those chips we turn the update off and then back on.
	 * This will (very briefly) affect any other open ports, but the
	 * duration is very short, and therefore isn't an issue.  We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (the chip resets head/tail to 0 on the transition to enable).
	 */
	set_bit(dd->ipath_r_portenable_shift + pd->port_port,
		&dd->ipath_rcvctrl);
	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
		if (pd->port_rcvhdrtail_kvaddr)
			ipath_clear_rcvhdrtail(pd);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl &
				 ~(1ULL << dd->ipath_r_tailupd_shift));
	}
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);
	/* Notify any waiting slaves */
	if (pd->port_subport_cnt) {
		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
		wake_up(&pd->port_wait);
	}
done:
	return ret;
}
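
/*
 * Worked example (hypothetical numbers) of the PIO buffer carve-up in
 * ipath_do_user_init() above, with ipath_pbufsport = 32 and
 * ipath_ports_extrabuf = 2:
 *
 *	port 1: piocnt = 33, pio_base = 33 * 0     =  0
 *	port 2: piocnt = 33, pio_base = 33 * 1     = 33
 *	port 3: piocnt = 32, pio_base = 2 + 32 * 2 = 66
 *	port 4: piocnt = 32, pio_base = 2 + 32 * 3 = 98
 *
 * The first ipath_ports_extrabuf ports each absorb one leftover buffer,
 * and later ports start ipath_ports_extrabuf buffers further in, so the
 * per-port ranges tile with no gaps or overlap.  port_piobufs is then
 * pio_base scaled by the per-buffer stride (ipath_palign) plus the
 * chip's PIO base offset.
 */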

/**
 * unlock_expected_tids - unlock any expected TID entries the port still had in use
 * @pd: port
 *
 * We don't actually update the chip here, because we do a bulk update
 * below, using ipath_f_clear_tids.
 */
static void unlock_expected_tids(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
	int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;

	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
		   pd->port_port);
	for (i = port_tidbase; i < maxtid; i++) {
		struct page *ps = dd->ipath_pageshadow[i];

		if (!ps)
			continue;

		dd->ipath_pageshadow[i] = NULL;
		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
			       PAGE_SIZE, PCI_DMA_FROMDEVICE);
		ipath_release_user_pages_on_close(&ps, 1);
		cnt++;
		ipath_stats.sps_pageunlocks++;
	}
	if (cnt)
		ipath_cdbg(VERBOSE, "Port %u unlocked %u expTID entries\n",
			   pd->port_port, cnt);

	if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
		ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
			   (unsigned long long) ipath_stats.sps_pagelocks,
			   (unsigned long long)
			   ipath_stats.sps_pageunlocks);
}
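
/*
 * Illustration (hypothetical sizes): the shadow arrays are indexed by a
 * flat TID number, so each port owns a contiguous window of
 * ipath_rcvtidcnt entries.  With ipath_rcvtidcnt = 512:
 *
 *	port 1 -> entries [ 512, 1024)
 *	port 2 -> entries [1024, 1536)
 *
 * which is exactly the [port_tidbase, maxtid) range walked above.  A
 * non-NULL ipath_pageshadow[] slot means that TID still pins a user
 * page, and ipath_physshadow[] remembers the matching DMA address so
 * it can be unmapped here.
 */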

static int ipath_close(struct inode *in, struct file *fp)
{
	int ret = 0;
	struct ipath_filedata *fd;
	struct ipath_portdata *pd;
	struct ipath_devdata *dd;
	unsigned port;

	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
		   (long)in->i_rdev, fp->private_data);

	mutex_lock(&ipath_mutex);

	fd = (struct ipath_filedata *) fp->private_data;
	fp->private_data = NULL;
	pd = fd->pd;
	if (!pd) {
		mutex_unlock(&ipath_mutex);
		goto bail;
	}

	dd = pd->port_dd;

	/* drain user sdma queue */
	ipath_user_sdma_queue_drain(dd, fd->pq);
	ipath_user_sdma_queue_destroy(fd->pq);

	if (--pd->port_cnt) {
		/*
		 * XXX If the master closes the port before the slave(s),
		 * revoke the mmap for the eager receive queue so
		 * the slave(s) don't wait for receive data forever.
		 */
		pd->active_slaves &= ~(1 << fd->subport);
		put_pid(pd->port_subpid[fd->subport]);
		pd->port_subpid[fd->subport] = NULL;
		mutex_unlock(&ipath_mutex);
		goto bail;
	}
	port = pd->port_port;

	if (pd->port_hdrqfull) {
		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
			   "during run\n", pd->port_comm, pid_nr(pd->port_pid),
			   pd->port_hdrqfull);
		pd->port_hdrqfull = 0;
	}

	if (pd->port_rcvwait_to || pd->port_piowait_to
	    || pd->port_rcvnowait || pd->port_pionowait) {
		ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeouts; "
			   "%u rcv nowait, %u pio nowait\n",
			   pd->port_port, pd->port_rcvwait_to,
			   pd->port_piowait_to, pd->port_rcvnowait,
			   pd->port_pionowait);
		pd->port_rcvwait_to = pd->port_piowait_to =
			pd->port_rcvnowait = pd->port_pionowait = 0;
	}
	if (pd->port_flag) {
		ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
			   pd->port_port, pd->port_flag);
		pd->port_flag = 0;
	}

	if (dd->ipath_kregbase) {
		/* atomically clear receive enable port and intr avail. */
		clear_bit(dd->ipath_r_portenable_shift + port,
			  &dd->ipath_rcvctrl);
		clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
			  &dd->ipath_rcvctrl);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl);
		/* and read back from chip to be sure that nothing
		 * else is in flight when we do the rest */
		(void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);

		/* clean up the pkeys for this port user */
		ipath_clean_part_key(pd, dd);
		/*
		 * be paranoid, and never write 0's to these, just use an
		 * unused part of the port 0 tail page.  Of course,
		 * rcvhdraddr points to a large chunk of memory, so this
		 * could still trash things, but at least it won't trash
		 * page 0, and by disabling the port, it should stop "soon",
		 * even if a packet or two is already in flight after we
		 * disabled the port.
		 */
		ipath_write_kreg_port(dd,
			dd->ipath_kregs->kr_rcvhdrtailaddr, port,
			dd->ipath_dummy_hdrq_phys);
		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
			pd->port_port, dd->ipath_dummy_hdrq_phys);

		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
					 pd->port_piocnt, 1);

		dd->ipath_f_clear_tids(dd, pd->port_port);

		if (dd->ipath_pageshadow)
			unlock_expected_tids(pd);
		ipath_stats.sps_ports--;
		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
			   pd->port_comm, pid_nr(pd->port_pid),
			   dd->ipath_unit, port);
	}

	put_pid(pd->port_pid);
	pd->port_pid = NULL;
	dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
	mutex_unlock(&ipath_mutex);
	ipath_free_pddata(dd, pd); /* after releasing the mutex */

bail:
	kfree(fd);
	return ret;
}

static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
			   struct ipath_port_info __user *uinfo)
{
	struct ipath_port_info info;
	int nup;
	int ret;
	size_t sz;

	(void) ipath_count_units(NULL, &nup, NULL);
	info.num_active = nup;
	info.unit = pd->port_dd->ipath_unit;
	info.port = pd->port_port;
	info.subport = subport;
	/* Don't return new fields if old library opened the port. */
	if (ipath_supports_subports(pd->userversion >> 16,
				    pd->userversion & 0xffff)) {
		/* Number of user ports available for this device. */
		info.num_ports = pd->port_dd->ipath_cfgports - 1;
		info.num_subports = pd->port_subport_cnt;
		sz = sizeof(info);
	} else
		sz = sizeof(info) - 2 * sizeof(u16);

	if (copy_to_user(uinfo, &info, sz)) {
		ret = -EFAULT;
		goto bail;
	}
	ret = 0;

bail:
	return ret;
}
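
/*
 * Illustration: the size arithmetic in ipath_port_info() relies on the
 * two new fields sitting at the very end of struct ipath_port_info.
 * The layout below is inferred from the copy_to_user() math, not quoted
 * from ipath_common.h:
 *
 *	struct ipath_port_info {
 *		__u32 num_active;	// always copied
 *		__u32 unit;
 *		__u16 port;
 *		__u16 subport;
 *		__u16 num_ports;	// only for subport-aware libraries
 *		__u16 num_subports;	// only for subport-aware libraries
 *	};
 *
 * An old library passes a buffer without the last two fields; copying
 * sizeof(info) - 2 * sizeof(u16) keeps the driver from overrunning it,
 * while a subport-aware library receives the full struct.
 */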

static int ipath_get_slave_info(struct ipath_portdata *pd,
				void __user *slave_mask_addr)
{
	int ret = 0;

	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
		ret = -EFAULT;
	return ret;
}

static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
				   u32 __user *inflightp)
{
	const u32 val = ipath_user_sdma_inflight_counter(pq);

	if (put_user(val, inflightp))
		return -EFAULT;

	return 0;
}

static int ipath_sdma_get_complete(struct ipath_devdata *dd,
				   struct ipath_user_sdma_queue *pq,
				   u32 __user *completep)
{
	u32 val;
	int err;

	err = ipath_user_sdma_make_progress(dd, pq);
	if (err < 0)
		return err;

	val = ipath_user_sdma_complete_counter(pq);
	if (put_user(val, completep))
		return -EFAULT;

	return 0;
}
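
/*
 * Example (illustrative): the inflight/complete counters let userspace
 * wait for its SDMA work without a dedicated syscall.  A hypothetical
 * polling loop, where "posted" is the caller's own count of queued
 * packets:
 *
 *	u32 done = 0;
 *	struct ipath_cmd c = { .type = IPATH_CMD_SDMA_COMPLETE };
 *	c.cmd.sdma_complete = (__u64) (unsigned long) &done;
 *	do {
 *		if (write(fd, &c, sizeof(c)) == -1)
 *			err(1, "IPATH_CMD_SDMA_COMPLETE");
 *	} while (done < posted);
 *
 * Each query also calls ipath_user_sdma_make_progress(), so polling
 * actively helps drain the queue rather than merely observing it.
 */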

static ssize_t ipath_write(struct file *fp, const char __user *data,
			   size_t count, loff_t *off)
{
	const struct ipath_cmd __user *ucmd;
	struct ipath_portdata *pd;
	const void __user *src;
	size_t consumed, copy;
	struct ipath_cmd cmd;
	ssize_t ret = 0;
	void *dest;

	if (count < sizeof(cmd.type)) {
		ret = -EINVAL;
		goto bail;
	}

	ucmd = (const struct ipath_cmd __user *) data;

	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
		ret = -EFAULT;
		goto bail;
	}

	consumed = sizeof(cmd.type);

	switch (cmd.type) {
	case IPATH_CMD_ASSIGN_PORT:
	case __IPATH_CMD_USER_INIT:
	case IPATH_CMD_USER_INIT:
		copy = sizeof(cmd.cmd.user_info);
		dest = &cmd.cmd.user_info;
		src = &ucmd->cmd.user_info;
		break;
	case IPATH_CMD_RECV_CTRL:
		copy = sizeof(cmd.cmd.recv_ctrl);
		dest = &cmd.cmd.recv_ctrl;
		src = &ucmd->cmd.recv_ctrl;
		break;
	case IPATH_CMD_PORT_INFO:
		copy = sizeof(cmd.cmd.port_info);
		dest = &cmd.cmd.port_info;
		src = &ucmd->cmd.port_info;
		break;
	case IPATH_CMD_TID_UPDATE:
	case IPATH_CMD_TID_FREE:
		copy = sizeof(cmd.cmd.tid_info);
		dest = &cmd.cmd.tid_info;
		src = &ucmd->cmd.tid_info;
		break;
	case IPATH_CMD_SET_PART_KEY:
		copy = sizeof(cmd.cmd.part_key);
		dest = &cmd.cmd.part_key;
		src = &ucmd->cmd.part_key;
		break;
	case __IPATH_CMD_SLAVE_INFO:
		copy = sizeof(cmd.cmd.slave_mask_addr);
		dest = &cmd.cmd.slave_mask_addr;
		src = &ucmd->cmd.slave_mask_addr;
		break;
	case IPATH_CMD_PIOAVAILUPD:	/* force an update of PIOAvail reg */
		copy = 0;
		src = NULL;
		dest = NULL;
		break;
	case IPATH_CMD_POLL_TYPE:
		copy = sizeof(cmd.cmd.poll_type);
		dest = &cmd.cmd.poll_type;
		src = &ucmd->cmd.poll_type;
		break;
	case IPATH_CMD_ARMLAUNCH_CTRL:
		copy = sizeof(cmd.cmd.armlaunch_ctrl);
		dest = &cmd.cmd.armlaunch_ctrl;
		src = &ucmd->cmd.armlaunch_ctrl;
		break;
	case IPATH_CMD_SDMA_INFLIGHT:
		copy = sizeof(cmd.cmd.sdma_inflight);
		dest = &cmd.cmd.sdma_inflight;
		src = &ucmd->cmd.sdma_inflight;
		break;
	case IPATH_CMD_SDMA_COMPLETE:
		copy = sizeof(cmd.cmd.sdma_complete);
		dest = &cmd.cmd.sdma_complete;
		src = &ucmd->cmd.sdma_complete;
		break;
	default:
		ret = -EINVAL;
		goto bail;
	}

	if (copy) {
		if ((count - consumed) < copy) {
			ret = -EINVAL;
			goto bail;
		}

		if (copy_from_user(dest, src, copy)) {
			ret = -EFAULT;
			goto bail;
		}

		consumed += copy;
	}

	pd = port_fp(fp);
	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
	    cmd.type != IPATH_CMD_ASSIGN_PORT) {
		ret = -EINVAL;
		goto bail;
	}

	switch (cmd.type) {
	case IPATH_CMD_ASSIGN_PORT:
		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
		if (ret)
			goto bail;
		break;
	case __IPATH_CMD_USER_INIT:
		/* backwards compatibility, get port first */
		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
		if (ret)
			goto bail;
		/* and fall through to current version. */
	case IPATH_CMD_USER_INIT:
		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
		if (ret)
			goto bail;
		ret = ipath_get_base_info(
			fp, (void __user *) (unsigned long)
			cmd.cmd.user_info.spu_base_info,
			cmd.cmd.user_info.spu_base_info_size);
		break;
	case IPATH_CMD_RECV_CTRL:
		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
		break;
	case IPATH_CMD_PORT_INFO:
		ret = ipath_port_info(pd, subport_fp(fp),
				      (struct ipath_port_info __user *)
				      (unsigned long) cmd.cmd.port_info);
		break;
	case IPATH_CMD_TID_UPDATE:
		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
		break;
	case IPATH_CMD_TID_FREE:
		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
		break;
	case IPATH_CMD_SET_PART_KEY:
		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
		break;
	case __IPATH_CMD_SLAVE_INFO:
		ret = ipath_get_slave_info(pd,
					   (void __user *) (unsigned long)
					   cmd.cmd.slave_mask_addr);
		break;
	case IPATH_CMD_PIOAVAILUPD:
		ipath_force_pio_avail_update(pd->port_dd);
		break;
	case IPATH_CMD_POLL_TYPE:
		pd->poll_type = cmd.cmd.poll_type;
		break;
	case IPATH_CMD_ARMLAUNCH_CTRL:
		if (cmd.cmd.armlaunch_ctrl)
			ipath_enable_armlaunch(pd->port_dd);
		else
			ipath_disable_armlaunch(pd->port_dd);
		break;
	case IPATH_CMD_SDMA_INFLIGHT:
		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
					      (u32 __user *) (unsigned long)
					      cmd.cmd.sdma_inflight);
		break;
	case IPATH_CMD_SDMA_COMPLETE:
		ret = ipath_sdma_get_complete(pd->port_dd,
					      user_sdma_queue_fp(fp),
					      (u32 __user *) (unsigned long)
					      cmd.cmd.sdma_complete);
		break;
	}

	if (ret >= 0)
		ret = consumed;

bail:
	return ret;
}

static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
			    unsigned long dim, loff_t off)
{
	struct file *filp = iocb->ki_filp;
	struct ipath_filedata *fp = filp->private_data;
	struct ipath_portdata *pd = port_fp(filp);
	struct ipath_user_sdma_queue *pq = fp->pq;

	if (!dim)
		return -EINVAL;

	return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
}
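
/*
 * Note on the command ABI above: userspace sends the type word first,
 * followed only by the union member that type selects, and a successful
 * write() returns the number of bytes consumed (type plus payload),
 * which can be less than the count passed in.  A hypothetical wrapper:
 *
 *	static int ipath_cmd(int fd, struct ipath_cmd *c, size_t payload)
 *	{
 *		ssize_t n = write(fd, c, sizeof(c->type) + payload);
 *		return n == (ssize_t)(sizeof(c->type) + payload) ? 0 : -1;
 *	}
 *
 * Anything other than full consumption comes back as a negative errno,
 * so for a correctly sized command a short return never happens.
 */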

static struct class *ipath_class;

static int init_cdev(int minor, char *name, const struct file_operations *fops,
		     struct cdev **cdevp, struct device **devp)
{
	const dev_t dev = MKDEV(IPATH_MAJOR, minor);
	struct cdev *cdev = NULL;
	struct device *device = NULL;
	int ret;

	cdev = cdev_alloc();
	if (!cdev) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate cdev for minor %d, %s\n",
		       minor, name);
		ret = -ENOMEM;
		goto done;
	}

	cdev->owner = THIS_MODULE;
	cdev->ops = fops;
	kobject_set_name(&cdev->kobj, name);

	ret = cdev_add(cdev, dev, 1);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not add cdev for minor %d, %s (err %d)\n",
		       minor, name, -ret);
		goto err_cdev;
	}

	device = device_create(ipath_class, NULL, dev, name);

	if (IS_ERR(device)) {
		ret = PTR_ERR(device);
		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
		       "device for minor %d, %s (err %d)\n",
		       minor, name, -ret);
		goto err_cdev;
	}

	goto done;

err_cdev:
	cdev_del(cdev);
	cdev = NULL;

done:
	if (ret >= 0) {
		*cdevp = cdev;
		*devp = device;
	} else {
		*cdevp = NULL;
		*devp = NULL;
	}

	return ret;
}

int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
		    struct cdev **cdevp, struct device **devp)
{
	return init_cdev(minor, name, fops, cdevp, devp);
}

static void cleanup_cdev(struct cdev **cdevp,
			 struct device **devp)
{
	struct device *dev = *devp;

	if (dev) {
		device_unregister(dev);
		*devp = NULL;
	}

	if (*cdevp) {
		cdev_del(*cdevp);
		*cdevp = NULL;
	}
}

void ipath_cdev_cleanup(struct cdev **cdevp,
			struct device **devp)
{
	cleanup_cdev(cdevp, devp);
}

static struct cdev *wildcard_cdev;
static struct device *wildcard_dev;

static const dev_t dev = MKDEV(IPATH_MAJOR, 0);

static int user_init(void)
{
	int ret;

	ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
		       "chrdev region (err %d)\n", -ret);
		goto done;
	}

	ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);

	if (IS_ERR(ipath_class)) {
		ret = PTR_ERR(ipath_class);
		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
		       "device class (err %d)\n", -ret);
		goto bail;
	}

	goto done;
bail:
	unregister_chrdev_region(dev, IPATH_NMINORS);
done:
	return ret;
}

static void user_cleanup(void)
{
	if (ipath_class) {
		class_destroy(ipath_class);
		ipath_class = NULL;
	}

	unregister_chrdev_region(dev, IPATH_NMINORS);
}

static atomic_t user_count = ATOMIC_INIT(0);
static atomic_t user_setup = ATOMIC_INIT(0);

int ipath_user_add(struct ipath_devdata *dd)
{
	char name[10];
	int ret;

	if (atomic_inc_return(&user_count) == 1) {
		ret = user_init();
		if (ret < 0) {
			ipath_dev_err(dd, "Unable to set up user support: "
				      "error %d\n", -ret);
			goto bail;
		}
		ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
				&wildcard_dev);
		if (ret < 0) {
			ipath_dev_err(dd, "Could not create wildcard "
				      "minor: error %d\n", -ret);
			goto bail_user;
		}

		atomic_set(&user_setup, 1);
	}

	snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);

	ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
			&dd->user_cdev, &dd->user_dev);
	if (ret < 0)
		ipath_dev_err(dd, "Could not create user minor %d, %s\n",
			      dd->ipath_unit + 1, name);

	goto bail;

bail_user:
	user_cleanup();
bail:
	return ret;
}
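
/*
 * Minor-number layout resulting from the init_cdev() calls above
 * (device nodes created via ipath_class):
 *
 *	minor 0     -> /dev/ipath	wildcard: "pick any usable unit"
 *	minor N + 1 -> /dev/ipathN	open unit N specifically
 *
 * This is why ipath_assign_port() maps i_minor == 0 to find_best_unit()
 * and anything else to find_free_port(i_minor - 1, ...).  The
 * user_count/user_setup pair ensures the shared pieces (chrdev region,
 * class, wildcard node) are created by the first unit added and torn
 * down only when the last unit is removed.
 */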

void ipath_user_remove(struct ipath_devdata *dd)
{
	cleanup_cdev(&dd->user_cdev, &dd->user_dev);

	if (atomic_dec_return(&user_count) == 0) {
		if (atomic_read(&user_setup) == 0)
			goto bail;

		cleanup_cdev(&wildcard_cdev, &wildcard_dev);
		user_cleanup();

		atomic_set(&user_setup, 0);
	}
bail:
	return;
}