ipath_file_ops.c revision c7e29ff11f23ec78b3caf691789c2b791bb596bf
1/* 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34#include <linux/pci.h> 35#include <linux/poll.h> 36#include <linux/cdev.h> 37#include <linux/swap.h> 38#include <linux/vmalloc.h> 39#include <asm/pgtable.h> 40 41#include "ipath_kernel.h" 42#include "ipath_common.h" 43 44static int ipath_open(struct inode *, struct file *); 45static int ipath_close(struct inode *, struct file *); 46static ssize_t ipath_write(struct file *, const char __user *, size_t, 47 loff_t *); 48static unsigned int ipath_poll(struct file *, struct poll_table_struct *); 49static int ipath_mmap(struct file *, struct vm_area_struct *); 50 51static const struct file_operations ipath_file_ops = { 52 .owner = THIS_MODULE, 53 .write = ipath_write, 54 .open = ipath_open, 55 .release = ipath_close, 56 .poll = ipath_poll, 57 .mmap = ipath_mmap 58}; 59 60/* 61 * Convert kernel virtual addresses to physical addresses so they don't 62 * potentially conflict with the chip addresses used as mmap offsets. 63 * It doesn't really matter what mmap offset we use as long as we can 64 * interpret it correctly. 65 */ 66static u64 cvt_kvaddr(void *p) 67{ 68 struct page *page; 69 u64 paddr = 0; 70 71 page = vmalloc_to_page(p); 72 if (page) 73 paddr = page_to_pfn(page) << PAGE_SHIFT; 74 75 return paddr; 76} 77 78static int ipath_get_base_info(struct file *fp, 79 void __user *ubase, size_t ubase_size) 80{ 81 struct ipath_portdata *pd = port_fp(fp); 82 int ret = 0; 83 struct ipath_base_info *kinfo = NULL; 84 struct ipath_devdata *dd = pd->port_dd; 85 unsigned subport_cnt; 86 int shared, master; 87 size_t sz; 88 89 subport_cnt = pd->port_subport_cnt; 90 if (!subport_cnt) { 91 shared = 0; 92 master = 0; 93 subport_cnt = 1; 94 } else { 95 shared = 1; 96 master = !subport_fp(fp); 97 } 98 99 sz = sizeof(*kinfo); 100 /* If port sharing is not requested, allow the old size structure */ 101 if (!shared) 102 sz -= 7 * sizeof(u64); 103 if (ubase_size < sz) { 104 ipath_cdbg(PROC, 105 "Base size %zu, need %zu (version mismatch?)\n", 106 ubase_size, sz); 107 ret = -EINVAL; 108 goto bail; 109 } 110 111 kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL); 112 if (kinfo == NULL) { 113 ret = -ENOMEM; 114 goto bail; 115 } 116 117 ret = dd->ipath_f_get_base_info(pd, kinfo); 118 if (ret < 0) 119 goto bail; 120 121 kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt; 122 kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize; 123 kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt; 124 kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize; 125 /* 126 * have to mmap whole thing 127 */ 128 kinfo->spi_rcv_egrbuftotlen = 129 pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; 130 kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; 131 kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / 132 pd->port_rcvegrbuf_chunks; 133 kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; 134 if (master) 135 kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; 136 /* 137 * for this use, may be ipath_cfgports summed over all chips that 138 * are are configured and present 139 */ 140 kinfo->spi_nports = dd->ipath_cfgports; 141 /* unit (chip/board) our port is on */ 142 kinfo->spi_unit = dd->ipath_unit; 143 /* for now, only a single page */ 144 kinfo->spi_tid_maxsize = PAGE_SIZE; 145 146 /* 147 * Doing this per port, and based on the skip value, etc. This has 148 * to be the actual buffer size, since the protocol code treats it 149 * as an array. 150 * 151 * These have to be set to user addresses in the user code via mmap. 152 * These values are used on return to user code for the mmap target 153 * addresses only. For 32 bit, same 44 bit address problem, so use 154 * the physical address, not virtual. Before 2.6.11, using the 155 * page_address() macro worked, but in 2.6.11, even that returns the 156 * full 64 bit address (upper bits all 1's). So far, using the 157 * physical addresses (or chip offsets, for chip mapping) works, but 158 * no doubt some future kernel release will change that, and we'll be 159 * on to yet another method of dealing with this. 160 */ 161 kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; 162 kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; 163 kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; 164 kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; 165 kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + 166 (void *) dd->ipath_statusp - 167 (void *) dd->ipath_pioavailregs_dma; 168 if (!shared) { 169 kinfo->spi_piocnt = dd->ipath_pbufsport; 170 kinfo->spi_piobufbase = (u64) pd->port_piobufs; 171 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + 172 dd->ipath_palign * pd->port_port; 173 } else if (master) { 174 kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + 175 (dd->ipath_pbufsport % subport_cnt); 176 /* Master's PIO buffers are after all the slave's */ 177 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 178 dd->ipath_palign * 179 (dd->ipath_pbufsport - kinfo->spi_piocnt); 180 } else { 181 unsigned slave = subport_fp(fp) - 1; 182 183 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; 184 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 185 dd->ipath_palign * kinfo->spi_piocnt * slave; 186 } 187 if (shared) { 188 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + 189 dd->ipath_palign * pd->port_port; 190 kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; 191 kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; 192 kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; 193 194 kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase + 195 PAGE_SIZE * subport_fp(fp)); 196 197 kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base + 198 pd->port_rcvhdrq_size * subport_fp(fp)); 199 kinfo->spi_rcvhdr_tailaddr = 0; 200 kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf + 201 pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * 202 subport_fp(fp)); 203 204 kinfo->spi_subport_uregbase = 205 cvt_kvaddr(pd->subport_uregbase); 206 kinfo->spi_subport_rcvegrbuf = 207 cvt_kvaddr(pd->subport_rcvegrbuf); 208 kinfo->spi_subport_rcvhdr_base = 209 cvt_kvaddr(pd->subport_rcvhdr_base); 210 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", 211 kinfo->spi_port, kinfo->spi_runtime_flags, 212 (unsigned long long) kinfo->spi_subport_uregbase, 213 (unsigned long long) kinfo->spi_subport_rcvegrbuf, 214 (unsigned long long) kinfo->spi_subport_rcvhdr_base); 215 } 216 217 kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / 218 dd->ipath_palign; 219 kinfo->spi_pioalign = dd->ipath_palign; 220 221 kinfo->spi_qpair = IPATH_KD_QP; 222 kinfo->spi_piosize = dd->ipath_ibmaxlen; 223 kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ 224 kinfo->spi_port = pd->port_port; 225 kinfo->spi_subport = subport_fp(fp); 226 kinfo->spi_sw_version = IPATH_KERN_SWVERSION; 227 kinfo->spi_hw_version = dd->ipath_revision; 228 229 if (master) { 230 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; 231 } 232 233 sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo); 234 if (copy_to_user(ubase, kinfo, sz)) 235 ret = -EFAULT; 236 237bail: 238 kfree(kinfo); 239 return ret; 240} 241 242/** 243 * ipath_tid_update - update a port TID 244 * @pd: the port 245 * @fp: the ipath device file 246 * @ti: the TID information 247 * 248 * The new implementation as of Oct 2004 is that the driver assigns 249 * the tid and returns it to the caller. To make it easier to 250 * catch bugs, and to reduce search time, we keep a cursor for 251 * each port, walking the shadow tid array to find one that's not 252 * in use. 253 * 254 * For now, if we can't allocate the full list, we fail, although 255 * in the long run, we'll allocate as many as we can, and the 256 * caller will deal with that by trying the remaining pages later. 257 * That means that when we fail, we have to mark the tids as not in 258 * use again, in our shadow copy. 259 * 260 * It's up to the caller to free the tids when they are done. 261 * We'll unlock the pages as they free them. 262 * 263 * Also, right now we are locking one page at a time, but since 264 * the intended use of this routine is for a single group of 265 * virtually contiguous pages, that should change to improve 266 * performance. 267 */ 268static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, 269 const struct ipath_tid_info *ti) 270{ 271 int ret = 0, ntids; 272 u32 tid, porttid, cnt, i, tidcnt, tidoff; 273 u16 *tidlist; 274 struct ipath_devdata *dd = pd->port_dd; 275 u64 physaddr; 276 unsigned long vaddr; 277 u64 __iomem *tidbase; 278 unsigned long tidmap[8]; 279 struct page **pagep = NULL; 280 unsigned subport = subport_fp(fp); 281 282 if (!dd->ipath_pageshadow) { 283 ret = -ENOMEM; 284 goto done; 285 } 286 287 cnt = ti->tidcnt; 288 if (!cnt) { 289 ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n", 290 (unsigned long long) ti->tidlist); 291 /* 292 * Should we treat as success? likely a bug 293 */ 294 ret = -EFAULT; 295 goto done; 296 } 297 porttid = pd->port_port * dd->ipath_rcvtidcnt; 298 if (!pd->port_subport_cnt) { 299 tidcnt = dd->ipath_rcvtidcnt; 300 tid = pd->port_tidcursor; 301 tidoff = 0; 302 } else if (!subport) { 303 tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + 304 (dd->ipath_rcvtidcnt % pd->port_subport_cnt); 305 tidoff = dd->ipath_rcvtidcnt - tidcnt; 306 porttid += tidoff; 307 tid = tidcursor_fp(fp); 308 } else { 309 tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; 310 tidoff = tidcnt * (subport - 1); 311 porttid += tidoff; 312 tid = tidcursor_fp(fp); 313 } 314 if (cnt > tidcnt) { 315 /* make sure it all fits in port_tid_pg_list */ 316 dev_info(&dd->pcidev->dev, "Process tried to allocate %u " 317 "TIDs, only trying max (%u)\n", cnt, tidcnt); 318 cnt = tidcnt; 319 } 320 pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; 321 tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; 322 323 memset(tidmap, 0, sizeof(tidmap)); 324 /* before decrement; chip actual # */ 325 ntids = tidcnt; 326 tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + 327 dd->ipath_rcvtidbase + 328 porttid * sizeof(*tidbase)); 329 330 ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n", 331 pd->port_port, cnt, tid, tidbase); 332 333 /* virtual address of first page in transfer */ 334 vaddr = ti->tidvaddr; 335 if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, 336 cnt * PAGE_SIZE)) { 337 ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n", 338 (void *)vaddr, cnt); 339 ret = -EFAULT; 340 goto done; 341 } 342 ret = ipath_get_user_pages(vaddr, cnt, pagep); 343 if (ret) { 344 if (ret == -EBUSY) { 345 ipath_dbg("Failed to lock addr %p, %u pages " 346 "(already locked)\n", 347 (void *) vaddr, cnt); 348 /* 349 * for now, continue, and see what happens but with 350 * the new implementation, this should never happen, 351 * unless perhaps the user has mpin'ed the pages 352 * themselves (something we need to test) 353 */ 354 ret = 0; 355 } else { 356 dev_info(&dd->pcidev->dev, 357 "Failed to lock addr %p, %u pages: " 358 "errno %d\n", (void *) vaddr, cnt, -ret); 359 goto done; 360 } 361 } 362 for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { 363 for (; ntids--; tid++) { 364 if (tid == tidcnt) 365 tid = 0; 366 if (!dd->ipath_pageshadow[porttid + tid]) 367 break; 368 } 369 if (ntids < 0) { 370 /* 371 * oops, wrapped all the way through their TIDs, 372 * and didn't have enough free; see comments at 373 * start of routine 374 */ 375 ipath_dbg("Not enough free TIDs for %u pages " 376 "(index %d), failing\n", cnt, i); 377 i--; /* last tidlist[i] not filled in */ 378 ret = -ENOMEM; 379 break; 380 } 381 tidlist[i] = tid + tidoff; 382 ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " 383 "vaddr %lx\n", i, tid + tidoff, vaddr); 384 /* we "know" system pages and TID pages are same size */ 385 dd->ipath_pageshadow[porttid + tid] = pagep[i]; 386 dd->ipath_physshadow[porttid + tid] = ipath_map_page( 387 dd->pcidev, pagep[i], 0, PAGE_SIZE, 388 PCI_DMA_FROMDEVICE); 389 /* 390 * don't need atomic or it's overhead 391 */ 392 __set_bit(tid, tidmap); 393 physaddr = dd->ipath_physshadow[porttid + tid]; 394 ipath_stats.sps_pagelocks++; 395 ipath_cdbg(VERBOSE, 396 "TID %u, vaddr %lx, physaddr %llx pgp %p\n", 397 tid, vaddr, (unsigned long long) physaddr, 398 pagep[i]); 399 dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr); 400 /* 401 * don't check this tid in ipath_portshadow, since we 402 * just filled it in; start with the next one. 403 */ 404 tid++; 405 } 406 407 if (ret) { 408 u32 limit; 409 cleanup: 410 /* jump here if copy out of updated info failed... */ 411 ipath_dbg("After failure (ret=%d), undo %d of %d entries\n", 412 -ret, i, cnt); 413 /* same code that's in ipath_free_tid() */ 414 limit = sizeof(tidmap) * BITS_PER_BYTE; 415 if (limit > tidcnt) 416 /* just in case size changes in future */ 417 limit = tidcnt; 418 tid = find_first_bit((const unsigned long *)tidmap, limit); 419 for (; tid < limit; tid++) { 420 if (!test_bit(tid, tidmap)) 421 continue; 422 if (dd->ipath_pageshadow[porttid + tid]) { 423 ipath_cdbg(VERBOSE, "Freeing TID %u\n", 424 tid); 425 dd->ipath_f_put_tid(dd, &tidbase[tid], 1, 426 dd->ipath_tidinvalid); 427 pci_unmap_page(dd->pcidev, 428 dd->ipath_physshadow[porttid + tid], 429 PAGE_SIZE, PCI_DMA_FROMDEVICE); 430 dd->ipath_pageshadow[porttid + tid] = NULL; 431 ipath_stats.sps_pageunlocks++; 432 } 433 } 434 ipath_release_user_pages(pagep, cnt); 435 } else { 436 /* 437 * Copy the updated array, with ipath_tid's filled in, back 438 * to user. Since we did the copy in already, this "should 439 * never fail" If it does, we have to clean up... 440 */ 441 if (copy_to_user((void __user *) 442 (unsigned long) ti->tidlist, 443 tidlist, cnt * sizeof(*tidlist))) { 444 ret = -EFAULT; 445 goto cleanup; 446 } 447 if (copy_to_user((void __user *) (unsigned long) ti->tidmap, 448 tidmap, sizeof tidmap)) { 449 ret = -EFAULT; 450 goto cleanup; 451 } 452 if (tid == tidcnt) 453 tid = 0; 454 if (!pd->port_subport_cnt) 455 pd->port_tidcursor = tid; 456 else 457 tidcursor_fp(fp) = tid; 458 } 459 460done: 461 if (ret) 462 ipath_dbg("Failed to map %u TID pages, failing with %d\n", 463 ti->tidcnt, -ret); 464 return ret; 465} 466 467/** 468 * ipath_tid_free - free a port TID 469 * @pd: the port 470 * @subport: the subport 471 * @ti: the TID info 472 * 473 * right now we are unlocking one page at a time, but since 474 * the intended use of this routine is for a single group of 475 * virtually contiguous pages, that should change to improve 476 * performance. We check that the TID is in range for this port 477 * but otherwise don't check validity; if user has an error and 478 * frees the wrong tid, it's only their own data that can thereby 479 * be corrupted. We do check that the TID was in use, for sanity 480 * We always use our idea of the saved address, not the address that 481 * they pass in to us. 482 */ 483 484static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, 485 const struct ipath_tid_info *ti) 486{ 487 int ret = 0; 488 u32 tid, porttid, cnt, limit, tidcnt; 489 struct ipath_devdata *dd = pd->port_dd; 490 u64 __iomem *tidbase; 491 unsigned long tidmap[8]; 492 493 if (!dd->ipath_pageshadow) { 494 ret = -ENOMEM; 495 goto done; 496 } 497 498 if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap, 499 sizeof tidmap)) { 500 ret = -EFAULT; 501 goto done; 502 } 503 504 porttid = pd->port_port * dd->ipath_rcvtidcnt; 505 if (!pd->port_subport_cnt) 506 tidcnt = dd->ipath_rcvtidcnt; 507 else if (!subport) { 508 tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + 509 (dd->ipath_rcvtidcnt % pd->port_subport_cnt); 510 porttid += dd->ipath_rcvtidcnt - tidcnt; 511 } else { 512 tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; 513 porttid += tidcnt * (subport - 1); 514 } 515 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + 516 dd->ipath_rcvtidbase + 517 porttid * sizeof(*tidbase)); 518 519 limit = sizeof(tidmap) * BITS_PER_BYTE; 520 if (limit > tidcnt) 521 /* just in case size changes in future */ 522 limit = tidcnt; 523 tid = find_first_bit(tidmap, limit); 524 ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) " 525 "set is %d, porttid %u\n", pd->port_port, ti->tidcnt, 526 limit, tid, porttid); 527 for (cnt = 0; tid < limit; tid++) { 528 /* 529 * small optimization; if we detect a run of 3 or so without 530 * any set, use find_first_bit again. That's mainly to 531 * accelerate the case where we wrapped, so we have some at 532 * the beginning, and some at the end, and a big gap 533 * in the middle. 534 */ 535 if (!test_bit(tid, tidmap)) 536 continue; 537 cnt++; 538 if (dd->ipath_pageshadow[porttid + tid]) { 539 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", 540 pd->port_pid, tid); 541 dd->ipath_f_put_tid(dd, &tidbase[tid], 1, 542 dd->ipath_tidinvalid); 543 pci_unmap_page(dd->pcidev, 544 dd->ipath_physshadow[porttid + tid], 545 PAGE_SIZE, PCI_DMA_FROMDEVICE); 546 ipath_release_user_pages( 547 &dd->ipath_pageshadow[porttid + tid], 1); 548 dd->ipath_pageshadow[porttid + tid] = NULL; 549 ipath_stats.sps_pageunlocks++; 550 } else 551 ipath_dbg("Unused tid %u, ignoring\n", tid); 552 } 553 if (cnt != ti->tidcnt) 554 ipath_dbg("passed in tidcnt %d, only %d bits set in map\n", 555 ti->tidcnt, cnt); 556done: 557 if (ret) 558 ipath_dbg("Failed to unmap %u TID pages, failing with %d\n", 559 ti->tidcnt, -ret); 560 return ret; 561} 562 563/** 564 * ipath_set_part_key - set a partition key 565 * @pd: the port 566 * @key: the key 567 * 568 * We can have up to 4 active at a time (other than the default, which is 569 * always allowed). This is somewhat tricky, since multiple ports may set 570 * the same key, so we reference count them, and clean up at exit. All 4 571 * partition keys are packed into a single infinipath register. It's an 572 * error for a process to set the same pkey multiple times. We provide no 573 * mechanism to de-allocate a pkey at this time, we may eventually need to 574 * do that. I've used the atomic operations, and no locking, and only make 575 * a single pass through what's available. This should be more than 576 * adequate for some time. I'll think about spinlocks or the like if and as 577 * it's necessary. 578 */ 579static int ipath_set_part_key(struct ipath_portdata *pd, u16 key) 580{ 581 struct ipath_devdata *dd = pd->port_dd; 582 int i, any = 0, pidx = -1; 583 u16 lkey = key & 0x7FFF; 584 int ret; 585 586 if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) { 587 /* nothing to do; this key always valid */ 588 ret = 0; 589 goto bail; 590 } 591 592 ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys " 593 "%hx:%x %hx:%x %hx:%x %hx:%x\n", 594 pd->port_port, key, dd->ipath_pkeys[0], 595 atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1], 596 atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2], 597 atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3], 598 atomic_read(&dd->ipath_pkeyrefs[3])); 599 600 if (!lkey) { 601 ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n", 602 pd->port_port); 603 ret = -EINVAL; 604 goto bail; 605 } 606 607 /* 608 * Set the full membership bit, because it has to be 609 * set in the register or the packet, and it seems 610 * cleaner to set in the register than to force all 611 * callers to set it. (see bug 4331) 612 */ 613 key |= 0x8000; 614 615 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { 616 if (!pd->port_pkeys[i] && pidx == -1) 617 pidx = i; 618 if (pd->port_pkeys[i] == key) { 619 ipath_cdbg(VERBOSE, "p%u tries to set same pkey " 620 "(%x) more than once\n", 621 pd->port_port, key); 622 ret = -EEXIST; 623 goto bail; 624 } 625 } 626 if (pidx == -1) { 627 ipath_dbg("All pkeys for port %u already in use, " 628 "can't set %x\n", pd->port_port, key); 629 ret = -EBUSY; 630 goto bail; 631 } 632 for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { 633 if (!dd->ipath_pkeys[i]) { 634 any++; 635 continue; 636 } 637 if (dd->ipath_pkeys[i] == key) { 638 atomic_t *pkrefs = &dd->ipath_pkeyrefs[i]; 639 640 if (atomic_inc_return(pkrefs) > 1) { 641 pd->port_pkeys[pidx] = key; 642 ipath_cdbg(VERBOSE, "p%u set key %x " 643 "matches #%d, count now %d\n", 644 pd->port_port, key, i, 645 atomic_read(pkrefs)); 646 ret = 0; 647 goto bail; 648 } else { 649 /* 650 * lost race, decrement count, catch below 651 */ 652 atomic_dec(pkrefs); 653 ipath_cdbg(VERBOSE, "Lost race, count was " 654 "0, after dec, it's %d\n", 655 atomic_read(pkrefs)); 656 any++; 657 } 658 } 659 if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { 660 /* 661 * It makes no sense to have both the limited and 662 * full membership PKEY set at the same time since 663 * the unlimited one will disable the limited one. 664 */ 665 ret = -EEXIST; 666 goto bail; 667 } 668 } 669 if (!any) { 670 ipath_dbg("port %u, all pkeys already in use, " 671 "can't set %x\n", pd->port_port, key); 672 ret = -EBUSY; 673 goto bail; 674 } 675 for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { 676 if (!dd->ipath_pkeys[i] && 677 atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { 678 u64 pkey; 679 680 /* for ipathstats, etc. */ 681 ipath_stats.sps_pkeys[i] = lkey; 682 pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key; 683 pkey = 684 (u64) dd->ipath_pkeys[0] | 685 ((u64) dd->ipath_pkeys[1] << 16) | 686 ((u64) dd->ipath_pkeys[2] << 32) | 687 ((u64) dd->ipath_pkeys[3] << 48); 688 ipath_cdbg(PROC, "p%u set key %x in #%d, " 689 "portidx %d, new pkey reg %llx\n", 690 pd->port_port, key, i, pidx, 691 (unsigned long long) pkey); 692 ipath_write_kreg( 693 dd, dd->ipath_kregs->kr_partitionkey, pkey); 694 695 ret = 0; 696 goto bail; 697 } 698 } 699 ipath_dbg("port %u, all pkeys already in use 2nd pass, " 700 "can't set %x\n", pd->port_port, key); 701 ret = -EBUSY; 702 703bail: 704 return ret; 705} 706 707/** 708 * ipath_manage_rcvq - manage a port's receive queue 709 * @pd: the port 710 * @subport: the subport 711 * @start_stop: action to carry out 712 * 713 * start_stop == 0 disables receive on the port, for use in queue 714 * overflow conditions. start_stop==1 re-enables, to be used to 715 * re-init the software copy of the head register 716 */ 717static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, 718 int start_stop) 719{ 720 struct ipath_devdata *dd = pd->port_dd; 721 722 ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", 723 start_stop ? "en" : "dis", dd->ipath_unit, 724 pd->port_port, subport); 725 if (subport) 726 goto bail; 727 /* atomically clear receive enable port. */ 728 if (start_stop) { 729 /* 730 * On enable, force in-memory copy of the tail register to 731 * 0, so that protocol code doesn't have to worry about 732 * whether or not the chip has yet updated the in-memory 733 * copy or not on return from the system call. The chip 734 * always resets it's tail register back to 0 on a 735 * transition from disabled to enabled. This could cause a 736 * problem if software was broken, and did the enable w/o 737 * the disable, but eventually the in-memory copy will be 738 * updated and correct itself, even in the face of software 739 * bugs. 740 */ 741 *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0; 742 set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, 743 &dd->ipath_rcvctrl); 744 } else 745 clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, 746 &dd->ipath_rcvctrl); 747 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 748 dd->ipath_rcvctrl); 749 /* now be sure chip saw it before we return */ 750 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 751 if (start_stop) { 752 /* 753 * And try to be sure that tail reg update has happened too. 754 * This should in theory interlock with the RXE changes to 755 * the tail register. Don't assign it to the tail register 756 * in memory copy, since we could overwrite an update by the 757 * chip if we did. 758 */ 759 ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); 760 } 761 /* always; new head should be equal to new tail; see above */ 762bail: 763 return 0; 764} 765 766static void ipath_clean_part_key(struct ipath_portdata *pd, 767 struct ipath_devdata *dd) 768{ 769 int i, j, pchanged = 0; 770 u64 oldpkey; 771 772 /* for debugging only */ 773 oldpkey = (u64) dd->ipath_pkeys[0] | 774 ((u64) dd->ipath_pkeys[1] << 16) | 775 ((u64) dd->ipath_pkeys[2] << 32) | 776 ((u64) dd->ipath_pkeys[3] << 48); 777 778 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { 779 if (!pd->port_pkeys[i]) 780 continue; 781 ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i, 782 pd->port_pkeys[i]); 783 for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) { 784 /* check for match independent of the global bit */ 785 if ((dd->ipath_pkeys[j] & 0x7fff) != 786 (pd->port_pkeys[i] & 0x7fff)) 787 continue; 788 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) { 789 ipath_cdbg(VERBOSE, "p%u clear key " 790 "%x matches #%d\n", 791 pd->port_port, 792 pd->port_pkeys[i], j); 793 ipath_stats.sps_pkeys[j] = 794 dd->ipath_pkeys[j] = 0; 795 pchanged++; 796 } 797 else ipath_cdbg( 798 VERBOSE, "p%u key %x matches #%d, " 799 "but ref still %d\n", pd->port_port, 800 pd->port_pkeys[i], j, 801 atomic_read(&dd->ipath_pkeyrefs[j])); 802 break; 803 } 804 pd->port_pkeys[i] = 0; 805 } 806 if (pchanged) { 807 u64 pkey = (u64) dd->ipath_pkeys[0] | 808 ((u64) dd->ipath_pkeys[1] << 16) | 809 ((u64) dd->ipath_pkeys[2] << 32) | 810 ((u64) dd->ipath_pkeys[3] << 48); 811 ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, " 812 "new pkey reg %llx\n", pd->port_port, 813 (unsigned long long) oldpkey, 814 (unsigned long long) pkey); 815 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, 816 pkey); 817 } 818} 819 820/* 821 * Initialize the port data with the receive buffer sizes 822 * so this can be done while the master port is locked. 823 * Otherwise, there is a race with a slave opening the port 824 * and seeing these fields uninitialized. 825 */ 826static void init_user_egr_sizes(struct ipath_portdata *pd) 827{ 828 struct ipath_devdata *dd = pd->port_dd; 829 unsigned egrperchunk, egrcnt, size; 830 831 /* 832 * to avoid wasting a lot of memory, we allocate 32KB chunks of 833 * physically contiguous memory, advance through it until used up 834 * and then allocate more. Of course, we need memory to store those 835 * extra pointers, now. Started out with 256KB, but under heavy 836 * memory pressure (creating large files and then copying them over 837 * NFS while doing lots of MPI jobs), we hit some allocation 838 * failures, even though we can sleep... (2.6.10) Still get 839 * failures at 64K. 32K is the lowest we can go without wasting 840 * additional memory. 841 */ 842 size = 0x8000; 843 egrperchunk = size / dd->ipath_rcvegrbufsize; 844 egrcnt = dd->ipath_rcvegrcnt; 845 pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; 846 pd->port_rcvegrbufs_perchunk = egrperchunk; 847 pd->port_rcvegrbuf_size = size; 848} 849 850/** 851 * ipath_create_user_egr - allocate eager TID buffers 852 * @pd: the port to allocate TID buffers for 853 * 854 * This routine is now quite different for user and kernel, because 855 * the kernel uses skb's, for the accelerated network performance 856 * This is the user port version 857 * 858 * Allocate the eager TID buffers and program them into infinipath 859 * They are no longer completely contiguous, we do multiple allocation 860 * calls. 861 */ 862static int ipath_create_user_egr(struct ipath_portdata *pd) 863{ 864 struct ipath_devdata *dd = pd->port_dd; 865 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; 866 size_t size; 867 int ret; 868 gfp_t gfp_flags; 869 870 /* 871 * GFP_USER, but without GFP_FS, so buffer cache can be 872 * coalesced (we hope); otherwise, even at order 4, 873 * heavy filesystem activity makes these fail, and we can 874 * use compound pages. 875 */ 876 gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; 877 878 egrcnt = dd->ipath_rcvegrcnt; 879 /* TID number offset for this port */ 880 egroff = pd->port_port * egrcnt; 881 egrsize = dd->ipath_rcvegrbufsize; 882 ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " 883 "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); 884 885 chunk = pd->port_rcvegrbuf_chunks; 886 egrperchunk = pd->port_rcvegrbufs_perchunk; 887 size = pd->port_rcvegrbuf_size; 888 pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), 889 GFP_KERNEL); 890 if (!pd->port_rcvegrbuf) { 891 ret = -ENOMEM; 892 goto bail; 893 } 894 pd->port_rcvegrbuf_phys = 895 kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), 896 GFP_KERNEL); 897 if (!pd->port_rcvegrbuf_phys) { 898 ret = -ENOMEM; 899 goto bail_rcvegrbuf; 900 } 901 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { 902 903 pd->port_rcvegrbuf[e] = dma_alloc_coherent( 904 &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e], 905 gfp_flags); 906 907 if (!pd->port_rcvegrbuf[e]) { 908 ret = -ENOMEM; 909 goto bail_rcvegrbuf_phys; 910 } 911 } 912 913 pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0]; 914 915 for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) { 916 dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk]; 917 unsigned i; 918 919 for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { 920 dd->ipath_f_put_tid(dd, e + egroff + 921 (u64 __iomem *) 922 ((char __iomem *) 923 dd->ipath_kregbase + 924 dd->ipath_rcvegrbase), 0, pa); 925 pa += egrsize; 926 } 927 cond_resched(); /* don't hog the cpu */ 928 } 929 930 ret = 0; 931 goto bail; 932 933bail_rcvegrbuf_phys: 934 for (e = 0; e < pd->port_rcvegrbuf_chunks && 935 pd->port_rcvegrbuf[e]; e++) { 936 dma_free_coherent(&dd->pcidev->dev, size, 937 pd->port_rcvegrbuf[e], 938 pd->port_rcvegrbuf_phys[e]); 939 940 } 941 kfree(pd->port_rcvegrbuf_phys); 942 pd->port_rcvegrbuf_phys = NULL; 943bail_rcvegrbuf: 944 kfree(pd->port_rcvegrbuf); 945 pd->port_rcvegrbuf = NULL; 946bail: 947 return ret; 948} 949 950 951/* common code for the mappings on dma_alloc_coherent mem */ 952static int ipath_mmap_mem(struct vm_area_struct *vma, 953 struct ipath_portdata *pd, unsigned len, int write_ok, 954 void *kvaddr, char *what) 955{ 956 struct ipath_devdata *dd = pd->port_dd; 957 unsigned long pfn; 958 int ret; 959 960 if ((vma->vm_end - vma->vm_start) > len) { 961 dev_info(&dd->pcidev->dev, 962 "FAIL on %s: len %lx > %x\n", what, 963 vma->vm_end - vma->vm_start, len); 964 ret = -EFAULT; 965 goto bail; 966 } 967 968 if (!write_ok) { 969 if (vma->vm_flags & VM_WRITE) { 970 dev_info(&dd->pcidev->dev, 971 "%s must be mapped readonly\n", what); 972 ret = -EPERM; 973 goto bail; 974 } 975 976 /* don't allow them to later change with mprotect */ 977 vma->vm_flags &= ~VM_MAYWRITE; 978 } 979 980 pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; 981 ret = remap_pfn_range(vma, vma->vm_start, pfn, 982 len, vma->vm_page_prot); 983 if (ret) 984 dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x " 985 "bytes r%c failed: %d\n", what, pd->port_port, 986 pfn, len, write_ok?'w':'o', ret); 987 else 988 ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes " 989 "r%c\n", what, pd->port_port, pfn, len, 990 write_ok?'w':'o'); 991bail: 992 return ret; 993} 994 995static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, 996 u64 ureg) 997{ 998 unsigned long phys; 999 int ret; 1000 1001 /* 1002 * This is real hardware, so use io_remap. This is the mechanism 1003 * for the user process to update the head registers for their port 1004 * in the chip. 1005 */ 1006 if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { 1007 dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen " 1008 "%lx > PAGE\n", vma->vm_end - vma->vm_start); 1009 ret = -EFAULT; 1010 } else { 1011 phys = dd->ipath_physaddr + ureg; 1012 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1013 1014 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; 1015 ret = io_remap_pfn_range(vma, vma->vm_start, 1016 phys >> PAGE_SHIFT, 1017 vma->vm_end - vma->vm_start, 1018 vma->vm_page_prot); 1019 } 1020 return ret; 1021} 1022 1023static int mmap_piobufs(struct vm_area_struct *vma, 1024 struct ipath_devdata *dd, 1025 struct ipath_portdata *pd, 1026 unsigned piobufs, unsigned piocnt) 1027{ 1028 unsigned long phys; 1029 int ret; 1030 1031 /* 1032 * When we map the PIO buffers in the chip, we want to map them as 1033 * writeonly, no read possible. This prevents access to previous 1034 * process data, and catches users who might try to read the i/o 1035 * space due to a bug. 1036 */ 1037 if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { 1038 dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " 1039 "reqlen %lx > PAGE\n", 1040 vma->vm_end - vma->vm_start); 1041 ret = -EINVAL; 1042 goto bail; 1043 } 1044 1045 phys = dd->ipath_physaddr + piobufs; 1046 1047 /* 1048 * Don't mark this as non-cached, or we don't get the 1049 * write combining behavior we want on the PIO buffers! 1050 */ 1051 1052#if defined(__powerpc__) 1053 /* There isn't a generic way to specify writethrough mappings */ 1054 pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; 1055 pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; 1056 pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; 1057#endif 1058 1059 /* 1060 * don't allow them to later change to readable with mprotect (for when 1061 * not initially mapped readable, as is normally the case) 1062 */ 1063 vma->vm_flags &= ~VM_MAYREAD; 1064 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; 1065 1066 ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, 1067 vma->vm_end - vma->vm_start, 1068 vma->vm_page_prot); 1069bail: 1070 return ret; 1071} 1072 1073static int mmap_rcvegrbufs(struct vm_area_struct *vma, 1074 struct ipath_portdata *pd) 1075{ 1076 struct ipath_devdata *dd = pd->port_dd; 1077 unsigned long start, size; 1078 size_t total_size, i; 1079 unsigned long pfn; 1080 int ret; 1081 1082 size = pd->port_rcvegrbuf_size; 1083 total_size = pd->port_rcvegrbuf_chunks * size; 1084 if ((vma->vm_end - vma->vm_start) > total_size) { 1085 dev_info(&dd->pcidev->dev, "FAIL on egr bufs: " 1086 "reqlen %lx > actual %lx\n", 1087 vma->vm_end - vma->vm_start, 1088 (unsigned long) total_size); 1089 ret = -EINVAL; 1090 goto bail; 1091 } 1092 1093 if (vma->vm_flags & VM_WRITE) { 1094 dev_info(&dd->pcidev->dev, "Can't map eager buffers as " 1095 "writable (flags=%lx)\n", vma->vm_flags); 1096 ret = -EPERM; 1097 goto bail; 1098 } 1099 /* don't allow them to later change to writeable with mprotect */ 1100 vma->vm_flags &= ~VM_MAYWRITE; 1101 1102 start = vma->vm_start; 1103 1104 for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { 1105 pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT; 1106 ret = remap_pfn_range(vma, start, pfn, size, 1107 vma->vm_page_prot); 1108 if (ret < 0) 1109 goto bail; 1110 } 1111 ret = 0; 1112 1113bail: 1114 return ret; 1115} 1116 1117/* 1118 * ipath_file_vma_nopage - handle a VMA page fault. 1119 */ 1120static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, 1121 unsigned long address, int *type) 1122{ 1123 unsigned long offset = address - vma->vm_start; 1124 struct page *page = NOPAGE_SIGBUS; 1125 void *pageptr; 1126 1127 /* 1128 * Convert the vmalloc address into a struct page. 1129 */ 1130 pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT)); 1131 page = vmalloc_to_page(pageptr); 1132 if (!page) 1133 goto out; 1134 1135 /* Increment the reference count. */ 1136 get_page(page); 1137 if (type) 1138 *type = VM_FAULT_MINOR; 1139out: 1140 return page; 1141} 1142 1143static struct vm_operations_struct ipath_file_vm_ops = { 1144 .nopage = ipath_file_vma_nopage, 1145}; 1146 1147static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, 1148 struct ipath_portdata *pd, unsigned subport) 1149{ 1150 unsigned long len; 1151 struct ipath_devdata *dd; 1152 void *addr; 1153 size_t size; 1154 int ret = 0; 1155 1156 /* If the port is not shared, all addresses should be physical */ 1157 if (!pd->port_subport_cnt) 1158 goto bail; 1159 1160 dd = pd->port_dd; 1161 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; 1162 1163 /* 1164 * Each process has all the subport uregbase, rcvhdrq, and 1165 * rcvegrbufs mmapped - as an array for all the processes, 1166 * and also separately for this process. 1167 */ 1168 if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) { 1169 addr = pd->subport_uregbase; 1170 size = PAGE_SIZE * pd->port_subport_cnt; 1171 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) { 1172 addr = pd->subport_rcvhdr_base; 1173 size = pd->port_rcvhdrq_size * pd->port_subport_cnt; 1174 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) { 1175 addr = pd->subport_rcvegrbuf; 1176 size *= pd->port_subport_cnt; 1177 } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase + 1178 PAGE_SIZE * subport)) { 1179 addr = pd->subport_uregbase + PAGE_SIZE * subport; 1180 size = PAGE_SIZE; 1181 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base + 1182 pd->port_rcvhdrq_size * subport)) { 1183 addr = pd->subport_rcvhdr_base + 1184 pd->port_rcvhdrq_size * subport; 1185 size = pd->port_rcvhdrq_size; 1186 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf + 1187 size * subport)) { 1188 addr = pd->subport_rcvegrbuf + size * subport; 1189 /* rcvegrbufs are read-only on the slave */ 1190 if (vma->vm_flags & VM_WRITE) { 1191 dev_info(&dd->pcidev->dev, 1192 "Can't map eager buffers as " 1193 "writable (flags=%lx)\n", vma->vm_flags); 1194 ret = -EPERM; 1195 goto bail; 1196 } 1197 /* 1198 * Don't allow permission to later change to writeable 1199 * with mprotect. 1200 */ 1201 vma->vm_flags &= ~VM_MAYWRITE; 1202 } else { 1203 goto bail; 1204 } 1205 len = vma->vm_end - vma->vm_start; 1206 if (len > size) { 1207 ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); 1208 ret = -EINVAL; 1209 goto bail; 1210 } 1211 1212 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; 1213 vma->vm_ops = &ipath_file_vm_ops; 1214 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; 1215 ret = 1; 1216 1217bail: 1218 return ret; 1219} 1220 1221/** 1222 * ipath_mmap - mmap various structures into user space 1223 * @fp: the file pointer 1224 * @vma: the VM area 1225 * 1226 * We use this to have a shared buffer between the kernel and the user code 1227 * for the rcvhdr queue, egr buffers, and the per-port user regs and pio 1228 * buffers in the chip. We have the open and close entries so we can bump 1229 * the ref count and keep the driver from being unloaded while still mapped. 1230 */ 1231static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) 1232{ 1233 struct ipath_portdata *pd; 1234 struct ipath_devdata *dd; 1235 u64 pgaddr, ureg; 1236 unsigned piobufs, piocnt; 1237 int ret; 1238 1239 pd = port_fp(fp); 1240 if (!pd) { 1241 ret = -EINVAL; 1242 goto bail; 1243 } 1244 dd = pd->port_dd; 1245 1246 /* 1247 * This is the ipath_do_user_init() code, mapping the shared buffers 1248 * into the user process. The address referred to by vm_pgoff is the 1249 * file offset passed via mmap(). For shared ports, this is the 1250 * kernel vmalloc() address of the pages to share with the master. 1251 * For non-shared or master ports, this is a physical address. 1252 * We only do one mmap for each space mapped. 1253 */ 1254 pgaddr = vma->vm_pgoff << PAGE_SHIFT; 1255 1256 /* 1257 * Check for 0 in case one of the allocations failed, but user 1258 * called mmap anyway. 1259 */ 1260 if (!pgaddr) { 1261 ret = -EINVAL; 1262 goto bail; 1263 } 1264 1265 ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", 1266 (unsigned long long) pgaddr, vma->vm_start, 1267 vma->vm_end - vma->vm_start, dd->ipath_unit, 1268 pd->port_port, subport_fp(fp)); 1269 1270 /* 1271 * Physical addresses must fit in 40 bits for our hardware. 1272 * Check for kernel virtual addresses first, anything else must 1273 * match a HW or memory address. 1274 */ 1275 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); 1276 if (ret) { 1277 if (ret > 0) 1278 ret = 0; 1279 goto bail; 1280 } 1281 1282 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; 1283 if (!pd->port_subport_cnt) { 1284 /* port is not shared */ 1285 piocnt = dd->ipath_pbufsport; 1286 piobufs = pd->port_piobufs; 1287 } else if (!subport_fp(fp)) { 1288 /* caller is the master */ 1289 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + 1290 (dd->ipath_pbufsport % pd->port_subport_cnt); 1291 piobufs = pd->port_piobufs + 1292 dd->ipath_palign * (dd->ipath_pbufsport - piocnt); 1293 } else { 1294 unsigned slave = subport_fp(fp) - 1; 1295 1296 /* caller is a slave */ 1297 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; 1298 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; 1299 } 1300 1301 if (pgaddr == ureg) 1302 ret = mmap_ureg(vma, dd, ureg); 1303 else if (pgaddr == piobufs) 1304 ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); 1305 else if (pgaddr == dd->ipath_pioavailregs_phys) 1306 /* in-memory copy of pioavail registers */ 1307 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, 1308 (void *) dd->ipath_pioavailregs_dma, 1309 "pioavail registers"); 1310 else if (pgaddr == pd->port_rcvegr_phys) 1311 ret = mmap_rcvegrbufs(vma, pd); 1312 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) 1313 /* 1314 * The rcvhdrq itself; readonly except on HT (so have 1315 * to allow writable mapping), multiple pages, contiguous 1316 * from an i/o perspective. 1317 */ 1318 ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, 1319 pd->port_rcvhdrq, 1320 "rcvhdrq"); 1321 else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) 1322 /* in-memory copy of rcvhdrq tail register */ 1323 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, 1324 pd->port_rcvhdrtail_kvaddr, 1325 "rcvhdrq tail"); 1326 else 1327 ret = -EINVAL; 1328 1329 vma->vm_private_data = NULL; 1330 1331 if (ret < 0) 1332 dev_info(&dd->pcidev->dev, 1333 "Failure %d on off %llx len %lx\n", 1334 -ret, (unsigned long long)pgaddr, 1335 vma->vm_end - vma->vm_start); 1336bail: 1337 return ret; 1338} 1339 1340static unsigned int ipath_poll(struct file *fp, 1341 struct poll_table_struct *pt) 1342{ 1343 struct ipath_portdata *pd; 1344 u32 head, tail; 1345 int bit; 1346 unsigned pollflag = 0; 1347 struct ipath_devdata *dd; 1348 1349 pd = port_fp(fp); 1350 if (!pd) 1351 goto bail; 1352 dd = pd->port_dd; 1353 1354 bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; 1355 set_bit(bit, &dd->ipath_rcvctrl); 1356 1357 /* 1358 * Before blocking, make sure that head is still == tail, 1359 * reading from the chip, so we can be sure the interrupt 1360 * enable has made it to the chip. If not equal, disable 1361 * interrupt again and return immediately. This avoids races, 1362 * and the overhead of the chip read doesn't matter much at 1363 * this point, since we are waiting for something anyway. 1364 */ 1365 1366 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1367 dd->ipath_rcvctrl); 1368 1369 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); 1370 tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); 1371 1372 if (tail == head) { 1373 set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); 1374 if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ 1375 (void)ipath_write_ureg(dd, ur_rcvhdrhead, 1376 dd->ipath_rhdrhead_intr_off 1377 | head, pd->port_port); 1378 poll_wait(fp, &pd->port_wait, pt); 1379 1380 if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { 1381 /* timed out, no packets received */ 1382 clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); 1383 pd->port_rcvwait_to++; 1384 } 1385 else 1386 pollflag = POLLIN | POLLRDNORM; 1387 } 1388 else { 1389 /* it's already happened; don't do wait_event overhead */ 1390 pollflag = POLLIN | POLLRDNORM; 1391 pd->port_rcvnowait++; 1392 } 1393 1394 clear_bit(bit, &dd->ipath_rcvctrl); 1395 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1396 dd->ipath_rcvctrl); 1397 1398bail: 1399 return pollflag; 1400} 1401 1402static int init_subports(struct ipath_devdata *dd, 1403 struct ipath_portdata *pd, 1404 const struct ipath_user_info *uinfo) 1405{ 1406 int ret = 0; 1407 unsigned num_subports; 1408 size_t size; 1409 1410 /* 1411 * If the user is requesting zero or one port, 1412 * skip the subport allocation. 1413 */ 1414 if (uinfo->spu_subport_cnt <= 1) 1415 goto bail; 1416 1417 /* Old user binaries don't know about new subport implementation */ 1418 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) { 1419 dev_info(&dd->pcidev->dev, 1420 "Mismatched user minor version (%d) and driver " 1421 "minor version (%d) while port sharing. Ensure " 1422 "that driver and library are from the same " 1423 "release.\n", 1424 (int) (uinfo->spu_userversion & 0xffff), 1425 IPATH_USER_SWMINOR); 1426 goto bail; 1427 } 1428 if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) { 1429 ret = -EINVAL; 1430 goto bail; 1431 } 1432 1433 num_subports = uinfo->spu_subport_cnt; 1434 pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports); 1435 if (!pd->subport_uregbase) { 1436 ret = -ENOMEM; 1437 goto bail; 1438 } 1439 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ 1440 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * 1441 sizeof(u32), PAGE_SIZE) * num_subports; 1442 pd->subport_rcvhdr_base = vmalloc(size); 1443 if (!pd->subport_rcvhdr_base) { 1444 ret = -ENOMEM; 1445 goto bail_ureg; 1446 } 1447 1448 pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * 1449 pd->port_rcvegrbuf_size * 1450 num_subports); 1451 if (!pd->subport_rcvegrbuf) { 1452 ret = -ENOMEM; 1453 goto bail_rhdr; 1454 } 1455 1456 pd->port_subport_cnt = uinfo->spu_subport_cnt; 1457 pd->port_subport_id = uinfo->spu_subport_id; 1458 pd->active_slaves = 1; 1459 set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); 1460 memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports); 1461 memset(pd->subport_rcvhdr_base, 0, size); 1462 memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks * 1463 pd->port_rcvegrbuf_size * 1464 num_subports); 1465 goto bail; 1466 1467bail_rhdr: 1468 vfree(pd->subport_rcvhdr_base); 1469bail_ureg: 1470 vfree(pd->subport_uregbase); 1471 pd->subport_uregbase = NULL; 1472bail: 1473 return ret; 1474} 1475 1476static int try_alloc_port(struct ipath_devdata *dd, int port, 1477 struct file *fp, 1478 const struct ipath_user_info *uinfo) 1479{ 1480 struct ipath_portdata *pd; 1481 int ret; 1482 1483 if (!(pd = dd->ipath_pd[port])) { 1484 void *ptmp; 1485 1486 pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); 1487 1488 /* 1489 * Allocate memory for use in ipath_tid_update() just once 1490 * at open, not per call. Reduces cost of expected send 1491 * setup. 1492 */ 1493 ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + 1494 dd->ipath_rcvtidcnt * sizeof(struct page **), 1495 GFP_KERNEL); 1496 if (!pd || !ptmp) { 1497 ipath_dev_err(dd, "Unable to allocate portdata " 1498 "memory, failing open\n"); 1499 ret = -ENOMEM; 1500 kfree(pd); 1501 kfree(ptmp); 1502 goto bail; 1503 } 1504 dd->ipath_pd[port] = pd; 1505 dd->ipath_pd[port]->port_port = port; 1506 dd->ipath_pd[port]->port_dd = dd; 1507 dd->ipath_pd[port]->port_tid_pg_list = ptmp; 1508 init_waitqueue_head(&dd->ipath_pd[port]->port_wait); 1509 } 1510 if (!pd->port_cnt) { 1511 pd->userversion = uinfo->spu_userversion; 1512 init_user_egr_sizes(pd); 1513 if ((ret = init_subports(dd, pd, uinfo)) != 0) 1514 goto bail; 1515 ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", 1516 current->comm, current->pid, dd->ipath_unit, 1517 port); 1518 pd->port_cnt = 1; 1519 port_fp(fp) = pd; 1520 pd->port_pid = current->pid; 1521 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); 1522 ipath_stats.sps_ports++; 1523 ret = 0; 1524 } else 1525 ret = -EBUSY; 1526 1527bail: 1528 return ret; 1529} 1530 1531static inline int usable(struct ipath_devdata *dd) 1532{ 1533 return dd && 1534 (dd->ipath_flags & IPATH_PRESENT) && 1535 dd->ipath_kregbase && 1536 dd->ipath_lid && 1537 !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED 1538 | IPATH_LINKUNK)); 1539} 1540 1541static int find_free_port(int unit, struct file *fp, 1542 const struct ipath_user_info *uinfo) 1543{ 1544 struct ipath_devdata *dd = ipath_lookup(unit); 1545 int ret, i; 1546 1547 if (!dd) { 1548 ret = -ENODEV; 1549 goto bail; 1550 } 1551 1552 if (!usable(dd)) { 1553 ret = -ENETDOWN; 1554 goto bail; 1555 } 1556 1557 for (i = 1; i < dd->ipath_cfgports; i++) { 1558 ret = try_alloc_port(dd, i, fp, uinfo); 1559 if (ret != -EBUSY) 1560 goto bail; 1561 } 1562 ret = -EBUSY; 1563 1564bail: 1565 return ret; 1566} 1567 1568static int find_best_unit(struct file *fp, 1569 const struct ipath_user_info *uinfo) 1570{ 1571 int ret = 0, i, prefunit = -1, devmax; 1572 int maxofallports, npresent, nup; 1573 int ndev; 1574 1575 devmax = ipath_count_units(&npresent, &nup, &maxofallports); 1576 1577 /* 1578 * This code is present to allow a knowledgeable person to 1579 * specify the layout of processes to processors before opening 1580 * this driver, and then we'll assign the process to the "closest" 1581 * InfiniPath chip to that processor (we assume reasonable connectivity, 1582 * for now). This code assumes that if affinity has been set 1583 * before this point, that at most one cpu is set; for now this 1584 * is reasonable. I check for both cpus_empty() and cpus_full(), 1585 * in case some kernel variant sets none of the bits when no 1586 * affinity is set. 2.6.11 and 12 kernels have all present 1587 * cpus set. Some day we'll have to fix it up further to handle 1588 * a cpu subset. This algorithm fails for two HT chips connected 1589 * in tunnel fashion. Eventually this needs real topology 1590 * information. There may be some issues with dual core numbering 1591 * as well. This needs more work prior to release. 1592 */ 1593 if (!cpus_empty(current->cpus_allowed) && 1594 !cpus_full(current->cpus_allowed)) { 1595 int ncpus = num_online_cpus(), curcpu = -1; 1596 for (i = 0; i < ncpus; i++) 1597 if (cpu_isset(i, current->cpus_allowed)) { 1598 ipath_cdbg(PROC, "%s[%u] affinity set for " 1599 "cpu %d\n", current->comm, 1600 current->pid, i); 1601 curcpu = i; 1602 } 1603 if (curcpu != -1) { 1604 if (npresent) { 1605 prefunit = curcpu / (ncpus / npresent); 1606 ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, " 1607 "%d cpus/chip, select unit %d\n", 1608 current->comm, current->pid, 1609 npresent, ncpus, ncpus / npresent, 1610 prefunit); 1611 } 1612 } 1613 } 1614 1615 /* 1616 * user ports start at 1, kernel port is 0 1617 * For now, we do round-robin access across all chips 1618 */ 1619 1620 if (prefunit != -1) 1621 devmax = prefunit + 1; 1622recheck: 1623 for (i = 1; i < maxofallports; i++) { 1624 for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax; 1625 ndev++) { 1626 struct ipath_devdata *dd = ipath_lookup(ndev); 1627 1628 if (!usable(dd)) 1629 continue; /* can't use this unit */ 1630 if (i >= dd->ipath_cfgports) 1631 /* 1632 * Maxed out on users of this unit. Try 1633 * next. 1634 */ 1635 continue; 1636 ret = try_alloc_port(dd, i, fp, uinfo); 1637 if (!ret) 1638 goto done; 1639 } 1640 } 1641 1642 if (npresent) { 1643 if (nup == 0) { 1644 ret = -ENETDOWN; 1645 ipath_dbg("No ports available (none initialized " 1646 "and ready)\n"); 1647 } else { 1648 if (prefunit > 0) { 1649 /* if started above 0, retry from 0 */ 1650 ipath_cdbg(PROC, 1651 "%s[%u] no ports on prefunit " 1652 "%d, clear and re-check\n", 1653 current->comm, current->pid, 1654 prefunit); 1655 devmax = ipath_count_units(NULL, NULL, 1656 NULL); 1657 prefunit = -1; 1658 goto recheck; 1659 } 1660 ret = -EBUSY; 1661 ipath_dbg("No ports available\n"); 1662 } 1663 } else { 1664 ret = -ENXIO; 1665 ipath_dbg("No boards found\n"); 1666 } 1667 1668done: 1669 return ret; 1670} 1671 1672static int find_shared_port(struct file *fp, 1673 const struct ipath_user_info *uinfo) 1674{ 1675 int devmax, ndev, i; 1676 int ret = 0; 1677 1678 devmax = ipath_count_units(NULL, NULL, NULL); 1679 1680 for (ndev = 0; ndev < devmax; ndev++) { 1681 struct ipath_devdata *dd = ipath_lookup(ndev); 1682 1683 if (!dd) 1684 continue; 1685 for (i = 1; i < dd->ipath_cfgports; i++) { 1686 struct ipath_portdata *pd = dd->ipath_pd[i]; 1687 1688 /* Skip ports which are not yet open */ 1689 if (!pd || !pd->port_cnt) 1690 continue; 1691 /* Skip port if it doesn't match the requested one */ 1692 if (pd->port_subport_id != uinfo->spu_subport_id) 1693 continue; 1694 /* Verify the sharing process matches the master */ 1695 if (pd->port_subport_cnt != uinfo->spu_subport_cnt || 1696 pd->userversion != uinfo->spu_userversion || 1697 pd->port_cnt >= pd->port_subport_cnt) { 1698 ret = -EINVAL; 1699 goto done; 1700 } 1701 port_fp(fp) = pd; 1702 subport_fp(fp) = pd->port_cnt++; 1703 tidcursor_fp(fp) = 0; 1704 pd->active_slaves |= 1 << subport_fp(fp); 1705 ipath_cdbg(PROC, 1706 "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", 1707 current->comm, current->pid, 1708 subport_fp(fp), 1709 pd->port_comm, pd->port_pid, 1710 dd->ipath_unit, pd->port_port); 1711 ret = 1; 1712 goto done; 1713 } 1714 } 1715 1716done: 1717 return ret; 1718} 1719 1720static int ipath_open(struct inode *in, struct file *fp) 1721{ 1722 /* The real work is performed later in ipath_assign_port() */ 1723 fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); 1724 return fp->private_data ? 0 : -ENOMEM; 1725} 1726 1727 1728/* Get port early, so can set affinity prior to memory allocation */ 1729static int ipath_assign_port(struct file *fp, 1730 const struct ipath_user_info *uinfo) 1731{ 1732 int ret; 1733 int i_minor; 1734 unsigned swminor; 1735 1736 /* Check to be sure we haven't already initialized this file */ 1737 if (port_fp(fp)) { 1738 ret = -EINVAL; 1739 goto done; 1740 } 1741 1742 /* for now, if major version is different, bail */ 1743 if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { 1744 ipath_dbg("User major version %d not same as driver " 1745 "major %d\n", uinfo->spu_userversion >> 16, 1746 IPATH_USER_SWMAJOR); 1747 ret = -ENODEV; 1748 goto done; 1749 } 1750 1751 swminor = uinfo->spu_userversion & 0xffff; 1752 if (swminor != IPATH_USER_SWMINOR) 1753 ipath_dbg("User minor version %d not same as driver " 1754 "minor %d\n", swminor, IPATH_USER_SWMINOR); 1755 1756 mutex_lock(&ipath_mutex); 1757 1758 if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt && 1759 (ret = find_shared_port(fp, uinfo))) { 1760 mutex_unlock(&ipath_mutex); 1761 if (ret > 0) 1762 ret = 0; 1763 goto done; 1764 } 1765 1766 i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; 1767 ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", 1768 (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); 1769 1770 if (i_minor) 1771 ret = find_free_port(i_minor - 1, fp, uinfo); 1772 else 1773 ret = find_best_unit(fp, uinfo); 1774 1775 mutex_unlock(&ipath_mutex); 1776 1777done: 1778 return ret; 1779} 1780 1781 1782static int ipath_do_user_init(struct file *fp, 1783 const struct ipath_user_info *uinfo) 1784{ 1785 int ret; 1786 struct ipath_portdata *pd = port_fp(fp); 1787 struct ipath_devdata *dd; 1788 u32 head32; 1789 1790 /* Subports don't need to initialize anything since master did it. */ 1791 if (subport_fp(fp)) { 1792 ret = wait_event_interruptible(pd->port_wait, 1793 !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag)); 1794 goto done; 1795 } 1796 1797 dd = pd->port_dd; 1798 1799 if (uinfo->spu_rcvhdrsize) { 1800 ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); 1801 if (ret) 1802 goto done; 1803 } 1804 1805 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ 1806 1807 /* for right now, kernel piobufs are at end, so port 1 is at 0 */ 1808 pd->port_piobufs = dd->ipath_piobufbase + 1809 dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; 1810 ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", 1811 pd->port_port, pd->port_piobufs); 1812 1813 /* 1814 * Now allocate the rcvhdr Q and eager TIDs; skip the TID 1815 * array for time being. If pd->port_port > chip-supported, 1816 * we need to do extra stuff here to handle by handling overflow 1817 * through port 0, someday 1818 */ 1819 ret = ipath_create_rcvhdrq(dd, pd); 1820 if (!ret) 1821 ret = ipath_create_user_egr(pd); 1822 if (ret) 1823 goto done; 1824 1825 /* 1826 * set the eager head register for this port to the current values 1827 * of the tail pointers, since we don't know if they were 1828 * updated on last use of the port. 1829 */ 1830 head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); 1831 ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); 1832 dd->ipath_lastegrheads[pd->port_port] = -1; 1833 dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; 1834 ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", 1835 pd->port_port, head32); 1836 pd->port_tidcursor = 0; /* start at beginning after open */ 1837 /* 1838 * now enable the port; the tail registers will be written to memory 1839 * by the chip as soon as it sees the write to 1840 * dd->ipath_kregs->kr_rcvctrl. The update only happens on 1841 * transition from 0 to 1, so clear it first, then set it as part of 1842 * enabling the port. This will (very briefly) affect any other 1843 * open ports, but it shouldn't be long enough to be an issue. 1844 * We explictly set the in-memory copy to 0 beforehand, so we don't 1845 * have to wait to be sure the DMA update has happened. 1846 */ 1847 *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL; 1848 set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, 1849 &dd->ipath_rcvctrl); 1850 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1851 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); 1852 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1853 dd->ipath_rcvctrl); 1854 /* Notify any waiting slaves */ 1855 if (pd->port_subport_cnt) { 1856 clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); 1857 wake_up(&pd->port_wait); 1858 } 1859done: 1860 return ret; 1861} 1862 1863/** 1864 * unlock_exptid - unlock any expected TID entries port still had in use 1865 * @pd: port 1866 * 1867 * We don't actually update the chip here, because we do a bulk update 1868 * below, using ipath_f_clear_tids. 1869 */ 1870static void unlock_expected_tids(struct ipath_portdata *pd) 1871{ 1872 struct ipath_devdata *dd = pd->port_dd; 1873 int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt; 1874 int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt; 1875 1876 ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n", 1877 pd->port_port); 1878 for (i = port_tidbase; i < maxtid; i++) { 1879 if (!dd->ipath_pageshadow[i]) 1880 continue; 1881 1882 pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i], 1883 PAGE_SIZE, PCI_DMA_FROMDEVICE); 1884 ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i], 1885 1); 1886 dd->ipath_pageshadow[i] = NULL; 1887 cnt++; 1888 ipath_stats.sps_pageunlocks++; 1889 } 1890 if (cnt) 1891 ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n", 1892 pd->port_port, cnt); 1893 1894 if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks) 1895 ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n", 1896 (unsigned long long) ipath_stats.sps_pagelocks, 1897 (unsigned long long) 1898 ipath_stats.sps_pageunlocks); 1899} 1900 1901static int ipath_close(struct inode *in, struct file *fp) 1902{ 1903 int ret = 0; 1904 struct ipath_filedata *fd; 1905 struct ipath_portdata *pd; 1906 struct ipath_devdata *dd; 1907 unsigned port; 1908 1909 ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", 1910 (long)in->i_rdev, fp->private_data); 1911 1912 mutex_lock(&ipath_mutex); 1913 1914 fd = (struct ipath_filedata *) fp->private_data; 1915 fp->private_data = NULL; 1916 pd = fd->pd; 1917 if (!pd) { 1918 mutex_unlock(&ipath_mutex); 1919 goto bail; 1920 } 1921 if (--pd->port_cnt) { 1922 /* 1923 * XXX If the master closes the port before the slave(s), 1924 * revoke the mmap for the eager receive queue so 1925 * the slave(s) don't wait for receive data forever. 1926 */ 1927 pd->active_slaves &= ~(1 << fd->subport); 1928 mutex_unlock(&ipath_mutex); 1929 goto bail; 1930 } 1931 port = pd->port_port; 1932 dd = pd->port_dd; 1933 1934 if (pd->port_hdrqfull) { 1935 ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " 1936 "during run\n", pd->port_comm, pd->port_pid, 1937 pd->port_hdrqfull); 1938 pd->port_hdrqfull = 0; 1939 } 1940 1941 if (pd->port_rcvwait_to || pd->port_piowait_to 1942 || pd->port_rcvnowait || pd->port_pionowait) { 1943 ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; " 1944 "%u rcv %u, pio already\n", 1945 pd->port_port, pd->port_rcvwait_to, 1946 pd->port_piowait_to, pd->port_rcvnowait, 1947 pd->port_pionowait); 1948 pd->port_rcvwait_to = pd->port_piowait_to = 1949 pd->port_rcvnowait = pd->port_pionowait = 0; 1950 } 1951 if (pd->port_flag) { 1952 ipath_dbg("port %u port_flag still set to 0x%lx\n", 1953 pd->port_port, pd->port_flag); 1954 pd->port_flag = 0; 1955 } 1956 1957 if (dd->ipath_kregbase) { 1958 int i; 1959 /* atomically clear receive enable port. */ 1960 clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port, 1961 &dd->ipath_rcvctrl); 1962 ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, 1963 dd->ipath_rcvctrl); 1964 /* and read back from chip to be sure that nothing 1965 * else is in flight when we do the rest */ 1966 (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1967 1968 /* clean up the pkeys for this port user */ 1969 ipath_clean_part_key(pd, dd); 1970 /* 1971 * be paranoid, and never write 0's to these, just use an 1972 * unused part of the port 0 tail page. Of course, 1973 * rcvhdraddr points to a large chunk of memory, so this 1974 * could still trash things, but at least it won't trash 1975 * page 0, and by disabling the port, it should stop "soon", 1976 * even if a packet or two is in already in flight after we 1977 * disabled the port. 1978 */ 1979 ipath_write_kreg_port(dd, 1980 dd->ipath_kregs->kr_rcvhdrtailaddr, port, 1981 dd->ipath_dummy_hdrq_phys); 1982 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, 1983 pd->port_port, dd->ipath_dummy_hdrq_phys); 1984 1985 i = dd->ipath_pbufsport * (port - 1); 1986 ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); 1987 1988 dd->ipath_f_clear_tids(dd, pd->port_port); 1989 1990 if (dd->ipath_pageshadow) 1991 unlock_expected_tids(pd); 1992 ipath_stats.sps_ports--; 1993 ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", 1994 pd->port_comm, pd->port_pid, 1995 dd->ipath_unit, port); 1996 } 1997 1998 pd->port_pid = 0; 1999 dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ 2000 mutex_unlock(&ipath_mutex); 2001 ipath_free_pddata(dd, pd); /* after releasing the mutex */ 2002 2003bail: 2004 kfree(fd); 2005 return ret; 2006} 2007 2008static int ipath_port_info(struct ipath_portdata *pd, u16 subport, 2009 struct ipath_port_info __user *uinfo) 2010{ 2011 struct ipath_port_info info; 2012 int nup; 2013 int ret; 2014 size_t sz; 2015 2016 (void) ipath_count_units(NULL, &nup, NULL); 2017 info.num_active = nup; 2018 info.unit = pd->port_dd->ipath_unit; 2019 info.port = pd->port_port; 2020 info.subport = subport; 2021 /* Don't return new fields if old library opened the port. */ 2022 if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) { 2023 /* Number of user ports available for this device. */ 2024 info.num_ports = pd->port_dd->ipath_cfgports - 1; 2025 info.num_subports = pd->port_subport_cnt; 2026 sz = sizeof(info); 2027 } else 2028 sz = sizeof(info) - 2 * sizeof(u16); 2029 2030 if (copy_to_user(uinfo, &info, sz)) { 2031 ret = -EFAULT; 2032 goto bail; 2033 } 2034 ret = 0; 2035 2036bail: 2037 return ret; 2038} 2039 2040static int ipath_get_slave_info(struct ipath_portdata *pd, 2041 void __user *slave_mask_addr) 2042{ 2043 int ret = 0; 2044 2045 if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32))) 2046 ret = -EFAULT; 2047 return ret; 2048} 2049 2050static ssize_t ipath_write(struct file *fp, const char __user *data, 2051 size_t count, loff_t *off) 2052{ 2053 const struct ipath_cmd __user *ucmd; 2054 struct ipath_portdata *pd; 2055 const void __user *src; 2056 size_t consumed, copy; 2057 struct ipath_cmd cmd; 2058 ssize_t ret = 0; 2059 void *dest; 2060 2061 if (count < sizeof(cmd.type)) { 2062 ret = -EINVAL; 2063 goto bail; 2064 } 2065 2066 ucmd = (const struct ipath_cmd __user *) data; 2067 2068 if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) { 2069 ret = -EFAULT; 2070 goto bail; 2071 } 2072 2073 consumed = sizeof(cmd.type); 2074 2075 switch (cmd.type) { 2076 case IPATH_CMD_ASSIGN_PORT: 2077 case __IPATH_CMD_USER_INIT: 2078 case IPATH_CMD_USER_INIT: 2079 copy = sizeof(cmd.cmd.user_info); 2080 dest = &cmd.cmd.user_info; 2081 src = &ucmd->cmd.user_info; 2082 break; 2083 case IPATH_CMD_RECV_CTRL: 2084 copy = sizeof(cmd.cmd.recv_ctrl); 2085 dest = &cmd.cmd.recv_ctrl; 2086 src = &ucmd->cmd.recv_ctrl; 2087 break; 2088 case IPATH_CMD_PORT_INFO: 2089 copy = sizeof(cmd.cmd.port_info); 2090 dest = &cmd.cmd.port_info; 2091 src = &ucmd->cmd.port_info; 2092 break; 2093 case IPATH_CMD_TID_UPDATE: 2094 case IPATH_CMD_TID_FREE: 2095 copy = sizeof(cmd.cmd.tid_info); 2096 dest = &cmd.cmd.tid_info; 2097 src = &ucmd->cmd.tid_info; 2098 break; 2099 case IPATH_CMD_SET_PART_KEY: 2100 copy = sizeof(cmd.cmd.part_key); 2101 dest = &cmd.cmd.part_key; 2102 src = &ucmd->cmd.part_key; 2103 break; 2104 case __IPATH_CMD_SLAVE_INFO: 2105 copy = sizeof(cmd.cmd.slave_mask_addr); 2106 dest = &cmd.cmd.slave_mask_addr; 2107 src = &ucmd->cmd.slave_mask_addr; 2108 break; 2109 default: 2110 ret = -EINVAL; 2111 goto bail; 2112 } 2113 2114 if ((count - consumed) < copy) { 2115 ret = -EINVAL; 2116 goto bail; 2117 } 2118 2119 if (copy_from_user(dest, src, copy)) { 2120 ret = -EFAULT; 2121 goto bail; 2122 } 2123 2124 consumed += copy; 2125 pd = port_fp(fp); 2126 if (!pd && cmd.type != __IPATH_CMD_USER_INIT && 2127 cmd.type != IPATH_CMD_ASSIGN_PORT) { 2128 ret = -EINVAL; 2129 goto bail; 2130 } 2131 2132 switch (cmd.type) { 2133 case IPATH_CMD_ASSIGN_PORT: 2134 ret = ipath_assign_port(fp, &cmd.cmd.user_info); 2135 if (ret) 2136 goto bail; 2137 break; 2138 case __IPATH_CMD_USER_INIT: 2139 /* backwards compatibility, get port first */ 2140 ret = ipath_assign_port(fp, &cmd.cmd.user_info); 2141 if (ret) 2142 goto bail; 2143 /* and fall through to current version. */ 2144 case IPATH_CMD_USER_INIT: 2145 ret = ipath_do_user_init(fp, &cmd.cmd.user_info); 2146 if (ret) 2147 goto bail; 2148 ret = ipath_get_base_info( 2149 fp, (void __user *) (unsigned long) 2150 cmd.cmd.user_info.spu_base_info, 2151 cmd.cmd.user_info.spu_base_info_size); 2152 break; 2153 case IPATH_CMD_RECV_CTRL: 2154 ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl); 2155 break; 2156 case IPATH_CMD_PORT_INFO: 2157 ret = ipath_port_info(pd, subport_fp(fp), 2158 (struct ipath_port_info __user *) 2159 (unsigned long) cmd.cmd.port_info); 2160 break; 2161 case IPATH_CMD_TID_UPDATE: 2162 ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info); 2163 break; 2164 case IPATH_CMD_TID_FREE: 2165 ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info); 2166 break; 2167 case IPATH_CMD_SET_PART_KEY: 2168 ret = ipath_set_part_key(pd, cmd.cmd.part_key); 2169 break; 2170 case __IPATH_CMD_SLAVE_INFO: 2171 ret = ipath_get_slave_info(pd, 2172 (void __user *) (unsigned long) 2173 cmd.cmd.slave_mask_addr); 2174 break; 2175 } 2176 2177 if (ret >= 0) 2178 ret = consumed; 2179 2180bail: 2181 return ret; 2182} 2183 2184static struct class *ipath_class; 2185 2186static int init_cdev(int minor, char *name, const struct file_operations *fops, 2187 struct cdev **cdevp, struct class_device **class_devp) 2188{ 2189 const dev_t dev = MKDEV(IPATH_MAJOR, minor); 2190 struct cdev *cdev = NULL; 2191 struct class_device *class_dev = NULL; 2192 int ret; 2193 2194 cdev = cdev_alloc(); 2195 if (!cdev) { 2196 printk(KERN_ERR IPATH_DRV_NAME 2197 ": Could not allocate cdev for minor %d, %s\n", 2198 minor, name); 2199 ret = -ENOMEM; 2200 goto done; 2201 } 2202 2203 cdev->owner = THIS_MODULE; 2204 cdev->ops = fops; 2205 kobject_set_name(&cdev->kobj, name); 2206 2207 ret = cdev_add(cdev, dev, 1); 2208 if (ret < 0) { 2209 printk(KERN_ERR IPATH_DRV_NAME 2210 ": Could not add cdev for minor %d, %s (err %d)\n", 2211 minor, name, -ret); 2212 goto err_cdev; 2213 } 2214 2215 class_dev = class_device_create(ipath_class, NULL, dev, NULL, name); 2216 2217 if (IS_ERR(class_dev)) { 2218 ret = PTR_ERR(class_dev); 2219 printk(KERN_ERR IPATH_DRV_NAME ": Could not create " 2220 "class_dev for minor %d, %s (err %d)\n", 2221 minor, name, -ret); 2222 goto err_cdev; 2223 } 2224 2225 goto done; 2226 2227err_cdev: 2228 cdev_del(cdev); 2229 cdev = NULL; 2230 2231done: 2232 if (ret >= 0) { 2233 *cdevp = cdev; 2234 *class_devp = class_dev; 2235 } else { 2236 *cdevp = NULL; 2237 *class_devp = NULL; 2238 } 2239 2240 return ret; 2241} 2242 2243int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, 2244 struct cdev **cdevp, struct class_device **class_devp) 2245{ 2246 return init_cdev(minor, name, fops, cdevp, class_devp); 2247} 2248 2249static void cleanup_cdev(struct cdev **cdevp, 2250 struct class_device **class_devp) 2251{ 2252 struct class_device *class_dev = *class_devp; 2253 2254 if (class_dev) { 2255 class_device_unregister(class_dev); 2256 *class_devp = NULL; 2257 } 2258 2259 if (*cdevp) { 2260 cdev_del(*cdevp); 2261 *cdevp = NULL; 2262 } 2263} 2264 2265void ipath_cdev_cleanup(struct cdev **cdevp, 2266 struct class_device **class_devp) 2267{ 2268 cleanup_cdev(cdevp, class_devp); 2269} 2270 2271static struct cdev *wildcard_cdev; 2272static struct class_device *wildcard_class_dev; 2273 2274static const dev_t dev = MKDEV(IPATH_MAJOR, 0); 2275 2276static int user_init(void) 2277{ 2278 int ret; 2279 2280 ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME); 2281 if (ret < 0) { 2282 printk(KERN_ERR IPATH_DRV_NAME ": Could not register " 2283 "chrdev region (err %d)\n", -ret); 2284 goto done; 2285 } 2286 2287 ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME); 2288 2289 if (IS_ERR(ipath_class)) { 2290 ret = PTR_ERR(ipath_class); 2291 printk(KERN_ERR IPATH_DRV_NAME ": Could not create " 2292 "device class (err %d)\n", -ret); 2293 goto bail; 2294 } 2295 2296 goto done; 2297bail: 2298 unregister_chrdev_region(dev, IPATH_NMINORS); 2299done: 2300 return ret; 2301} 2302 2303static void user_cleanup(void) 2304{ 2305 if (ipath_class) { 2306 class_destroy(ipath_class); 2307 ipath_class = NULL; 2308 } 2309 2310 unregister_chrdev_region(dev, IPATH_NMINORS); 2311} 2312 2313static atomic_t user_count = ATOMIC_INIT(0); 2314static atomic_t user_setup = ATOMIC_INIT(0); 2315 2316int ipath_user_add(struct ipath_devdata *dd) 2317{ 2318 char name[10]; 2319 int ret; 2320 2321 if (atomic_inc_return(&user_count) == 1) { 2322 ret = user_init(); 2323 if (ret < 0) { 2324 ipath_dev_err(dd, "Unable to set up user support: " 2325 "error %d\n", -ret); 2326 goto bail; 2327 } 2328 ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev, 2329 &wildcard_class_dev); 2330 if (ret < 0) { 2331 ipath_dev_err(dd, "Could not create wildcard " 2332 "minor: error %d\n", -ret); 2333 goto bail_user; 2334 } 2335 2336 atomic_set(&user_setup, 1); 2337 } 2338 2339 snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit); 2340 2341 ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops, 2342 &dd->user_cdev, &dd->user_class_dev); 2343 if (ret < 0) 2344 ipath_dev_err(dd, "Could not create user minor %d, %s\n", 2345 dd->ipath_unit + 1, name); 2346 2347 goto bail; 2348 2349bail_user: 2350 user_cleanup(); 2351bail: 2352 return ret; 2353} 2354 2355void ipath_user_remove(struct ipath_devdata *dd) 2356{ 2357 cleanup_cdev(&dd->user_cdev, &dd->user_class_dev); 2358 2359 if (atomic_dec_return(&user_count) == 0) { 2360 if (atomic_read(&user_setup) == 0) 2361 goto bail; 2362 2363 cleanup_cdev(&wildcard_cdev, &wildcard_class_dev); 2364 user_cleanup(); 2365 2366 atomic_set(&user_setup, 0); 2367 } 2368bail: 2369 return; 2370} 2371