/*
 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "qib_verbs.h"

/**
 * qib_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with qp->s_lock held.
 */
void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
{
	struct qib_cq_wc *wc;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	wc = cq->queue;
	head = wc->head;
	if (head >= (unsigned) cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else
		next = head + 1;
	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
	if (cq->ip) {
		wc->uqueue[head].wr_id = entry->wr_id;
		wc->uqueue[head].status = entry->status;
		wc->uqueue[head].opcode = entry->opcode;
		wc->uqueue[head].vendor_err = entry->vendor_err;
		wc->uqueue[head].byte_len = entry->byte_len;
		wc->uqueue[head].ex.imm_data =
			(__u32 __force)entry->ex.imm_data;
		wc->uqueue[head].qp_num = entry->qp->qp_num;
		wc->uqueue[head].src_qp = entry->src_qp;
		wc->uqueue[head].wc_flags = entry->wc_flags;
		wc->uqueue[head].pkey_index = entry->pkey_index;
		wc->uqueue[head].slid = entry->slid;
		wc->uqueue[head].sl = entry->sl;
		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		wc->uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
		smp_wmb();
	} else
		wc->kqueue[head] = *entry;
	wc->head = next;

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED &&
	     (solicited || entry->status != IB_WC_SUCCESS))) {
		cq->notify = IB_CQ_NONE;
		cq->triggered++;
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		queue_work(qib_cq_wq, &cq->comptask);
	}

	spin_unlock_irqrestore(&cq->lock, flags);
}
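
/*
 * Note on the ring layout used above: the queue holds ibcq.cqe + 1 slots
 * (indices 0..cqe).  head == tail means the queue is empty; the queue is
 * full when the slot following head equals tail, so at most ibcq.cqe
 * completions are ever stored.  For example, with ibcq.cqe == 3 the ring
 * has slots 0..3 and holds three entries before qib_cq_enter() reports
 * IB_EVENT_CQ_ERR (CQ overflow) instead of storing a fourth.
 */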

/**
 * qib_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	/* The kernel can only poll a kernel completion queue */
	if (cq->ip) {
		npolled = -EINVAL;
		goto bail;
	}

	spin_lock_irqsave(&cq->lock, flags);

	wc = cq->queue;
	tail = wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (tail == wc->head)
			break;
		/* The kernel doesn't need a RMB since it has the lock. */
		*entry = wc->kqueue[tail];
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

bail:
	return npolled;
}
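
/*
 * For a CQ created with a user context (cq->ip != NULL), the queue is
 * mapped into user space via qib_mmap() and the user-space verbs provider
 * polls it directly; the kernel path above only serves kernel CQs.
 * Roughly (an illustrative sketch, not the provider's actual code), the
 * user-space consumer mirrors the loop above:
 *
 *	while (tail != uq->head) {
 *		issue a read barrier, pairing with the smp_wmb() in
 *		    qib_cq_enter(), then copy uq->uqueue[tail];
 *		tail = (tail == cqe) ? 0 : tail + 1;
 *	}
 *	uq->tail = tail;
 */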

static void send_complete(struct work_struct *work)
{
	struct qib_cq *cq = container_of(work, struct qib_cq, comptask);

	/*
	 * The completion handler will most likely rearm the notification
	 * and poll for all pending entries.  If a new completion entry
	 * is added while we are in this routine, queue_work()
	 * won't call us again until we return so we check triggered to
	 * see if we need to call the handler again.
	 */
	for (;;) {
		u8 triggered = cq->triggered;

		/*
		 * IPoIB connected mode assumes the callback is from a
		 * soft IRQ.  We simulate this by blocking "bottom halves".
		 * See the implementation for ipoib_cm_handle_tx_wc(),
		 * netif_tx_lock_bh() and netif_tx_lock().
		 */
		local_bh_disable();
		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		local_bh_enable();

		if (cq->triggered == triggered)
			return;
	}
}

/**
 * qib_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @entries: the minimum size of the completion queue
 * @comp_vector: unused by the QLogic_IB driver
 * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
			    int comp_vector, struct ib_ucontext *context,
			    struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibdev);
	struct qib_cq *cq;
	struct qib_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;

	if (entries < 1 || entries > ib_qib_max_cqes) {
		ret = ERR_PTR(-EINVAL);
		goto done;
	}

	/* Allocate the completion queue structure. */
	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		ret = ERR_PTR(-ENOMEM);
		goto done;
	}

	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = qib_create_mmap_info(dev, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	} else
		cq->ip = NULL;

	spin_lock(&dev->n_cqs_lock);
	if (dev->n_cqs_allocated == ib_qib_max_cqs) {
		spin_unlock(&dev->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_cqs_allocated++;
	spin_unlock(&dev->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries should be >= the number requested or return
	 * an error.
	 */
	cq->ibcq.cqe = entries;
	cq->notify = IB_CQ_NONE;
	cq->triggered = 0;
	spin_lock_init(&cq->lock);
	INIT_WORK(&cq->comptask, send_complete);
	wc->head = 0;
	wc->tail = 0;
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}

/**
 * qib_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int qib_destroy_cq(struct ib_cq *ibcq)
{
	struct qib_ibdev *dev = to_idev(ibcq->device);
	struct qib_cq *cq = to_icq(ibcq);

	flush_work(&cq->comptask);
	spin_lock(&dev->n_cqs_lock);
	dev->n_cqs_allocated--;
	spin_unlock(&dev->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, qib_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

	return 0;
}
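
/*
 * Completion notification state machine implemented by qib_req_notify_cq()
 * below.  Per C11-31/C11-32, an armed IB_CQ_NEXT_COMP request must not be
 * downgraded to IB_CQ_SOLICITED, while every other transition is allowed:
 *
 *	current state		requested		new state
 *	IB_CQ_NONE		IB_CQ_SOLICITED		IB_CQ_SOLICITED
 *	IB_CQ_NONE		IB_CQ_NEXT_COMP		IB_CQ_NEXT_COMP
 *	IB_CQ_SOLICITED		IB_CQ_NEXT_COMP		IB_CQ_NEXT_COMP
 *	IB_CQ_NEXT_COMP		IB_CQ_SOLICITED		IB_CQ_NEXT_COMP (unchanged)
 */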

/**
 * qib_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * Returns 0 for success, or 1 if IB_CQ_REPORT_MISSED_EVENTS was requested
 * and completions are pending on the queue.
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 */
int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct qib_cq *cq = to_icq(ibcq);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
	 */
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
	    cq->queue->head != cq->queue->tail)
		ret = 1;

	spin_unlock_irqrestore(&cq->lock, flags);

	return ret;
}

/**
 * qib_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new size of the completion queue
 * @udata: user data for libibverbs.so
 *
 * Returns 0 for success.
 */
int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *old_wc;
	struct qib_cq_wc *wc;
	u32 head, tail, n;
	int ret;
	u32 sz;

	if (cqe < 1 || cqe > ib_qib_max_cqes) {
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
	else
		sz += sizeof(struct ib_wc) * (cqe + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = -ENOMEM;
		goto bail;
	}

	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}

	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	old_wc = cq->queue;
	head = old_wc->head;
	if (head > (u32) cq->ibcq.cqe)
		head = (u32) cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
	for (n = 0; tail != head; n++) {
		if (cq->ip)
			wc->uqueue[n] = old_wc->uqueue[tail];
		else
			wc->kqueue[n] = old_wc->kqueue[tail];
		if (tail == (u32) cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	cq->ibcq.cqe = cqe;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);

	if (cq->ip) {
		struct qib_ibdev *dev = to_idev(ibcq->device);
		struct qib_mmap_info *ip = cq->ip;

		qib_update_mmap_info(dev, ip, sz, wc);

		/*
		 * Return the offset to mmap.
		 * See qib_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			if (ret)
				goto bail;
		}

		spin_lock_irq(&dev->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = 0;
	goto bail;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(wc);
bail:
	return ret;
}
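
/*
 * The verbs above are reached through the function pointers in the
 * driver's struct ib_device, filled in when the device is registered
 * from qib_verbs.c.  A minimal sketch of that wiring, assuming the
 * surrounding registration code:
 *
 *	ibdev->create_cq = qib_create_cq;
 *	ibdev->destroy_cq = qib_destroy_cq;
 *	ibdev->resize_cq = qib_resize_cq;
 *	ibdev->poll_cq = qib_poll_cq;
 *	ibdev->req_notify_cq = qib_req_notify_cq;
 */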