/*
 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "ipath_verbs.h"

/**
 * ipath_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with qp->s_lock held.
 */
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
	struct ipath_cq_wc *wc;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	wc = cq->queue;
	head = wc->head;
	if (head >= (unsigned) cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else
		next = head + 1;
	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
	if (cq->ip) {
		wc->uqueue[head].wr_id = entry->wr_id;
		wc->uqueue[head].status = entry->status;
		wc->uqueue[head].opcode = entry->opcode;
		wc->uqueue[head].vendor_err = entry->vendor_err;
		wc->uqueue[head].byte_len = entry->byte_len;
		wc->uqueue[head].ex.imm_data = (__u32 __force)
			entry->ex.imm_data;
		wc->uqueue[head].qp_num = entry->qp->qp_num;
		wc->uqueue[head].src_qp = entry->src_qp;
		wc->uqueue[head].wc_flags = entry->wc_flags;
		wc->uqueue[head].pkey_index = entry->pkey_index;
		wc->uqueue[head].slid = entry->slid;
		wc->uqueue[head].sl = entry->sl;
		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		wc->uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
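		/*
		 * Note: smp_wmb() orders the uqueue[] stores above before
		 * the wc->head store below, so a user process polling the
		 * mmapped queue never sees the new head before the entry
		 * itself; the consumer side supplies the matching read
		 * barrier.
		 */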
		smp_wmb();
	} else
		wc->kqueue[head] = *entry;
	wc->head = next;

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
		cq->notify = IB_CQ_NONE;
		cq->triggered++;
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		tasklet_hi_schedule(&cq->comptask);
	}

	spin_unlock_irqrestore(&cq->lock, flags);

	if (entry->status != IB_WC_SUCCESS)
		to_idev(cq->ibcq.device)->n_wqe_errs++;
}

/**
 * ipath_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct ipath_cq *cq = to_icq(ibcq);
	struct ipath_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	/* The kernel can only poll a kernel completion queue */
	if (cq->ip) {
		npolled = -EINVAL;
		goto bail;
	}

	spin_lock_irqsave(&cq->lock, flags);

	wc = cq->queue;
	tail = wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (tail == wc->head)
			break;
		/* The kernel doesn't need a RMB since it has the lock. */
		*entry = wc->kqueue[tail];
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

bail:
	return npolled;
}

static void send_complete(unsigned long data)
{
	struct ipath_cq *cq = (struct ipath_cq *)data;

	/*
	 * The completion handler will most likely rearm the notification
	 * and poll for all pending entries.  If a new completion entry
	 * is added while we are in this routine, tasklet_hi_schedule()
	 * won't call us again until we return, so we check triggered to
	 * see if we need to call the handler again.
	 */
	for (;;) {
		u8 triggered = cq->triggered;

		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);

		if (cq->triggered == triggered)
			return;
	}
}

/**
 * ipath_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @entries: the minimum size of the completion queue
 * @comp_vector: unused by the InfiniPath driver
 * @context: the user context (used only for CQs mapped into user space)
 * @udata: used to return the mmap offset of the CQ to user space
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
			      int comp_vector, struct ib_ucontext *context,
			      struct ib_udata *udata)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_cq *cq;
	struct ipath_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;

	if (entries < 1 || entries > ib_ipath_max_cqes) {
		ret = ERR_PTR(-EINVAL);
		goto done;
	}

	/* Allocate the completion queue structure. */
	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		ret = ERR_PTR(-ENOMEM);
		goto done;
	}

	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See ipath_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	} else
		cq->ip = NULL;

	spin_lock(&dev->n_cqs_lock);
	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
		spin_unlock(&dev->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_cqs_allocated++;
	spin_unlock(&dev->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries we report must be >= the number requested,
	 * otherwise an error is returned.
	 */
	cq->ibcq.cqe = entries;
	cq->notify = IB_CQ_NONE;
	cq->triggered = 0;
	spin_lock_init(&cq->lock);
	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
	wc->head = 0;
	wc->tail = 0;
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}

/**
 * ipath_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int ipath_destroy_cq(struct ib_cq *ibcq)
{
	struct ipath_ibdev *dev = to_idev(ibcq->device);
	struct ipath_cq *cq = to_icq(ibcq);

	tasklet_kill(&cq->comptask);
	spin_lock(&dev->n_cqs_lock);
	dev->n_cqs_allocated--;
	spin_unlock(&dev->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, ipath_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

	return 0;
}

/**
 * ipath_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * Returns 0 for success, or 1 if IB_CQ_REPORT_MISSED_EVENTS was requested
 * and work completions are pending.
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 */
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct ipath_cq *cq = to_icq(ibcq);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
352 */ 353 if (cq->notify != IB_CQ_NEXT_COMP) 354 cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; 355 356 if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && 357 cq->queue->head != cq->queue->tail) 358 ret = 1; 359 360 spin_unlock_irqrestore(&cq->lock, flags); 361 362 return ret; 363} 364 365/** 366 * ipath_resize_cq - change the size of the CQ 367 * @ibcq: the completion queue 368 * 369 * Returns 0 for success. 370 */ 371int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) 372{ 373 struct ipath_cq *cq = to_icq(ibcq); 374 struct ipath_cq_wc *old_wc; 375 struct ipath_cq_wc *wc; 376 u32 head, tail, n; 377 int ret; 378 u32 sz; 379 380 if (cqe < 1 || cqe > ib_ipath_max_cqes) { 381 ret = -EINVAL; 382 goto bail; 383 } 384 385 /* 386 * Need to use vmalloc() if we want to support large #s of entries. 387 */ 388 sz = sizeof(*wc); 389 if (udata && udata->outlen >= sizeof(__u64)) 390 sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); 391 else 392 sz += sizeof(struct ib_wc) * (cqe + 1); 393 wc = vmalloc_user(sz); 394 if (!wc) { 395 ret = -ENOMEM; 396 goto bail; 397 } 398 399 /* Check that we can write the offset to mmap. */ 400 if (udata && udata->outlen >= sizeof(__u64)) { 401 __u64 offset = 0; 402 403 ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); 404 if (ret) 405 goto bail_free; 406 } 407 408 spin_lock_irq(&cq->lock); 409 /* 410 * Make sure head and tail are sane since they 411 * might be user writable. 412 */ 413 old_wc = cq->queue; 414 head = old_wc->head; 415 if (head > (u32) cq->ibcq.cqe) 416 head = (u32) cq->ibcq.cqe; 417 tail = old_wc->tail; 418 if (tail > (u32) cq->ibcq.cqe) 419 tail = (u32) cq->ibcq.cqe; 420 if (head < tail) 421 n = cq->ibcq.cqe + 1 + head - tail; 422 else 423 n = head - tail; 424 if (unlikely((u32)cqe < n)) { 425 ret = -EINVAL; 426 goto bail_unlock; 427 } 428 for (n = 0; tail != head; n++) { 429 if (cq->ip) 430 wc->uqueue[n] = old_wc->uqueue[tail]; 431 else 432 wc->kqueue[n] = old_wc->kqueue[tail]; 433 if (tail == (u32) cq->ibcq.cqe) 434 tail = 0; 435 else 436 tail++; 437 } 438 cq->ibcq.cqe = cqe; 439 wc->head = n; 440 wc->tail = 0; 441 cq->queue = wc; 442 spin_unlock_irq(&cq->lock); 443 444 vfree(old_wc); 445 446 if (cq->ip) { 447 struct ipath_ibdev *dev = to_idev(ibcq->device); 448 struct ipath_mmap_info *ip = cq->ip; 449 450 ipath_update_mmap_info(dev, ip, sz, wc); 451 452 /* 453 * Return the offset to mmap. 454 * See ipath_mmap() for details. 455 */ 456 if (udata && udata->outlen >= sizeof(__u64)) { 457 ret = ib_copy_to_udata(udata, &ip->offset, 458 sizeof(ip->offset)); 459 if (ret) 460 goto bail; 461 } 462 463 spin_lock_irq(&dev->pending_lock); 464 if (list_empty(&ip->pending_mmaps)) 465 list_add(&ip->pending_mmaps, &dev->pending_mmaps); 466 spin_unlock_irq(&dev->pending_lock); 467 } 468 469 ret = 0; 470 goto bail; 471 472bail_unlock: 473 spin_unlock_irq(&cq->lock); 474bail_free: 475 vfree(wc); 476bail: 477 return ret; 478} 479