ipath_rc.c revision 6ed89b9574776d4178f1ad754d20e4f1e5a4b6c8
/*
 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ipath_verbs.h"
#include "ipath_kernel.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x

static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
		       u32 psn, u32 pmtu)
{
	u32 len;

	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
	ss->sge = wqe->sg_list[0];
	ss->sg_list = wqe->sg_list + 1;
	ss->num_sge = wqe->wr.num_sge;
	ipath_skip_sge(ss, len);
	return wqe->length - len;
}

/**
 * ipath_init_restart - initialize the qp->s_sge after a restart
 * @qp: the QP whose SGE we're restarting
 * @wqe: the work queue to initialize the QP's SGE from
 *
 * The QP s_lock should be held and interrupts disabled.
 */
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
	struct ipath_ibdev *dev;

	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
				ib_mtu_enum_to_int(qp->path_mtu));
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (list_empty(&qp->timerwait))
		list_add_tail(&qp->timerwait,
			      &dev->pending[dev->pending_index]);
	spin_unlock(&dev->pending_lock);
}
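
/*
 * Responder-side ACK bookkeeping: s_ack_queue[] is a ring of
 * IPATH_MAX_RDMA_ATOMIC + 1 entries.  The receive path advances
 * r_head_ack_queue when it queues an RDMA read or atomic request and
 * ipath_make_rc_ack() advances s_tail_ack_queue as the responses go
 * out; head == tail means only a plain ACK/NAK (if any) is pending.
 */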

/**
 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 * @bth0p: pointer to the BTH opcode word
 * @bth2p: pointer to the BTH PSN word
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note that we are in the responder's side of the QP context.
 * Note the QP s_lock must be held.
 */
static int ipath_make_rc_ack(struct ipath_qp *qp,
			     struct ipath_other_headers *ohdr,
			     u32 pmtu, u32 *bth0p, u32 *bth2p)
{
	struct ipath_ack_entry *e;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;

	switch (qp->s_ack_state) {
	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
	case OP(ATOMIC_ACKNOWLEDGE):
		qp->s_ack_state = OP(ACKNOWLEDGE);
		/* FALLTHROUGH */
	case OP(ACKNOWLEDGE):
		/* Check for no next entry in the queue. */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			if (qp->s_flags & IPATH_S_ACK_PENDING)
				goto normal;
			goto bail;
		}

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->opcode == OP(RDMA_READ_REQUEST)) {
			/* Copy SGE state in case we need to resend */
			qp->s_ack_rdma_sge = e->rdma_sge;
			qp->s_cur_sge = &qp->s_ack_rdma_sge;
			len = e->rdma_sge.sge.sge_length;
			if (len > pmtu) {
				len = pmtu;
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
			} else {
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
				if (++qp->s_tail_ack_queue >
				    IPATH_MAX_RDMA_ATOMIC)
					qp->s_tail_ack_queue = 0;
			}
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_rdma_psn = e->psn;
			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		} else {
			/* COMPARE_SWAP or FETCH_ADD */
			qp->s_cur_sge = NULL;
			len = 0;
			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
			ohdr->u.at.aeth = ipath_compute_aeth(qp);
			ohdr->u.at.atomic_ack_eth[0] =
				cpu_to_be32(e->atomic_data >> 32);
			ohdr->u.at.atomic_ack_eth[1] =
				cpu_to_be32(e->atomic_data);
			hwords += sizeof(ohdr->u.at) / sizeof(u32);
			bth2 = e->psn;
			if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
				qp->s_tail_ack_queue = 0;
		}
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		len = qp->s_ack_rdma_sge.sge.sge_length;
		if (len > pmtu)
			len = pmtu;
		else {
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
			if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
				qp->s_tail_ack_queue = 0;
		}
		bth0 = qp->s_ack_state << 24;
		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		break;

	default:
	normal:
		/*
		 * Send a regular ACK.
		 * Set the s_ack_state so we wait until after sending
		 * the ACK before setting s_ack_state to ACKNOWLEDGE
		 * (see above).
		 */
		qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
		qp->s_flags &= ~IPATH_S_ACK_PENDING;
		qp->s_cur_sge = NULL;
		if (qp->s_nak_state)
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					    (qp->s_nak_state <<
					     IPATH_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
		len = 0;
		bth0 = OP(ACKNOWLEDGE) << 24;
		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
	}
	qp->s_hdrwords = hwords;
	qp->s_cur_size = len;
	*bth0p = bth0;
	*bth2p = bth2;
	return 1;

bail:
	return 0;
}
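
/*
 * Example of the response state machine in ipath_make_rc_ack(): an
 * RDMA read of 3 * pmtu bytes is answered with RDMA_READ_RESPONSE_FIRST,
 * then MIDDLE, then LAST (only FIRST and LAST carry an AETH), while a
 * read of at most pmtu bytes gets a single RDMA_READ_RESPONSE_ONLY.
 */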

/**
 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 * @bth0p: pointer to the BTH opcode word
 * @bth2p: pointer to the BTH PSN word
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held and interrupts disabled.
 */
int ipath_make_rc_req(struct ipath_qp *qp,
		      struct ipath_other_headers *ohdr,
		      u32 pmtu, u32 *bth0p, u32 *bth2p)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_sge_state *ss;
	struct ipath_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;
	char newreq;

	/* Sending responses has a higher priority than sending requests. */
	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
	     qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
	    ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
		goto done;

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
	    qp->s_rnr_timeout || qp->s_wait_credit)
		goto bail;

	/* Limit the number of packets sent without an ACK. */
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
		qp->s_wait_credit = 1;
		dev->n_rc_stalls++;
		goto bail;
	}

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 0;

	/* Send a request. */
	wqe = get_swqe_ptr(qp, qp->s_cur);
	switch (qp->s_state) {
	default:
		/*
		 * Resend an old request or start a new one.
		 *
		 * We keep track of the current SWQE so that
		 * we don't reset the "furthest progress" state
		 * if we need to back up.
		 */
		newreq = 0;
		if (qp->s_cur == qp->s_tail) {
			/* Check if send work queue is empty. */
			if (qp->s_tail == qp->s_head)
				goto bail;
			/*
			 * If a fence is requested, wait for previous
			 * RDMA read and atomic operations to finish.
			 */
			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
			    qp->s_num_rd_atomic) {
				qp->s_flags |= IPATH_S_FENCE_PENDING;
				goto bail;
			}
			wqe->psn = qp->s_next_psn;
			newreq = 1;
		}
		/*
		 * Note that we have to be careful not to modify the
		 * original work request since we may need to resend
		 * it.
		 */
		len = wqe->length;
		ss = &qp->s_sge;
		bth2 = 0;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto bail;
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;
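
		/*
		 * The "no credit" checks above and below compare the
		 * WQE's ssn against s_lsn, the limit sequence number
		 * derived from the credits the responder advertises in
		 * its AETHs ((u32) -1 disables the check); see
		 * ipath_get_credit().
		 */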

		case IB_WR_RDMA_WRITE:
			if (newreq && qp->s_lsn != (u32) -1)
				qp->s_lsn++;
			/* FALLTHROUGH */
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto bail;
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / sizeof(u32);
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after RETH */
				ohdr->u.rc.imm_data = wqe->wr.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_READ:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= IPATH_S_RDMAR_PENDING;
					goto bail;
				}
				qp->s_num_rd_atomic++;
				if (qp->s_lsn != (u32) -1)
					qp->s_lsn++;
				/*
				 * Adjust s_next_psn to count the
				 * expected number of responses.
				 */
				if (len > pmtu)
					qp->s_next_psn += (len - 1) / pmtu;
				wqe->lpsn = qp->s_next_psn++;
			}
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			qp->s_state = OP(RDMA_READ_REQUEST);
			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
			ss = NULL;
			len = 0;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;
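
		/*
		 * An RDMA read request itself carries no payload (ss ==
		 * NULL, len == 0 above); s_next_psn was advanced by one
		 * PSN per expected response packet, so the request and
		 * its responses together occupy wqe->psn..wqe->lpsn.
		 */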

		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= IPATH_S_RDMAR_PENDING;
					goto bail;
				}
				qp->s_num_rd_atomic++;
				if (qp->s_lsn != (u32) -1)
					qp->s_lsn++;
				wqe->lpsn = wqe->psn;
			}
			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
				qp->s_state = OP(COMPARE_SWAP);
				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
					wqe->wr.wr.atomic.swap);
				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
					wqe->wr.wr.atomic.compare_add);
			} else {
				qp->s_state = OP(FETCH_ADD);
				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
					wqe->wr.wr.atomic.compare_add);
				ohdr->u.atomic_eth.compare_data = 0;
			}
			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
				wqe->wr.wr.atomic.remote_addr >> 32);
			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
				wqe->wr.wr.atomic.remote_addr);
			ohdr->u.atomic_eth.rkey = cpu_to_be32(
				wqe->wr.wr.atomic.rkey);
			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
			ss = NULL;
			len = 0;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			goto bail;
		}
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = wqe->length;
		if (newreq) {
			qp->s_tail++;
			if (qp->s_tail >= qp->s_size)
				qp->s_tail = 0;
		}
		bth2 |= qp->s_psn & IPATH_PSN_MASK;
		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			qp->s_psn = wqe->lpsn + 1;
		else {
			qp->s_psn++;
			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
				qp->s_next_psn = qp->s_psn;
		}
		/*
		 * Put the QP on the pending list so lost ACKs will cause
		 * a retry.  More than one request can be pending so the
		 * QP may already be on the dev->pending list.
		 */
		spin_lock(&dev->pending_lock);
		if (list_empty(&qp->timerwait))
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		/*
		 * This case can only happen if a send is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;
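
	/*
	 * Note: the OP(RDMA_READ_RESPONSE_*) values used as s_state in
	 * this switch never come from received packets; reset_psn()
	 * parks s_state on them to mark which kind of request (send,
	 * RDMA write, or RDMA read) is being restarted mid-stream.
	 */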

	case OP(RDMA_READ_RESPONSE_LAST):
		/*
		 * This case can only happen if an RDMA write is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/*
		 * This case can only happen if an RDMA read is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
		ohdr->u.rc.reth.vaddr =
			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->wr.wr.rdma.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;
	}
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
		bth2 |= 1 << 31;	/* Request ACK. */
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;
	*bth0p = bth0 | (qp->s_state << 24);
	*bth2p = bth2;
done:
	return 1;

bail:
	return 0;
}

/**
 * send_rc_ack - construct an ACK packet and send it
 * @qp: a pointer to the QP
 *
 * This is called from ipath_rc_rcv() and only uses the receive
 * side QP state.
 * Note that RDMA reads and atomics are handled in the
 * send side QP state and tasklet.
 */
static void send_rc_ack(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	u16 lrh0;
	u32 bth0;
	u32 hwords;
	struct ipath_ib_header hdr;
	struct ipath_other_headers *ohdr;
	unsigned long flags;

	/* Don't send ACK or NAK if an RDMA read or atomic is pending. */
	if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
		goto queue_ack;

	/* Construct the header. */
	ohdr = &hdr.u.oth;
	lrh0 = IPATH_LRH_BTH;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
	hwords = 6;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
					 &qp->remote_ah_attr.grh,
					 hwords, 0);
		ohdr = &hdr.u.l.oth;
		lrh0 = IPATH_LRH_GRH;
	}
	/* read pkey_index w/o lock (it's atomic) */
	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
		OP(ACKNOWLEDGE) << 24;
	if (qp->r_nak_state)
		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					   (qp->r_nak_state <<
					    IPATH_AETH_CREDIT_SHIFT));
	else
		ohdr->u.aeth = ipath_compute_aeth(qp);
	lrh0 |= qp->remote_ah_attr.sl << 4;
	hdr.lrh[0] = cpu_to_be16(lrh0);
	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);

	/*
	 * If we can send the ACK, clear the ACK state.
	 */
	if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
		dev->n_unicast_xmit++;
		goto done;
	}

	/*
	 * We are out of PIO buffers at the moment.
	 * Pass responsibility for sending the ACK to the
	 * send tasklet so that when a PIO buffer becomes
	 * available, the ACK is sent ahead of other outgoing
	 * packets.
	 */
	dev->n_rc_qacks++;

queue_ack:
	spin_lock_irqsave(&qp->s_lock, flags);
	qp->s_flags |= IPATH_S_ACK_PENDING;
	qp->s_nak_state = qp->r_nak_state;
	qp->s_ack_psn = qp->r_ack_psn;
	spin_unlock_irqrestore(&qp->s_lock, flags);

	/* Call ipath_do_rc_send() in another thread. */
	tasklet_hi_schedule(&qp->s_task);

done:
	return;
}

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from ipath_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct ipath_qp *qp, u32 psn)
{
	u32 n = qp->s_last;
	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
	u32 opcode;

	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (ipath_cmp24(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = get_swqe_ptr(qp, n);
		diff = ipath_cmp24(psn, wqe->psn);
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See ipath_do_rc_send().
	 */
	switch (opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since there is only
		 * one PSN per request.
		 */
		qp->s_state = OP(SEND_LAST);
	}
done:
	qp->s_psn = psn;
}

/**
 * ipath_restart_rc - back up requester to resend the last un-ACKed request
 * @qp: the QP to restart
 * @psn: packet sequence number for the request
 * @wc: the work completion request
 *
 * The QP s_lock should be held and interrupts disabled.
 */
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
{
	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
	struct ipath_ibdev *dev;

	if (qp->s_retry == 0) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_RETRY_EXC_ERR;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = 0;
		wc->qp = &qp->ibqp;
		wc->src_qp = qp->remote_qpn;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_sqerror_qp(qp, wc);
		goto bail;
	}
	qp->s_retry--;

	/*
	 * Remove the QP from the timeout queue.
	 * Note: it may already have been removed by ipath_ib_timer().
	 */
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	if (wqe->wr.opcode == IB_WR_RDMA_READ)
		dev->n_rc_resends++;
	else
		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;

	reset_psn(qp, psn);
	tasklet_hi_schedule(&qp->s_task);

bail:
	return;
}

static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
	if (qp->s_wait_credit) {
		qp->s_wait_credit = 0;
		tasklet_hi_schedule(&qp->s_task);
	}
	qp->s_last_psn = psn;
}
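
/*
 * AETH layout as used by do_rc_ack() below: bits 31..29 encode the
 * ACK type (0 = ACK, 1 = RNR NAK, 3 = NAK, 2 = reserved), bits 28..24
 * (IPATH_AETH_CREDIT_SHIFT/MASK) carry the credit count or RNR/NAK
 * code, and the low 24 bits (IPATH_MSN_MASK) carry the MSN.
 */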

/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 *
 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held and interrupts disabled.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	struct ipath_swqe *wqe;
	int ret = 0;
	u32 ack_psn;

	/*
	 * Remove the QP from the timeout queue (or RNR timeout queue).
	 * If ipath_ib_timer() has already removed it,
	 * it's OK since we hold the QP s_lock and ipath_restart_rc()
	 * just won't find anything to restart if we ACK everything.
	 */
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.  The MSN won't include the NAK'ed
	 * request but will include the ACK'ed request(s).
	 */
	ack_psn = psn;
	if (aeth >> 29)
		ack_psn--;
	wqe = get_swqe_ptr(qp, qp->s_last);

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) {
		/*
		 * If this request is an RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic.  In other words, only an RDMA_READ_LAST or ONLY
		 * can ACK an RDMA read and likewise for atomic ops.  Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
		      ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
		      ipath_cmp24(wqe->psn, psn) != 0))) {
			/*
			 * The last valid PSN seen is the previous
			 * request's.
			 */
			update_last_psn(qp, wqe->psn - 1);
			/* Retry this request. */
			ipath_restart_rc(qp, wqe->psn, &wc);
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail;
		}
		if (qp->s_num_rd_atomic &&
		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
			qp->s_num_rd_atomic--;
			/* Restart sending task if fence is complete */
			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
			    !qp->s_num_rd_atomic) {
				qp->s_flags &= ~IPATH_S_FENCE_PENDING;
				tasklet_hi_schedule(&qp->s_task);
			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
				tasklet_hi_schedule(&qp->s_task);
			}
		}
		/* Post a send completion queue entry if requested. */
		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
			wc.wr_id = wqe->wr.wr_id;
			wc.status = IB_WC_SUCCESS;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = wqe->length;
			wc.imm_data = 0;
			wc.qp = &qp->ibqp;
			wc.src_qp = qp->remote_qpn;
			wc.wc_flags = 0;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
		}
		qp->s_retry = qp->s_retry_cnt;
		/*
		 * If we are completing a request which is in the process of
		 * being resent, we can stop resending it since we know the
		 * responder has already seen it.
		 */
		if (qp->s_last == qp->s_cur) {
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			qp->s_last = qp->s_cur;
			if (qp->s_last == qp->s_tail)
				break;
			wqe = get_swqe_ptr(qp, qp->s_cur);
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		} else {
			if (++qp->s_last >= qp->s_size)
				qp->s_last = 0;
			if (qp->s_last == qp->s_tail)
				break;
			wqe = get_swqe_ptr(qp, qp->s_last);
		}
	}

	switch (aeth >> 29) {
	case 0:		/* ACK */
		dev->n_rc_acks++;
		/* If this is a partial ACK, reset the retransmit timer. */
		if (qp->s_last != qp->s_tail) {
			spin_lock(&dev->pending_lock);
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
			spin_unlock(&dev->pending_lock);
			/*
			 * If we get a partial ACK for a resent operation,
			 * we can stop resending the earlier packets and
			 * continue with the next packet the receiver wants.
			 */
			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
				reset_psn(qp, psn + 1);
				tasklet_hi_schedule(&qp->s_task);
			}
		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = psn + 1;
		}
		ipath_get_credit(qp, aeth);
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		update_last_psn(qp, psn);
		ret = 1;
		goto bail;

	case 1:		/* RNR NAK */
		dev->n_rnr_naks++;
		if (qp->s_last == qp->s_tail)
			goto bail;
		if (qp->s_rnr_retry == 0) {
			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;

		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);

		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			dev->n_rc_resends++;
		else
			dev->n_rc_resends +=
				(qp->s_psn - psn) & IPATH_PSN_MASK;

		reset_psn(qp, psn);

		qp->s_rnr_timeout =
			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
					   IPATH_AETH_CREDIT_MASK];
		ipath_insert_rnr_queue(qp);
		goto bail;

	case 3:		/* NAK */
		if (qp->s_last == qp->s_tail)
			goto bail;
		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);
		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
			IPATH_AETH_CREDIT_MASK) {
		case 0:	/* PSN sequence error */
			dev->n_seq_naks++;
			/*
			 * Back up to the responder's expected PSN.
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			ipath_restart_rc(qp, psn, &wc);
			break;

		case 1:	/* Invalid Request */
			wc.status = IB_WC_REM_INV_REQ_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 2:	/* Remote Access Error */
			wc.status = IB_WC_REM_ACCESS_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 3:	/* Remote Operation Error */
			wc.status = IB_WC_REM_OP_ERR;
			dev->n_other_naks++;
		class_b:
			wc.wr_id = wqe->wr.wr_id;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = 0;
			wc.qp = &qp->ibqp;
			wc.src_qp = qp->remote_qpn;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_sqerror_qp(qp, &wc);
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail;

	default:		/* 2: reserved */
	reserved:
		/* Ignore reserved NAK codes. */
		goto bail;
	}

bail:
	return ret;
}

/**
 * ipath_rc_rcv_resp - process an incoming RC response packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @hdrsize: the header length
 * @pmtu: the path MTU
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an incoming RC response
 * packet for the given QP.
 * Called at interrupt level.
 */
static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data, u32 tlen,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn, u32 hdrsize, u32 pmtu,
				     int header_in_data)
{
	struct ipath_swqe *wqe;
	unsigned long flags;
	struct ib_wc wc;
	int diff;
	u32 pad;
	u32 aeth;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Ignore invalid responses. */
	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
		goto ack_done;

	/* Ignore duplicate responses. */
	diff = ipath_cmp24(psn, qp->s_last_psn);
	if (unlikely(diff <= 0)) {
		/* Update credits for "ghost" ACKs */
		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
			if (!header_in_data)
				aeth = be32_to_cpu(ohdr->u.aeth);
			else {
				aeth = be32_to_cpu(((__be32 *) data)[0]);
				data += sizeof(__be32);
			}
			if ((aeth >> 29) == 0)
				ipath_get_credit(qp, aeth);
		}
		goto ack_done;
	}

	if (unlikely(qp->s_last == qp->s_tail))
		goto ack_done;
	wqe = get_swqe_ptr(qp, qp->s_last);

	switch (opcode) {
	case OP(ACKNOWLEDGE):
	case OP(ATOMIC_ACKNOWLEDGE):
	case OP(RDMA_READ_RESPONSE_FIRST):
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
			u64 val;

			if (!header_in_data) {
				__be32 *p = ohdr->u.at.atomic_ack_eth;

				val = ((u64) be32_to_cpu(p[0]) << 32) |
					be32_to_cpu(p[1]);
			} else
				val = be64_to_cpu(((__be64 *) data)[0]);
			*(u64 *) wqe->sg_list[0].vaddr = val;
		}
		if (!do_rc_ack(qp, aeth, psn, opcode) ||
		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
			goto ack_done;
		hdrsize += 4;
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_middle;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* no AETH, no ACK */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			dev->n_rdma_seq++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
	read_middle:
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto ack_len_err;
		if (unlikely(pmtu >= qp->s_rdma_read_len))
			goto ack_len_err;

		/* We got a response so update the timeout. */
		spin_lock(&dev->pending_lock);
		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
			list_move_tail(&qp->timerwait,
				       &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		/*
		 * Update the RDMA receive state but do the copy w/o
		 * holding the locks and blocking interrupts.
		 */
		qp->s_rdma_read_len -= pmtu;
		update_last_psn(qp, psn);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
		goto bail;

	case OP(RDMA_READ_RESPONSE_ONLY):
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			dev->n_rdma_seq++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 0 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen < (hdrsize + pad + 8)))
			goto ack_len_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_last;

	case OP(RDMA_READ_RESPONSE_LAST):
		/* ACKs READ req. */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			dev->n_rdma_seq++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 1 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen <= (hdrsize + pad + 8)))
			goto ack_len_err;
	read_last:
		tlen -= hdrsize + pad + 8;
		if (unlikely(tlen != qp->s_rdma_read_len))
			goto ack_len_err;
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
		(void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
		goto ack_done;
	}

ack_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	goto bail;

ack_op_err:
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto ack_err;

ack_len_err:
	wc.status = IB_WC_LOC_LEN_ERR;
ack_err:
	wc.wr_id = wqe->wr.wr_id;
	wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
	wc.vendor_err = 0;
	wc.byte_len = 0;
	wc.imm_data = 0;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.wc_flags = 0;
	wc.pkey_index = 0;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.dlid_path_bits = 0;
	wc.port_num = 0;
	ipath_sqerror_qp(qp, &wc);
	spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
	return;
}

/**
 * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @diff: the difference between the PSN and the expected PSN
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an unexpected
 * incoming RC packet for the given QP.
 * Called at interrupt level.
 * Return 1 if no more processing is needed; otherwise return 0 to
 * schedule a response to be sent.
 */
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn,
				     int diff,
				     int header_in_data)
{
	struct ipath_ack_entry *e;
	u8 i, prev;
	int old_req;
	unsigned long flags;

	if (diff > 0) {
		/*
		 * Packet sequence error.
		 * A NAK will ACK earlier sends and RDMA writes.
		 * Don't queue the NAK if we already sent one.
		 */
		if (!qp->r_nak_state) {
			qp->r_nak_state = IB_NAK_PSN_ERROR;
			/* Use the expected PSN. */
			qp->r_ack_psn = qp->r_psn;
			goto send_ack;
		}
		goto done;
	}

	/*
	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
	 * write or atomic op.  Don't NAK errors, just silently drop
	 * the duplicate request.  Note that r_sge, r_len, and
	 * r_rcv_len may be in use so don't modify them.
	 *
	 * We are supposed to ACK the earliest duplicate PSN but we
	 * can coalesce an outstanding duplicate ACK.  We have to
	 * send the earliest so that RDMA reads can be restarted at
	 * the requester's expected PSN.
	 *
	 * First, find where this duplicate PSN falls within the
	 * ACKs previously sent.
	 */
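
	/*
	 * The scan below walks from the newest ACK queue entry back
	 * towards the oldest.  old_req stays set while the matching
	 * entry is still queued to be sent (at or after
	 * s_tail_ack_queue), in which case the duplicate can simply be
	 * ignored; it is cleared when the matching entry has already
	 * been responded to, so the response has to be re-scheduled.
	 */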
	psn &= IPATH_PSN_MASK;
	e = NULL;
	old_req = 1;
	spin_lock_irqsave(&qp->s_lock, flags);
	for (i = qp->r_head_ack_queue; ; i = prev) {
		if (i == qp->s_tail_ack_queue)
			old_req = 0;
		if (i)
			prev = i - 1;
		else
			prev = IPATH_MAX_RDMA_ATOMIC;
		if (prev == qp->r_head_ack_queue) {
			e = NULL;
			break;
		}
		e = &qp->s_ack_queue[prev];
		if (!e->opcode) {
			e = NULL;
			break;
		}
		if (ipath_cmp24(psn, e->psn) >= 0)
			break;
	}
	switch (opcode) {
	case OP(RDMA_READ_REQUEST): {
		struct ib_reth *reth;
		u32 offset;
		u32 len;

		/*
		 * If we didn't find the RDMA read request in the ack queue,
		 * or the send tasklet is already backed up to send an
		 * earlier entry, we can ignore this request.
		 */
		if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
			goto unlock_done;
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		/*
		 * Address range must be a subset of the original
		 * request and start on pmtu boundaries.
		 * We reuse the old ack_queue slot since the requester
		 * should not back up and request an earlier PSN for the
		 * same request.
		 */
		offset = ((psn - e->psn) & IPATH_PSN_MASK) *
			ib_mtu_enum_to_int(qp->path_mtu);
		len = be32_to_cpu(reth->length);
		if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
			goto unlock_done;
		if (len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			ok = ipath_rkey_ok(qp, &e->rdma_sge,
					   len, vaddr, rkey,
					   IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok))
				goto unlock_done;
		} else {
			e->rdma_sge.sg_list = NULL;
			e->rdma_sge.num_sge = 0;
			e->rdma_sge.sge.mr = NULL;
			e->rdma_sge.sge.vaddr = NULL;
			e->rdma_sge.sge.length = 0;
			e->rdma_sge.sge.sge_length = 0;
		}
		e->psn = psn;
		qp->s_ack_state = OP(ACKNOWLEDGE);
		qp->s_tail_ack_queue = prev;
		break;
	}

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD): {
		/*
		 * If we didn't find the atomic request in the ack queue
		 * or the send tasklet is already backed up to send an
		 * earlier entry, we can ignore this request.
		 */
		if (!e || e->opcode != (u8) opcode || old_req)
			goto unlock_done;
		qp->s_ack_state = OP(ACKNOWLEDGE);
		qp->s_tail_ack_queue = prev;
		break;
	}

	default:
		if (old_req)
			goto unlock_done;
		/*
		 * Resend the most recent ACK if this request is
		 * after all the previous RDMA reads and atomics.
		 */
		if (i == qp->r_head_ack_queue) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			qp->r_nak_state = 0;
			qp->r_ack_psn = qp->r_psn - 1;
			goto send_ack;
		}
		/*
		 * Resend the RDMA read or atomic op which
		 * ACKs this duplicate request.
		 */
		qp->s_ack_state = OP(ACKNOWLEDGE);
		qp->s_tail_ack_queue = i;
		break;
	}
	qp->r_nak_state = 0;
	tasklet_hi_schedule(&qp->s_task);

unlock_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
done:
	return 1;

send_ack:
	return 0;
}

static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	qp->state = IB_QPS_ERR;
	ipath_error_qp(qp, err);
	spin_unlock_irqrestore(&qp->s_lock, flags);
}

/**
 * ipath_rc_rcv - process an incoming RC packet
 * @dev: the device this packet came in on
 * @hdr: the header of this packet
 * @has_grh: true if the header has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 *
 * This is called from ipath_qp_rcv() to process an incoming RC packet
 * for the given QP.
 * Called at interrupt level.
 */
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	u32 opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	int diff;
	struct ib_reth *reth;
	int header_in_data;

	/* Validate the SLID.  See Ch. 9.6.1.5 */
	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
		goto done;

	/* Check for GRH */
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the core driver sets
		 * the eager header buffer size to 56 bytes so the last 4
		 * bytes of the BTH header (PSN) are in the data buffer.
		 */
		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}

	/*
	 * Process responses (ACKs) before anything else.  Note that the
	 * packet sequence number will be for something in the send work
	 * queue rather than the expected receive packet sequence number.
	 * In other words, this QP is the requester.
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
		ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
				  hdrsize, pmtu, header_in_data);
		goto done;
	}

	/* Compute 24 bits worth of difference. */
	diff = ipath_cmp24(psn, qp->r_psn);
	if (unlikely(diff)) {
		if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
				       psn, diff, header_in_data))
			goto done;
		goto send_ack;
	}

	/* Check for opcode sequence errors. */
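
	/*
	 * For example, once a SEND_FIRST or SEND_MIDDLE has been
	 * accepted, only SEND_MIDDLE, SEND_LAST, or
	 * SEND_LAST_WITH_IMMEDIATE may legally follow at the next PSN;
	 * anything else gets an invalid-request NAK below.
	 */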
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
	nack_inv:
		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
		qp->r_ack_psn = qp->r_psn;
		goto send_ack;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto nack_inv;

	default:
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			goto nack_inv;
		/*
		 * Note that it is up to the requester to not send a new
		 * RDMA read or atomic operation before receiving an ACK
		 * for the previous operation.
		 */
		break;
	}

	wc.imm_data = 0;
	wc.wc_flags = 0;

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
		if (!ipath_get_rwqe(qp, 0)) {
		rnr_nak:
			/*
			 * An RNR NAK will ACK earlier sends and RDMA writes.
			 * Don't queue the NAK if an RDMA read or atomic
			 * is pending though.
			 */
			if (qp->r_nak_state)
				goto done;
			qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
			qp->r_ack_psn = qp->r_psn;
			goto send_ack;
		}
		qp->r_rcv_len = 0;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
	case OP(RDMA_WRITE_MIDDLE):
	send_middle:
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto nack_inv;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		/* consume RWQE */
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		/* FALLTHROUGH */
	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	case OP(RDMA_WRITE_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto nack_inv;
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, tlen);
		qp->r_msn++;
		if (!qp->r_wrid_valid)
			break;
		qp->r_wrid_valid = 0;
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
		/* consume RWQE */
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &qp->r_sge,
					   qp->r_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto nack_acc;
		} else {
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE)))
			goto nack_acc;
		if (opcode == OP(RDMA_WRITE_FIRST))
			goto send_middle;
		else if (opcode == OP(RDMA_WRITE_ONLY))
			goto send_last;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(RDMA_READ_REQUEST): {
		struct ipath_ack_entry *e;
		u32 len;
		u8 next;

		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto nack_acc;
		next = qp->r_head_ack_queue + 1;
		if (next > IPATH_MAX_RDMA_ATOMIC)
			next = 0;
		if (unlikely(next == qp->s_tail_ack_queue))
			goto nack_inv;
		e = &qp->s_ack_queue[qp->r_head_ack_queue];
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		len = be32_to_cpu(reth->length);
		if (len) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
					   rkey, IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok))
				goto nack_acc;
			/*
			 * Update the next expected PSN.  We add 1 later
			 * below, so only add the remainder here.
			 */
			if (len > pmtu)
				qp->r_psn += (len - 1) / pmtu;
		} else {
			e->rdma_sge.sg_list = NULL;
			e->rdma_sge.num_sge = 0;
			e->rdma_sge.sge.mr = NULL;
			e->rdma_sge.sge.vaddr = NULL;
			e->rdma_sge.sge.length = 0;
			e->rdma_sge.sge.sge_length = 0;
		}
		e->opcode = opcode;
		e->psn = psn;
		/*
		 * We need to increment the MSN here instead of when we
		 * finish sending the result since a duplicate request would
		 * increment it more than once.
		 */
		qp->r_msn++;
		qp->r_psn++;
		qp->r_state = opcode;
		qp->r_nak_state = 0;
		barrier();
		qp->r_head_ack_queue = next;

		/* Call ipath_do_rc_send() in another thread. */
		tasklet_hi_schedule(&qp->s_task);

		goto done;
	}

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD): {
		struct ib_atomic_eth *ateth;
		struct ipath_ack_entry *e;
		u64 vaddr;
		atomic64_t *maddr;
		u64 sdata;
		u32 rkey;
		u8 next;

		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_acc;
		next = qp->r_head_ack_queue + 1;
		if (next > IPATH_MAX_RDMA_ATOMIC)
			next = 0;
		if (unlikely(next == qp->s_tail_ack_queue))
			goto nack_inv;
		if (!header_in_data)
			ateth = &ohdr->u.atomic_eth;
		else
			ateth = (struct ib_atomic_eth *)data;
		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
			be32_to_cpu(ateth->vaddr[1]);
		if (unlikely(vaddr & (sizeof(u64) - 1)))
			goto nack_inv;
		rkey = be32_to_cpu(ateth->rkey);
		/* Check rkey & NAK */
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
					    sizeof(u64), vaddr, rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_acc;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
		sdata = be64_to_cpu(ateth->swap_data);
		e = &qp->s_ack_queue[qp->r_head_ack_queue];
		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
			(u64) atomic64_add_return(sdata, maddr) - sdata :
			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
				      be64_to_cpu(ateth->compare_data),
				      sdata);
		e->opcode = opcode;
		e->psn = psn & IPATH_PSN_MASK;
		qp->r_msn++;
		qp->r_psn++;
		qp->r_state = opcode;
		qp->r_nak_state = 0;
		barrier();
		qp->r_head_ack_queue = next;

		/* Call ipath_do_rc_send() in another thread. */
		tasklet_hi_schedule(&qp->s_task);

		goto done;
	}

	default:
		/* NAK unknown opcodes. */
		goto nack_inv;
	}
	qp->r_psn++;
	qp->r_state = opcode;
	qp->r_ack_psn = psn;
	qp->r_nak_state = 0;
	/* Send an ACK if requested or required. */
	if (psn & (1 << 31))
		goto send_ack;
	goto done;

nack_acc:
	ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
	qp->r_ack_psn = qp->r_psn;

send_ack:
	send_rc_ack(qp);

done:
	return;
}