/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *           Joachim Fenkes <fenkes@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/slab.h>

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"

#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)

#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)

#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)

static void queue_comp_task(struct ehca_cq *__cq);

static struct ehca_comp_pool *pool;

static inline void comp_event_callback(struct ehca_cq *cq)
{
	if (!cq->ib_cq.comp_handler)
		return;

	spin_lock(&cq->cb_lock);
	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
	spin_unlock(&cq->cb_lock);

	return;
}

static void print_error_data(struct ehca_shca *shca, void *data,
			     u64 *rblock, int length)
{
	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
	u64 resource = rblock[1];

	switch (type) {
	case 0x1: /* Queue Pair */
	{
		struct ehca_qp *qp = (struct ehca_qp *)data;

		/* only print error data if AER is set */
		if (rblock[6] == 0)
			return;

		ehca_err(&shca->ib_device,
			 "QP 0x%x (resource=%llx) has errors.",
			 qp->ib_qp.qp_num, resource);
		break;
	}
	case 0x4: /* Completion Queue */
	{
		struct ehca_cq *cq = (struct ehca_cq *)data;

		ehca_err(&shca->ib_device,
			 "CQ 0x%x (resource=%llx) has errors.",
			 cq->cq_number, resource);
		break;
	}
	default:
		ehca_err(&shca->ib_device,
			 "Unknown error type: %llx on %s.",
			 type, shca->ib_device.name);
		break;
	}

	ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
		 "---------------------------------------------------");
	ehca_dmp(rblock, length, "resource=%llx", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data end "
		 "----------------------------------------------------");

	return;
}

int ehca_error_data(struct ehca_shca *shca, void *data,
		    u64 resource)
{

	unsigned long ret;
	u64 *rblock;
	unsigned long block_count;

	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
	if (!rblock) {
		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
		ret = -ENOMEM;
		goto error_data1;
	}

	/* rblock must be 4K aligned and should be 4K large */
	ret = hipz_h_error_data(shca->ipz_hca_handle,
				resource,
				rblock,
				&block_count);

	if (ret == H_R_STATE)
		ehca_err(&shca->ib_device,
			 "No error data is available: %llx.", resource);
	else if (ret == H_SUCCESS) {
		int length;

		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

		if (length > EHCA_PAGESIZE)
			length = EHCA_PAGESIZE;

		print_error_data(shca, data, rblock, length);
	} else
		ehca_err(&shca->ib_device,
			 "Error data could not be fetched: %llx", resource);

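	/* rblock was allocated with ehca_alloc_fw_ctrlblock() above */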
	ehca_free_fw_ctrlblock(rblock);

error_data1:
	return ret;

}

static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
			      enum ib_event_type event_type)
{
	struct ib_event event;

	/* PATH_MIG without the QP ever having been armed is false alarm */
	if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
		return;

	event.device = &shca->ib_device;
	event.event = event_type;

	if (qp->ext_type == EQPT_SRQ) {
		if (!qp->ib_srq.event_handler)
			return;

		event.element.srq = &qp->ib_srq;
		qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
	} else {
		if (!qp->ib_qp.event_handler)
			return;

		event.element.qp = &qp->ib_qp;
		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
	}
}

static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
			      enum ib_event_type event_type, int fatal)
{
	struct ehca_qp *qp;
	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);

	read_lock(&ehca_qp_idr_lock);
	qp = idr_find(&ehca_qp_idr, token);
	if (qp)
		atomic_inc(&qp->nr_events);
	read_unlock(&ehca_qp_idr_lock);

	if (!qp)
		return;

	if (fatal)
		ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);

	dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
			  IB_EVENT_SRQ_ERR : event_type);

	/*
	 * eHCA only processes one WQE at a time for SRQ base QPs,
	 * so the last WQE has been processed as soon as the QP enters
	 * error state.
	 */
	if (fatal && qp->ext_type == EQPT_SRQBASE)
		dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);

	if (atomic_dec_and_test(&qp->nr_events))
		wake_up(&qp->wait_completion);
	return;
}

static void cq_event_callback(struct ehca_shca *shca,
			      u64 eqe)
{
	struct ehca_cq *cq;
	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);

	read_lock(&ehca_cq_idr_lock);
	cq = idr_find(&ehca_cq_idr, token);
	if (cq)
		atomic_inc(&cq->nr_events);
	read_unlock(&ehca_cq_idr_lock);

	if (!cq)
		return;

	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);

	if (atomic_dec_and_test(&cq->nr_events))
		wake_up(&cq->wait_completion);

	return;
}

static void parse_identifier(struct ehca_shca *shca, u64 eqe)
{
	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);

	switch (identifier) {
	case 0x02: /* path migrated */
		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
		break;
	case 0x03: /* communication established */
		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
		break;
	case 0x04: /* send queue drained */
		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
		break;
	case 0x05: /* QP error */
	case 0x06: /* QP error */
		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
		break;
	case 0x07: /* CQ error */
	case 0x08: /* CQ error */
		cq_event_callback(shca, eqe);
		break;
	case 0x09: /* MRMWPTE error */
		ehca_err(&shca->ib_device, "MRMWPTE error.");
		break;
	case 0x0A: /* port event */
		ehca_err(&shca->ib_device, "Port event.");
		break;
	case 0x0B: /* MR access error */
		ehca_err(&shca->ib_device, "MR access error.");
		break;
	case 0x0C: /* EQ error */
		ehca_err(&shca->ib_device, "EQ error.");
		break;
	case 0x0D: /* P/Q_Key mismatch */
		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
		break;
	case 0x10: /* sampling complete */
		ehca_err(&shca->ib_device, "Sampling complete.");
		break;
	case 0x11: /* unaffiliated access error */
		ehca_err(&shca->ib_device, "Unaffiliated access error.");
		break;
	case 0x12: /* path migrating */
		ehca_err(&shca->ib_device, "Path migrating.");
		break;
	case 0x13: /* interface trace stopped */
		ehca_err(&shca->ib_device, "Interface trace stopped.");
		break;
	case 0x14: /* first error capture info available */
		ehca_info(&shca->ib_device, "First error capture available");
		break;
	case 0x15: /* SRQ limit reached */
		qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
		break;
	default:
		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
			 identifier, shca->ib_device.name);
		break;
	}

	return;
}

static void dispatch_port_event(struct ehca_shca *shca, int port_num,
				enum ib_event_type type, const char *msg)
{
	struct ib_event event;

	ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
	event.device = &shca->ib_device;
	event.event = type;
	event.element.port_num = port_num;
	ib_dispatch_event(&event);
}

static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
{
	struct ehca_sma_attr new_attr;
	struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;

	ehca_query_sma_attr(shca, port_num, &new_attr);

	if (new_attr.sm_sl != old_attr->sm_sl ||
	    new_attr.sm_lid != old_attr->sm_lid)
		dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
				    "SM changed");

	if (new_attr.lid != old_attr->lid ||
	    new_attr.lmc != old_attr->lmc)
		dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
				    "LID changed");

	if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
	    memcmp(new_attr.pkeys, old_attr->pkeys,
		   sizeof(u16) * new_attr.pkey_tbl_len))
		dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
				    "P_Key changed");

	*old_attr = new_attr;
}

/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
static int replay_modify_qp(struct ehca_sport *sport)
{
	int aqp1_destroyed;
	unsigned long flags;

	spin_lock_irqsave(&sport->mod_sqp_lock, flags);

	aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];

	if (sport->ibqp_sqp[IB_QPT_SMI])
		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
	if (!aqp1_destroyed)
		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);

	spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);

	return aqp1_destroyed;
}

static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
	u8 spec_event;
	struct ehca_sport *sport = &shca->sport[port - 1];

	switch (ec) {
	case 0x30: /* port availability change */
		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
			/* only replay modify_qp calls in autodetect mode;
			 * if AQP1 was destroyed, the port is already down
			 * again and we can drop the event.
			 */
			if (ehca_nr_ports < 0)
				if (replay_modify_qp(sport))
					break;

			sport->port_state = IB_PORT_ACTIVE;
			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
					    "is active");
			ehca_query_sma_attr(shca, port, &sport->saved_attr);
		} else {
			sport->port_state = IB_PORT_DOWN;
			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
					    "is inactive");
		}
		break;
	case 0x31:
		/* port configuration change
		 * disruptive change is caused by
		 * LID, PKEY or SM change
		 */
		if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
			ehca_warn(&shca->ib_device, "disruptive port "
				  "%d configuration change", port);

			sport->port_state = IB_PORT_DOWN;
			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
					    "is inactive");

			sport->port_state = IB_PORT_ACTIVE;
			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
					    "is active");
			ehca_query_sma_attr(shca, port,
					    &sport->saved_attr);
		} else
			notify_port_conf_change(shca, port);
		break;
	case 0x32: /* adapter malfunction */
		ehca_err(&shca->ib_device, "Adapter malfunction.");
		break;
	case 0x33: /* trace stopped */
		ehca_err(&shca->ib_device, "Trace stopped.");
		break;
	case 0x34: /* util async event */
		spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
		if (spec_event == 0x80) /* client reregister required */
			dispatch_port_event(shca, port,
					    IB_EVENT_CLIENT_REREGISTER,
					    "client reregister req.");
		else
			ehca_warn(&shca->ib_device, "Unknown util async "
				  "event %x on port %x", spec_event, port);
		break;
	default:
		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
			 ec, shca->ib_device.name);
		break;
	}

	return;
}

static inline void reset_eq_pending(struct ehca_cq *cq)
{
	u64 CQx_EP;
	struct h_galpa gal = cq->galpas.kernel;

	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));

	return;
}

irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca*)dev_id;

	tasklet_hi_schedule(&shca->neq.interrupt_task);

	return IRQ_HANDLED;
}

void ehca_tasklet_neq(unsigned long data)
{
	struct ehca_shca *shca = (struct ehca_shca*)data;
	struct ehca_eqe *eqe;
	u64 ret;

	eqe = ehca_poll_eq(shca, &shca->neq);

	while (eqe) {
		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
			parse_ec(shca, eqe->entry);

		eqe = ehca_poll_eq(shca, &shca->neq);
	}

	ret = hipz_h_reset_event(shca->ipz_hca_handle,
				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);

	if (ret != H_SUCCESS)
		ehca_err(&shca->ib_device, "Can't clear notification events.");

	return;
}

irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca*)dev_id;

	tasklet_hi_schedule(&shca->eq.interrupt_task);

	return IRQ_HANDLED;
}


static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
	u64 eqe_value;
	u32 token;
	struct ehca_cq *cq;

	eqe_value = eqe->entry;
	ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
		ehca_dbg(&shca->ib_device, "Got completion event");
		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
		read_lock(&ehca_cq_idr_lock);
		cq = idr_find(&ehca_cq_idr, token);
		if (cq)
			atomic_inc(&cq->nr_events);
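		/* nr_events keeps the cq valid until the handler below has run */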
		read_unlock(&ehca_cq_idr_lock);
		if (cq == NULL) {
			ehca_err(&shca->ib_device,
				 "Invalid eqe for non-existing cq token=%x",
				 token);
			return;
		}
		reset_eq_pending(cq);
		if (ehca_scaling_code)
			queue_comp_task(cq);
		else {
			comp_event_callback(cq);
			if (atomic_dec_and_test(&cq->nr_events))
				wake_up(&cq->wait_completion);
		}
	} else {
		ehca_dbg(&shca->ib_device, "Got non completion event");
		parse_identifier(shca, eqe_value);
	}
}

void ehca_process_eq(struct ehca_shca *shca, int is_irq)
{
	struct ehca_eq *eq = &shca->eq;
	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
	u64 eqe_value, ret;
	int eqe_cnt, i;
	int eq_empty = 0;

	spin_lock(&eq->irq_spinlock);
	if (is_irq) {
		const int max_query_cnt = 100;
		int query_cnt = 0;
		int int_state = 1;
		do {
			int_state = hipz_h_query_int_state(
				shca->ipz_hca_handle, eq->ist);
			query_cnt++;
			iosync();
		} while (int_state && query_cnt < max_query_cnt);
		if (unlikely((query_cnt == max_query_cnt)))
			ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
				 int_state, query_cnt);
	}

	/* read out all eqes */
	eqe_cnt = 0;
	do {
		u32 token;
		eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
		if (!eqe_cache[eqe_cnt].eqe)
			break;
		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
			read_lock(&ehca_cq_idr_lock);
			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
			if (eqe_cache[eqe_cnt].cq)
				atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
			read_unlock(&ehca_cq_idr_lock);
			if (!eqe_cache[eqe_cnt].cq) {
				ehca_err(&shca->ib_device,
					 "Invalid eqe for non-existing cq "
					 "token=%x", token);
				continue;
			}
		} else
			eqe_cache[eqe_cnt].cq = NULL;
		eqe_cnt++;
	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
	if (!eqe_cnt) {
		if (is_irq)
			ehca_dbg(&shca->ib_device,
				 "No eqe found for irq event");
		goto unlock_irq_spinlock;
	} else if (!is_irq) {
		ret = hipz_h_eoi(eq->ist);
		if (ret != H_SUCCESS)
			ehca_err(&shca->ib_device,
				 "bad return code EOI - rc = %lld\n", ret);
		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
	}
	if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
	/* enable irq for new packets */
	for (i = 0; i < eqe_cnt; i++) {
		if (eq->eqe_cache[i].cq)
			reset_eq_pending(eq->eqe_cache[i].cq);
	}
	/* check eq */
	spin_lock(&eq->spinlock);
	eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
	spin_unlock(&eq->spinlock);
	/* call completion handler for cached eqes */
	for (i = 0; i < eqe_cnt; i++)
		if (eq->eqe_cache[i].cq) {
			if (ehca_scaling_code)
				queue_comp_task(eq->eqe_cache[i].cq);
			else {
				struct ehca_cq *cq = eq->eqe_cache[i].cq;
				comp_event_callback(cq);
				if (atomic_dec_and_test(&cq->nr_events))
					wake_up(&cq->wait_completion);
			}
		} else {
			ehca_dbg(&shca->ib_device, "Got non completion event");
			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
		}
	/* poll eq if not empty */
	if (eq_empty)
		goto unlock_irq_spinlock;
	do {
		struct ehca_eqe *eqe;
		eqe = ehca_poll_eq(shca, &shca->eq);
		if (!eqe)
			break;
		process_eqe(shca, eqe);
	} while (1);

unlock_irq_spinlock:
	spin_unlock(&eq->irq_spinlock);
}

void
ehca_tasklet_eq(unsigned long data)
{
	ehca_process_eq((struct ehca_shca*)data, 1);
}

static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
{
	int cpu;
	unsigned long flags;

	WARN_ON_ONCE(!in_interrupt());
	if (ehca_debug_level >= 3)
		ehca_dmp(cpu_online_mask, cpumask_size(), "");

	spin_lock_irqsave(&pool->last_cpu_lock, flags);
	cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
	if (cpu >= nr_cpu_ids)
		cpu = cpumask_first(cpu_online_mask);
	pool->last_cpu = cpu;
	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);

	return cpu;
}

static void __queue_comp_task(struct ehca_cq *__cq,
			      struct ehca_cpu_comp_task *cct)
{
	unsigned long flags;

	spin_lock_irqsave(&cct->task_lock, flags);
	spin_lock(&__cq->task_lock);

	if (__cq->nr_callbacks == 0) {
		__cq->nr_callbacks++;
		list_add_tail(&__cq->entry, &cct->cq_list);
		cct->cq_jobs++;
		wake_up(&cct->wait_queue);
	} else
		__cq->nr_callbacks++;

	spin_unlock(&__cq->task_lock);
	spin_unlock_irqrestore(&cct->task_lock, flags);
}

static void queue_comp_task(struct ehca_cq *__cq)
{
	int cpu_id;
	struct ehca_cpu_comp_task *cct;
	int cq_jobs;
	unsigned long flags;

	cpu_id = find_next_online_cpu(pool);
	BUG_ON(!cpu_online(cpu_id));

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
	BUG_ON(!cct);

	spin_lock_irqsave(&cct->task_lock, flags);
	cq_jobs = cct->cq_jobs;
	spin_unlock_irqrestore(&cct->task_lock, flags);
	if (cq_jobs > 0) {
		cpu_id = find_next_online_cpu(pool);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
		BUG_ON(!cct);
	}

	__queue_comp_task(__cq, cct);
}

static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
	struct ehca_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&cct->task_lock, flags);

	while (!list_empty(&cct->cq_list)) {
		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
		spin_unlock_irqrestore(&cct->task_lock, flags);

		comp_event_callback(cq);
		if (atomic_dec_and_test(&cq->nr_events))
			wake_up(&cq->wait_completion);

		spin_lock_irqsave(&cct->task_lock, flags);
		spin_lock(&cq->task_lock);
		cq->nr_callbacks--;
		if (!cq->nr_callbacks) {
			list_del_init(cct->cq_list.next);
			cct->cq_jobs--;
		}
		spin_unlock(&cq->task_lock);
	}

	spin_unlock_irqrestore(&cct->task_lock, flags);
}

static int comp_task(void *__cct)
{
	struct ehca_cpu_comp_task *cct = __cct;
	int cql_empty;
	DECLARE_WAITQUEUE(wait, current);

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		add_wait_queue(&cct->wait_queue, &wait);

		spin_lock_irq(&cct->task_lock);
		cql_empty = list_empty(&cct->cq_list);
		spin_unlock_irq(&cct->task_lock);
		if (cql_empty)
			schedule();
		else
			__set_current_state(TASK_RUNNING);

		remove_wait_queue(&cct->wait_queue, &wait);

		spin_lock_irq(&cct->task_lock);
		cql_empty = list_empty(&cct->cq_list);
		spin_unlock_irq(&cct->task_lock);
		if (!cql_empty)
			run_comp_task(__cct);

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}

static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
					    int cpu)
{
	struct ehca_cpu_comp_task *cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	spin_lock_init(&cct->task_lock);
	INIT_LIST_HEAD(&cct->cq_list);
	init_waitqueue_head(&cct->wait_queue);
	cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu),
					   "ehca_comp/%d", cpu);

	return cct->task;
}

static void destroy_comp_task(struct ehca_comp_pool *pool,
			      int cpu)
{
	struct ehca_cpu_comp_task *cct;
	struct task_struct *task;
	unsigned long flags_cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	task = cct->task;
	cct->task = NULL;
	cct->cq_jobs = 0;

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);

	if (task)
		kthread_stop(task);
}

static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
{
	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	LIST_HEAD(list);
	struct ehca_cq *cq;
	unsigned long flags_cct;

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	list_splice_init(&cct->cq_list, &list);

	while (!list_empty(&list)) {
		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);

		list_del(&cq->entry);
		__queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks));
	}

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);

}

static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
					unsigned long action,
					void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct ehca_cpu_comp_task *cct;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
		if (!create_comp_task(pool, cpu)) {
			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
			return notifier_from_errno(-ENOMEM);
		}
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, cpumask_any(cpu_online_mask));
		destroy_comp_task(pool, cpu);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, cpu);
		wake_up_process(cct->task);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
		destroy_comp_task(pool, cpu);
		take_over_work(pool, cpu);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
	.notifier_call	= comp_pool_callback,
	.priority	= 0,
};

int ehca_create_comp_pool(void)
{
	int cpu;
	struct task_struct *task;

	if (!ehca_scaling_code)
		return 0;

	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
	if (pool == NULL)
		return -ENOMEM;

	spin_lock_init(&pool->last_cpu_lock);
	pool->last_cpu = cpumask_any(cpu_online_mask);

	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
	if (pool->cpu_comp_tasks == NULL) {
		kfree(pool);
		return -EINVAL;
	}

	for_each_online_cpu(cpu) {
		task = create_comp_task(pool, cpu);
		if (task) {
			kthread_bind(task, cpu);
			wake_up_process(task);
		}
	}

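	/* follow CPU hotplug so each online CPU keeps a comp task */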
	register_hotcpu_notifier(&comp_pool_callback_nb);

	printk(KERN_INFO "eHCA scaling code enabled\n");

	return 0;
}

void ehca_destroy_comp_pool(void)
{
	int i;

	if (!ehca_scaling_code)
		return;

	unregister_hotcpu_notifier(&comp_pool_callback_nb);

	for_each_online_cpu(i)
		destroy_comp_task(pool, i);

	free_percpu(pool->cpu_comp_tasks);
	kfree(pool);
}