/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources reserved for kernel use. These resources are
 * reserved whenever the the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand.
The kernel can always reload the kernel context but
 * a SLEEP may be required!!!
 *
 * Async Overview:
 *
 * 	Each blade has one "kernel context" that owns GRU kernel resources
 * 	located on the blade. Kernel drivers use GRU resources in this context
 * 	for sending messages, zeroing memory, etc.
 *
 * 	The kernel context is dynamically loaded on demand. If it is not in
 * 	use by the kernel, the kernel context can be unloaded & given to a user.
 * 	The kernel context will be reloaded when needed. This may require that
 * 	a context be stolen from a user.
 * 		NOTE: frequent unloading/reloading of the kernel context is
 * 		expensive. We are depending on batch schedulers, cpusets, sane
 * 		drivers or some other mechanism to prevent the need for frequent
 * 		stealing/reloading.
 *
 * 	The kernel context consists of two parts:
 * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 * 		  Each cpu has its own private resources & does not share them
 * 		  with other cpus. These resources are used serially, ie,
 * 		  locked, used & unlocked on each call to a function in
 * 		  grukservices.
 * 		  	(Now that we have dynamic loading of kernel contexts, I
 * 		  	 may rethink this & allow sharing between cpus....)
 *
 * 		- Additional resources can be reserved long term & used directly
 * 		  by UV drivers located in the kernel. Drivers using these GRU
 * 		  resources can use asynchronous GRU instructions that send
 * 		  interrupts on completion.
 * 		  	- these resources must be explicitly locked/unlocked
 * 		  	- locked resources prevent (obviously) the kernel
 * 		  	  context from being unloaded.
 * 		  	- drivers using these resources directly issue their own
 * 		  	  GRU instruction and must wait/check completion.
 *
 * 		  When these resources are reserved, the caller can optionally
 * 		  associate a wait_queue with the resources and use asynchronous
 * 		  GRU instructions.
When an async GRU instruction completes, the 94 * driver will do a wakeup on the event. 95 * 96 */ 97 98 99#define ASYNC_HAN_TO_BID(h) ((h) - 1) 100#define ASYNC_BID_TO_HAN(b) ((b) + 1) 101#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] 102 103#define GRU_NUM_KERNEL_CBR 1 104#define GRU_NUM_KERNEL_DSR_BYTES 256 105#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ 106 GRU_CACHE_LINE_BYTES) 107 108/* GRU instruction attributes for all instructions */ 109#define IMA IMA_CB_DELAY 110 111/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ 112#define __gru_cacheline_aligned__ \ 113 __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) 114 115#define MAGIC 0x1234567887654321UL 116 117/* Default retry count for GRU errors on kernel instructions */ 118#define EXCEPTION_RETRY_LIMIT 3 119 120/* Status of message queue sections */ 121#define MQS_EMPTY 0 122#define MQS_FULL 1 123#define MQS_NOOP 2 124 125/*----------------- RESOURCE MANAGEMENT -------------------------------------*/ 126/* optimized for x86_64 */ 127struct message_queue { 128 union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ 129 int qlines; /* DW 1 */ 130 long hstatus[2]; 131 void *next __gru_cacheline_aligned__;/* CL 1 */ 132 void *limit; 133 void *start; 134 void *start2; 135 char data ____cacheline_aligned; /* CL 2 */ 136}; 137 138/* First word in every message - used by mesq interface */ 139struct message_header { 140 char present; 141 char present2; 142 char lines; 143 char fill; 144}; 145 146#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) 147 148/* 149 * Reload the blade's kernel context into a GRU chiplet. Called holding 150 * the bs_kgts_sema for READ. Will steal user contexts if necessary. 
151 */ 152static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) 153{ 154 struct gru_state *gru; 155 struct gru_thread_state *kgts; 156 void *vaddr; 157 int ctxnum, ncpus; 158 159 up_read(&bs->bs_kgts_sema); 160 down_write(&bs->bs_kgts_sema); 161 162 if (!bs->bs_kgts) { 163 bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); 164 bs->bs_kgts->ts_user_blade_id = blade_id; 165 } 166 kgts = bs->bs_kgts; 167 168 if (!kgts->ts_gru) { 169 STAT(load_kernel_context); 170 ncpus = uv_blade_nr_possible_cpus(blade_id); 171 kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( 172 GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); 173 kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( 174 GRU_NUM_KERNEL_DSR_BYTES * ncpus + 175 bs->bs_async_dsr_bytes); 176 while (!gru_assign_gru_context(kgts)) { 177 msleep(1); 178 gru_steal_context(kgts); 179 } 180 gru_load_context(kgts); 181 gru = bs->bs_kgts->ts_gru; 182 vaddr = gru->gs_gru_base_vaddr; 183 ctxnum = kgts->ts_ctxnum; 184 bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); 185 bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); 186 } 187 downgrade_write(&bs->bs_kgts_sema); 188} 189 190/* 191 * Free all kernel contexts that are not currently in use. 192 * Returns 0 if all freed, else number of inuse context. 193 */ 194static int gru_free_kernel_contexts(void) 195{ 196 struct gru_blade_state *bs; 197 struct gru_thread_state *kgts; 198 int bid, ret = 0; 199 200 for (bid = 0; bid < GRU_MAX_BLADES; bid++) { 201 bs = gru_base[bid]; 202 if (!bs) 203 continue; 204 205 /* Ignore busy contexts. Don't want to block here. */ 206 if (down_write_trylock(&bs->bs_kgts_sema)) { 207 kgts = bs->bs_kgts; 208 if (kgts && kgts->ts_gru) 209 gru_unload_context(kgts, 0); 210 bs->bs_kgts = NULL; 211 up_write(&bs->bs_kgts_sema); 212 kfree(kgts); 213 } else { 214 ret++; 215 } 216 } 217 return ret; 218} 219 220/* 221 * Lock & load the kernel context for the specified blade. 
222 */ 223static struct gru_blade_state *gru_lock_kernel_context(int blade_id) 224{ 225 struct gru_blade_state *bs; 226 int bid; 227 228 STAT(lock_kernel_context); 229again: 230 bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; 231 bs = gru_base[bid]; 232 233 /* Handle the case where migration occurred while waiting for the sema */ 234 down_read(&bs->bs_kgts_sema); 235 if (blade_id < 0 && bid != uv_numa_blade_id()) { 236 up_read(&bs->bs_kgts_sema); 237 goto again; 238 } 239 if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) 240 gru_load_kernel_context(bs, bid); 241 return bs; 242 243} 244 245/* 246 * Unlock the kernel context for the specified blade. Context is not 247 * unloaded but may be stolen before next use. 248 */ 249static void gru_unlock_kernel_context(int blade_id) 250{ 251 struct gru_blade_state *bs; 252 253 bs = gru_base[blade_id]; 254 up_read(&bs->bs_kgts_sema); 255 STAT(unlock_kernel_context); 256} 257 258/* 259 * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. 260 * - returns with preemption disabled 261 */ 262static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) 263{ 264 struct gru_blade_state *bs; 265 int lcpu; 266 267 BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); 268 preempt_disable(); 269 bs = gru_lock_kernel_context(-1); 270 lcpu = uv_blade_processor_id(); 271 *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; 272 *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; 273 return 0; 274} 275 276/* 277 * Free the current cpus reserved DSR/CBR resources. 278 */ 279static void gru_free_cpu_resources(void *cb, void *dsr) 280{ 281 gru_unlock_kernel_context(uv_numa_blade_id()); 282 preempt_enable(); 283} 284 285/* 286 * Reserve GRU resources to be used asynchronously. 287 * Note: currently supports only 1 reservation per blade. 
288 * 289 * input: 290 * blade_id - blade on which resources should be reserved 291 * cbrs - number of CBRs 292 * dsr_bytes - number of DSR bytes needed 293 * output: 294 * handle to identify resource 295 * (0 = async resources already reserved) 296 */ 297unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, 298 struct completion *cmp) 299{ 300 struct gru_blade_state *bs; 301 struct gru_thread_state *kgts; 302 int ret = 0; 303 304 bs = gru_base[blade_id]; 305 306 down_write(&bs->bs_kgts_sema); 307 308 /* Verify no resources already reserved */ 309 if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) 310 goto done; 311 bs->bs_async_dsr_bytes = dsr_bytes; 312 bs->bs_async_cbrs = cbrs; 313 bs->bs_async_wq = cmp; 314 kgts = bs->bs_kgts; 315 316 /* Resources changed. Unload context if already loaded */ 317 if (kgts && kgts->ts_gru) 318 gru_unload_context(kgts, 0); 319 ret = ASYNC_BID_TO_HAN(blade_id); 320 321done: 322 up_write(&bs->bs_kgts_sema); 323 return ret; 324} 325 326/* 327 * Release async resources previously reserved. 328 * 329 * input: 330 * han - handle to identify resources 331 */ 332void gru_release_async_resources(unsigned long han) 333{ 334 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 335 336 down_write(&bs->bs_kgts_sema); 337 bs->bs_async_dsr_bytes = 0; 338 bs->bs_async_cbrs = 0; 339 bs->bs_async_wq = NULL; 340 up_write(&bs->bs_kgts_sema); 341} 342 343/* 344 * Wait for async GRU instructions to complete. 
345 * 346 * input: 347 * han - handle to identify resources 348 */ 349void gru_wait_async_cbr(unsigned long han) 350{ 351 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 352 353 wait_for_completion(bs->bs_async_wq); 354 mb(); 355} 356 357/* 358 * Lock previous reserved async GRU resources 359 * 360 * input: 361 * han - handle to identify resources 362 * output: 363 * cb - pointer to first CBR 364 * dsr - pointer to first DSR 365 */ 366void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) 367{ 368 struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); 369 int blade_id = ASYNC_HAN_TO_BID(han); 370 int ncpus; 371 372 gru_lock_kernel_context(blade_id); 373 ncpus = uv_blade_nr_possible_cpus(blade_id); 374 if (cb) 375 *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; 376 if (dsr) 377 *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; 378} 379 380/* 381 * Unlock previous reserved async GRU resources 382 * 383 * input: 384 * han - handle to identify resources 385 */ 386void gru_unlock_async_resource(unsigned long han) 387{ 388 int blade_id = ASYNC_HAN_TO_BID(han); 389 390 gru_unlock_kernel_context(blade_id); 391} 392 393/*----------------------------------------------------------------------*/ 394int gru_get_cb_exception_detail(void *cb, 395 struct control_block_extended_exc_detail *excdet) 396{ 397 struct gru_control_block_extended *cbe; 398 struct gru_thread_state *kgts = NULL; 399 unsigned long off; 400 int cbrnum, bid; 401 402 /* 403 * Locate kgts for cb. This algorithm is SLOW but 404 * this function is rarely called (ie., almost never). 405 * Performance does not matter. 
406 */ 407 for_each_possible_blade(bid) { 408 if (!gru_base[bid]) 409 break; 410 kgts = gru_base[bid]->bs_kgts; 411 if (!kgts || !kgts->ts_gru) 412 continue; 413 off = cb - kgts->ts_gru->gs_gru_base_vaddr; 414 if (off < GRU_SIZE) 415 break; 416 kgts = NULL; 417 } 418 BUG_ON(!kgts); 419 cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); 420 cbe = get_cbe(GRUBASE(cb), cbrnum); 421 gru_flush_cache(cbe); /* CBE not coherent */ 422 sync_core(); 423 excdet->opc = cbe->opccpy; 424 excdet->exopc = cbe->exopccpy; 425 excdet->ecause = cbe->ecause; 426 excdet->exceptdet0 = cbe->idef1upd; 427 excdet->exceptdet1 = cbe->idef3upd; 428 gru_flush_cache(cbe); 429 return 0; 430} 431 432char *gru_get_cb_exception_detail_str(int ret, void *cb, 433 char *buf, int size) 434{ 435 struct gru_control_block_status *gen = (void *)cb; 436 struct control_block_extended_exc_detail excdet; 437 438 if (ret > 0 && gen->istatus == CBS_EXCEPTION) { 439 gru_get_cb_exception_detail(cb, &excdet); 440 snprintf(buf, size, 441 "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," 442 "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), 443 gen, excdet.opc, excdet.exopc, excdet.ecause, 444 excdet.exceptdet0, excdet.exceptdet1); 445 } else { 446 snprintf(buf, size, "No exception"); 447 } 448 return buf; 449} 450 451static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) 452{ 453 while (gen->istatus >= CBS_ACTIVE) { 454 cpu_relax(); 455 barrier(); 456 } 457 return gen->istatus; 458} 459 460static int gru_retry_exception(void *cb) 461{ 462 struct gru_control_block_status *gen = (void *)cb; 463 struct control_block_extended_exc_detail excdet; 464 int retry = EXCEPTION_RETRY_LIMIT; 465 466 while (1) { 467 if (gru_wait_idle_or_exception(gen) == CBS_IDLE) 468 return CBS_IDLE; 469 if (gru_get_cb_message_queue_substatus(cb)) 470 return CBS_EXCEPTION; 471 gru_get_cb_exception_detail(cb, &excdet); 472 if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || 473 (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) 
474 break; 475 if (retry-- == 0) 476 break; 477 gen->icmd = 1; 478 gru_flush_cache(gen); 479 } 480 return CBS_EXCEPTION; 481} 482 483int gru_check_status_proc(void *cb) 484{ 485 struct gru_control_block_status *gen = (void *)cb; 486 int ret; 487 488 ret = gen->istatus; 489 if (ret == CBS_EXCEPTION) 490 ret = gru_retry_exception(cb); 491 rmb(); 492 return ret; 493 494} 495 496int gru_wait_proc(void *cb) 497{ 498 struct gru_control_block_status *gen = (void *)cb; 499 int ret; 500 501 ret = gru_wait_idle_or_exception(gen); 502 if (ret == CBS_EXCEPTION) 503 ret = gru_retry_exception(cb); 504 rmb(); 505 return ret; 506} 507 508void gru_abort(int ret, void *cb, char *str) 509{ 510 char buf[GRU_EXC_STR_SIZE]; 511 512 panic("GRU FATAL ERROR: %s - %s\n", str, 513 gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); 514} 515 516void gru_wait_abort_proc(void *cb) 517{ 518 int ret; 519 520 ret = gru_wait_proc(cb); 521 if (ret) 522 gru_abort(ret, cb, "gru_wait_abort"); 523} 524 525 526/*------------------------------ MESSAGE QUEUES -----------------------------*/ 527 528/* Internal status . These are NOT returned to the user. */ 529#define MQIE_AGAIN -1 /* try again */ 530 531 532/* 533 * Save/restore the "present" flag that is in the second line of 2-line 534 * messages 535 */ 536static inline int get_present2(void *p) 537{ 538 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 539 return mhdr->present; 540} 541 542static inline void restore_present2(void *p, int val) 543{ 544 struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; 545 mhdr->present = val; 546} 547 548/* 549 * Create a message queue. 550 * qlines - message queue size in cache lines. Includes 2-line header. 
551 */ 552int gru_create_message_queue(struct gru_message_queue_desc *mqd, 553 void *p, unsigned int bytes, int nasid, int vector, int apicid) 554{ 555 struct message_queue *mq = p; 556 unsigned int qlines; 557 558 qlines = bytes / GRU_CACHE_LINE_BYTES - 2; 559 memset(mq, 0, bytes); 560 mq->start = &mq->data; 561 mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; 562 mq->next = &mq->data; 563 mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; 564 mq->qlines = qlines; 565 mq->hstatus[0] = 0; 566 mq->hstatus[1] = 1; 567 mq->head = gru_mesq_head(2, qlines / 2 + 1); 568 mqd->mq = mq; 569 mqd->mq_gpa = uv_gpa(mq); 570 mqd->qlines = qlines; 571 mqd->interrupt_pnode = nasid >> 1; 572 mqd->interrupt_vector = vector; 573 mqd->interrupt_apicid = apicid; 574 return 0; 575} 576EXPORT_SYMBOL_GPL(gru_create_message_queue); 577 578/* 579 * Send a NOOP message to a message queue 580 * Returns: 581 * 0 - if queue is full after the send. This is the normal case 582 * but various races can change this. 583 * -1 - if mesq sent successfully but queue not full 584 * >0 - unexpected error. 
MQE_xxx returned 585 */ 586static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, 587 void *mesg) 588{ 589 const struct message_header noop_header = { 590 .present = MQS_NOOP, .lines = 1}; 591 unsigned long m; 592 int substatus, ret; 593 struct message_header save_mhdr, *mhdr = mesg; 594 595 STAT(mesq_noop); 596 save_mhdr = *mhdr; 597 *mhdr = noop_header; 598 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); 599 ret = gru_wait(cb); 600 601 if (ret) { 602 substatus = gru_get_cb_message_queue_substatus(cb); 603 switch (substatus) { 604 case CBSS_NO_ERROR: 605 STAT(mesq_noop_unexpected_error); 606 ret = MQE_UNEXPECTED_CB_ERR; 607 break; 608 case CBSS_LB_OVERFLOWED: 609 STAT(mesq_noop_lb_overflow); 610 ret = MQE_CONGESTION; 611 break; 612 case CBSS_QLIMIT_REACHED: 613 STAT(mesq_noop_qlimit_reached); 614 ret = 0; 615 break; 616 case CBSS_AMO_NACKED: 617 STAT(mesq_noop_amo_nacked); 618 ret = MQE_CONGESTION; 619 break; 620 case CBSS_PUT_NACKED: 621 STAT(mesq_noop_put_nacked); 622 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 623 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, 624 IMA); 625 if (gru_wait(cb) == CBS_IDLE) 626 ret = MQIE_AGAIN; 627 else 628 ret = MQE_UNEXPECTED_CB_ERR; 629 break; 630 case CBSS_PAGE_OVERFLOW: 631 STAT(mesq_noop_page_overflow); 632 /* fallthru */ 633 default: 634 BUG(); 635 } 636 } 637 *mhdr = save_mhdr; 638 return ret; 639} 640 641/* 642 * Handle a gru_mesq full. 
643 */ 644static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, 645 void *mesg, int lines) 646{ 647 union gru_mesqhead mqh; 648 unsigned int limit, head; 649 unsigned long avalue; 650 int half, qlines; 651 652 /* Determine if switching to first/second half of q */ 653 avalue = gru_get_amo_value(cb); 654 head = gru_get_amo_value_head(cb); 655 limit = gru_get_amo_value_limit(cb); 656 657 qlines = mqd->qlines; 658 half = (limit != qlines); 659 660 if (half) 661 mqh = gru_mesq_head(qlines / 2 + 1, qlines); 662 else 663 mqh = gru_mesq_head(2, qlines / 2 + 1); 664 665 /* Try to get lock for switching head pointer */ 666 gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); 667 if (gru_wait(cb) != CBS_IDLE) 668 goto cberr; 669 if (!gru_get_amo_value(cb)) { 670 STAT(mesq_qf_locked); 671 return MQE_QUEUE_FULL; 672 } 673 674 /* Got the lock. Send optional NOP if queue not full, */ 675 if (head != limit) { 676 if (send_noop_message(cb, mqd, mesg)) { 677 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), 678 XTYPE_DW, IMA); 679 if (gru_wait(cb) != CBS_IDLE) 680 goto cberr; 681 STAT(mesq_qf_noop_not_full); 682 return MQIE_AGAIN; 683 } 684 avalue++; 685 } 686 687 /* Then flip queuehead to other half of queue. */ 688 gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, 689 IMA); 690 if (gru_wait(cb) != CBS_IDLE) 691 goto cberr; 692 693 /* If not successfully in swapping queue head, clear the hstatus lock */ 694 if (gru_get_amo_value(cb) != avalue) { 695 STAT(mesq_qf_switch_head_failed); 696 gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, 697 IMA); 698 if (gru_wait(cb) != CBS_IDLE) 699 goto cberr; 700 } 701 return MQIE_AGAIN; 702cberr: 703 STAT(mesq_qf_unexpected_error); 704 return MQE_UNEXPECTED_CB_ERR; 705} 706 707/* 708 * Handle a PUT failure. Note: if message was a 2-line message, one of the 709 * lines might have successfully have been written. 
Before sending the 710 * message, "present" must be cleared in BOTH lines to prevent the receiver 711 * from prematurely seeing the full message. 712 */ 713static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, 714 void *mesg, int lines) 715{ 716 unsigned long m, *val = mesg, gpa, save; 717 int ret; 718 719 m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); 720 if (lines == 2) { 721 gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); 722 if (gru_wait(cb) != CBS_IDLE) 723 return MQE_UNEXPECTED_CB_ERR; 724 } 725 gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); 726 if (gru_wait(cb) != CBS_IDLE) 727 return MQE_UNEXPECTED_CB_ERR; 728 729 if (!mqd->interrupt_vector) 730 return MQE_OK; 731 732 /* 733 * Send a cross-partition interrupt to the SSI that contains the target 734 * message queue. Normally, the interrupt is automatically delivered by 735 * hardware but some error conditions require explicit delivery. 736 * Use the GRU to deliver the interrupt. Otherwise partition failures 737 * could cause unrecovered errors. 738 */ 739 gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT); 740 save = *val; 741 *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector, 742 dest_Fixed); 743 gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA); 744 ret = gru_wait(cb); 745 *val = save; 746 if (ret != CBS_IDLE) 747 return MQE_UNEXPECTED_CB_ERR; 748 return MQE_OK; 749} 750 751/* 752 * Handle a gru_mesq failure. Some of these failures are software recoverable 753 * or retryable. 
754 */ 755static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, 756 void *mesg, int lines) 757{ 758 int substatus, ret = 0; 759 760 substatus = gru_get_cb_message_queue_substatus(cb); 761 switch (substatus) { 762 case CBSS_NO_ERROR: 763 STAT(mesq_send_unexpected_error); 764 ret = MQE_UNEXPECTED_CB_ERR; 765 break; 766 case CBSS_LB_OVERFLOWED: 767 STAT(mesq_send_lb_overflow); 768 ret = MQE_CONGESTION; 769 break; 770 case CBSS_QLIMIT_REACHED: 771 STAT(mesq_send_qlimit_reached); 772 ret = send_message_queue_full(cb, mqd, mesg, lines); 773 break; 774 case CBSS_AMO_NACKED: 775 STAT(mesq_send_amo_nacked); 776 ret = MQE_CONGESTION; 777 break; 778 case CBSS_PUT_NACKED: 779 STAT(mesq_send_put_nacked); 780 ret = send_message_put_nacked(cb, mqd, mesg, lines); 781 break; 782 case CBSS_PAGE_OVERFLOW: 783 STAT(mesq_page_overflow); 784 /* fallthru */ 785 default: 786 BUG(); 787 } 788 return ret; 789} 790 791/* 792 * Send a message to a message queue 793 * mqd message queue descriptor 794 * mesg message. 
ust be vaddr within a GSEG 795 * bytes message size (<= 2 CL) 796 */ 797int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, 798 unsigned int bytes) 799{ 800 struct message_header *mhdr; 801 void *cb; 802 void *dsr; 803 int istatus, clines, ret; 804 805 STAT(mesq_send); 806 BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); 807 808 clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); 809 if (gru_get_cpu_resources(bytes, &cb, &dsr)) 810 return MQE_BUG_NO_RESOURCES; 811 memcpy(dsr, mesg, bytes); 812 mhdr = dsr; 813 mhdr->present = MQS_FULL; 814 mhdr->lines = clines; 815 if (clines == 2) { 816 mhdr->present2 = get_present2(mhdr); 817 restore_present2(mhdr, MQS_FULL); 818 } 819 820 do { 821 ret = MQE_OK; 822 gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); 823 istatus = gru_wait(cb); 824 if (istatus != CBS_IDLE) 825 ret = send_message_failure(cb, mqd, dsr, clines); 826 } while (ret == MQIE_AGAIN); 827 gru_free_cpu_resources(cb, dsr); 828 829 if (ret) 830 STAT(mesq_send_failed); 831 return ret; 832} 833EXPORT_SYMBOL_GPL(gru_send_message_gpa); 834 835/* 836 * Advance the receive pointer for the queue to the next message. 837 */ 838void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) 839{ 840 struct message_queue *mq = mqd->mq; 841 struct message_header *mhdr = mq->next; 842 void *next, *pnext; 843 int half = -1; 844 int lines = mhdr->lines; 845 846 if (lines == 2) 847 restore_present2(mhdr, MQS_EMPTY); 848 mhdr->present = MQS_EMPTY; 849 850 pnext = mq->next; 851 next = pnext + GRU_CACHE_LINE_BYTES * lines; 852 if (next == mq->limit) { 853 next = mq->start; 854 half = 1; 855 } else if (pnext < mq->start2 && next >= mq->start2) { 856 half = 0; 857 } 858 859 if (half >= 0) 860 mq->hstatus[half] = 1; 861 mq->next = next; 862} 863EXPORT_SYMBOL_GPL(gru_free_message); 864 865/* 866 * Get next message from message queue. Return NULL if no message 867 * present. User must call next_message() to move to next message. 
868 * rmq message queue 869 */ 870void *gru_get_next_message(struct gru_message_queue_desc *mqd) 871{ 872 struct message_queue *mq = mqd->mq; 873 struct message_header *mhdr = mq->next; 874 int present = mhdr->present; 875 876 /* skip NOOP messages */ 877 while (present == MQS_NOOP) { 878 gru_free_message(mqd, mhdr); 879 mhdr = mq->next; 880 present = mhdr->present; 881 } 882 883 /* Wait for both halves of 2 line messages */ 884 if (present == MQS_FULL && mhdr->lines == 2 && 885 get_present2(mhdr) == MQS_EMPTY) 886 present = MQS_EMPTY; 887 888 if (!present) { 889 STAT(mesq_receive_none); 890 return NULL; 891 } 892 893 if (mhdr->lines == 2) 894 restore_present2(mhdr, mhdr->present2); 895 896 STAT(mesq_receive); 897 return mhdr; 898} 899EXPORT_SYMBOL_GPL(gru_get_next_message); 900 901/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ 902 903/* 904 * Load a DW from a global GPA. The GPA can be a memory or MMR address. 905 */ 906int gru_read_gpa(unsigned long *value, unsigned long gpa) 907{ 908 void *cb; 909 void *dsr; 910 int ret, iaa; 911 912 STAT(read_gpa); 913 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 914 return MQE_BUG_NO_RESOURCES; 915 iaa = gpa >> 62; 916 gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); 917 ret = gru_wait(cb); 918 if (ret == CBS_IDLE) 919 *value = *(unsigned long *)dsr; 920 gru_free_cpu_resources(cb, dsr); 921 return ret; 922} 923EXPORT_SYMBOL_GPL(gru_read_gpa); 924 925 926/* 927 * Copy a block of data using the GRU resources 928 */ 929int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, 930 unsigned int bytes) 931{ 932 void *cb; 933 void *dsr; 934 int ret; 935 936 STAT(copy_gpa); 937 if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) 938 return MQE_BUG_NO_RESOURCES; 939 gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), 940 XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); 941 ret = gru_wait(cb); 942 gru_free_cpu_resources(cb, dsr); 943 return ret; 944} 
945EXPORT_SYMBOL_GPL(gru_copy_gpa); 946 947/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ 948/* Temp - will delete after we gain confidence in the GRU */ 949 950static int quicktest0(unsigned long arg) 951{ 952 unsigned long word0; 953 unsigned long word1; 954 void *cb; 955 void *dsr; 956 unsigned long *p; 957 int ret = -EIO; 958 959 if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) 960 return MQE_BUG_NO_RESOURCES; 961 p = dsr; 962 word0 = MAGIC; 963 word1 = 0; 964 965 gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 966 if (gru_wait(cb) != CBS_IDLE) { 967 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); 968 goto done; 969 } 970 971 if (*p != MAGIC) { 972 printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); 973 goto done; 974 } 975 gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); 976 if (gru_wait(cb) != CBS_IDLE) { 977 printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); 978 goto done; 979 } 980 981 if (word0 != word1 || word1 != MAGIC) { 982 printk(KERN_DEBUG 983 "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", 984 smp_processor_id(), word1, MAGIC); 985 goto done; 986 } 987 ret = 0; 988 989done: 990 gru_free_cpu_resources(cb, dsr); 991 return ret; 992} 993 994#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) 995 996static int quicktest1(unsigned long arg) 997{ 998 struct gru_message_queue_desc mqd; 999 void *p, *mq; 1000 unsigned long *dw; 1001 int i, ret = -EIO; 1002 char mes[GRU_CACHE_LINE_BYTES], *m; 1003 1004 /* Need 1K cacheline aligned that does not cross page boundary */ 1005 p = kmalloc(4096, 0); 1006 if (p == NULL) 1007 return -ENOMEM; 1008 mq = ALIGNUP(p, 1024); 1009 memset(mes, 0xee, sizeof(mes)); 1010 dw = mq; 1011 1012 gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); 1013 for (i = 0; i < 6; i++) { 1014 mes[8] = i; 1015 do { 1016 ret = 
gru_send_message_gpa(&mqd, mes, sizeof(mes)); 1017 } while (ret == MQE_CONGESTION); 1018 if (ret) 1019 break; 1020 } 1021 if (ret != MQE_QUEUE_FULL || i != 4) { 1022 printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", 1023 smp_processor_id(), ret, i); 1024 goto done; 1025 } 1026 1027 for (i = 0; i < 6; i++) { 1028 m = gru_get_next_message(&mqd); 1029 if (!m || m[8] != i) 1030 break; 1031 gru_free_message(&mqd, m); 1032 } 1033 if (i != 4) { 1034 printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", 1035 smp_processor_id(), i, m, m ? m[8] : -1); 1036 goto done; 1037 } 1038 ret = 0; 1039 1040done: 1041 kfree(p); 1042 return ret; 1043} 1044 1045static int quicktest2(unsigned long arg) 1046{ 1047 static DECLARE_COMPLETION(cmp); 1048 unsigned long han; 1049 int blade_id = 0; 1050 int numcb = 4; 1051 int ret = 0; 1052 unsigned long *buf; 1053 void *cb0, *cb; 1054 struct gru_control_block_status *gen; 1055 int i, k, istatus, bytes; 1056 1057 bytes = numcb * 4 * 8; 1058 buf = kmalloc(bytes, GFP_KERNEL); 1059 if (!buf) 1060 return -ENOMEM; 1061 1062 ret = -EBUSY; 1063 han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); 1064 if (!han) 1065 goto done; 1066 1067 gru_lock_async_resource(han, &cb0, NULL); 1068 memset(buf, 0xee, bytes); 1069 for (i = 0; i < numcb; i++) 1070 gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, 1071 XTYPE_DW, 4, 1, IMA_INTERRUPT); 1072 1073 ret = 0; 1074 k = numcb; 1075 do { 1076 gru_wait_async_cbr(han); 1077 for (i = 0; i < numcb; i++) { 1078 cb = cb0 + i * GRU_HANDLE_STRIDE; 1079 istatus = gru_check_status(cb); 1080 if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) 1081 break; 1082 } 1083 if (i == numcb) 1084 continue; 1085 if (istatus != CBS_IDLE) { 1086 printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); 1087 ret = -EFAULT; 1088 } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || 1089 buf[4 * i + 3]) { 1090 printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, 
buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", 1091 smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); 1092 ret = -EIO; 1093 } 1094 k--; 1095 gen = cb; 1096 gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ 1097 } while (k); 1098 BUG_ON(cmp.done); 1099 1100 gru_unlock_async_resource(han); 1101 gru_release_async_resources(han); 1102done: 1103 kfree(buf); 1104 return ret; 1105} 1106 1107#define BUFSIZE 200 1108static int quicktest3(unsigned long arg) 1109{ 1110 char buf1[BUFSIZE], buf2[BUFSIZE]; 1111 int ret = 0; 1112 1113 memset(buf2, 0, sizeof(buf2)); 1114 memset(buf1, get_cycles() & 255, sizeof(buf1)); 1115 gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); 1116 if (memcmp(buf1, buf2, BUFSIZE)) { 1117 printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); 1118 ret = -EIO; 1119 } 1120 return ret; 1121} 1122 1123/* 1124 * Debugging only. User hook for various kernel tests 1125 * of driver & gru. 1126 */ 1127int gru_ktest(unsigned long arg) 1128{ 1129 int ret = -EINVAL; 1130 1131 switch (arg & 0xff) { 1132 case 0: 1133 ret = quicktest0(arg); 1134 break; 1135 case 1: 1136 ret = quicktest1(arg); 1137 break; 1138 case 2: 1139 ret = quicktest2(arg); 1140 break; 1141 case 3: 1142 ret = quicktest3(arg); 1143 break; 1144 case 99: 1145 ret = gru_free_kernel_contexts(); 1146 break; 1147 } 1148 return ret; 1149 1150} 1151 1152int gru_kservices_init(void) 1153{ 1154 return 0; 1155} 1156 1157void gru_kservices_exit(void) 1158{ 1159 if (gru_free_kernel_contexts()) 1160 BUG(); 1161} 1162 1163