1char netlib_id[]="\ 2@(#)netlib.c (c) Copyright 1993-2012 Hewlett-Packard Company. Version 2.6.0"; 3 4 5/****************************************************************/ 6/* */ 7/* netlib.c */ 8/* */ 9/* the common utility routines available to all... */ 10/* */ 11/* establish_control() establish the control socket */ 12/* calibrate_local_cpu() do local cpu calibration */ 13/* calibrate_remote_cpu() do remote cpu calibration */ 14/* send_request() send a request to the remote */ 15/* recv_response() receive a response from remote */ 16/* send_response() send a response to the remote */ 17/* recv_request() recv a request from the remote */ 18/* dump_request() dump request contents */ 19/* dump_response() dump response contents */ 20/* cpu_start() start measuring cpu */ 21/* cpu_stop() stop measuring cpu */ 22/* calc_cpu_util() calculate the cpu utilization */ 23/* calc_service_demand() calculate the service demand */ 24/* calc_thruput() calulate the tput in units */ 25/* calibrate() really calibrate local cpu */ 26/* identify_local() print local host information */ 27/* identify_remote() print remote host information */ 28/* format_number() format the number (KB, MB,etc) */ 29/* format_units() return the format in english */ 30/* msec_sleep() sleep for some msecs */ 31/* start_timer() start a timer */ 32/* random_ip_address() select a random IP address from */ 33/* specified range */ 34/* */ 35/* the routines you get when WANT_DLPI is defined... 
*/ 36/* ...all moved to src/nettest_dlpi.c */ 37/* */ 38/* dl_open() open a file descriptor and */ 39/* attach to the card */ 40/* dl_mtu() find the MTU of the card */ 41/* dl_bind() bind the sap do the card */ 42/* dl_connect() sender's have of connect */ 43/* dl_accpet() receiver's half of connect */ 44/* dl_set_window() set the window size */ 45/* dl_stats() retrieve statistics */ 46/* dl_send_disc() initiate disconnect (sender) */ 47/* dl_recv_disc() accept disconnect (receiver) */ 48/****************************************************************/ 49 50/****************************************************************/ 51/* */ 52/* Global include files */ 53/* */ 54/****************************************************************/ 55 56#ifdef HAVE_CONFIG_H 57#include <config.h> 58#endif 59 60 /* It would seem that most of the includes being done here from 61 "sys/" actually have higher-level wrappers at just /usr/include. 62 This is based on a spot-check of a couple systems at my disposal. 63 If you have trouble compiling you may want to add "sys/" raj 64 10/95 */ 65#include <limits.h> 66#include <signal.h> 67#ifdef HAVE_SYSCALL_H 68#include <syscall.h> 69#endif 70#ifdef MPE 71# define NSIG _NSIG 72#endif /* MPE */ 73#include <sys/types.h> 74#include <fcntl.h> 75#include <stdio.h> 76#include <stdlib.h> 77#include <math.h> 78#include <string.h> 79#include <assert.h> 80#ifdef HAVE_ENDIAN_H 81#include <endian.h> 82#endif 83 84 85#ifndef WIN32 86 /* at some point, I would like to get rid of all these "sys/" 87 includes where appropriate. if you have a system that requires/ 88 them, speak now, or your system may not compile later revisions of 89 netperf. 
raj 1/96 */ 90#include <unistd.h> 91#include <sys/stat.h> 92#include <sys/times.h> 93#ifndef MPE 94#include <time.h> 95#include <sys/time.h> 96#endif /* MPE */ 97#include <sys/socket.h> 98#include <netinet/in.h> 99#include <arpa/inet.h> 100#include <netdb.h> 101#include <errno.h> 102#include <sys/utsname.h> 103#if !defined(MPE) && !defined(__VMS) 104#include <sys/param.h> 105#endif /* MPE */ 106 107#else /* WIN32 */ 108 109#include <process.h> 110#include <time.h> 111#include <winsock2.h> 112#define netperf_socklen_t socklen_t 113#include <windows.h> 114#include <mmsystem.h> 115/* the only time someone should need to define DONT_IPV6 in the 116 "sources" file is if they are trying to compile on Windows 2000 or 117 NT4 and I suspect this may not be their only problem :) */ 118#ifndef DONT_IPV6 119#include <ws2tcpip.h> 120#endif 121 122#include <windows.h> 123 124#define SIGALRM (14) 125#define sleep(x) Sleep((x)*1000) 126 127#endif /* WIN32 */ 128 129#ifdef HAVE_UNAME 130#include <sys/utsname.h> 131#endif 132 133#ifdef _AIX 134#include <sys/select.h> 135#include <sys/sched.h> 136#include <sys/pri.h> 137#define PRIORITY PRI_LOW 138#else/* _AIX */ 139#ifdef __sgi 140#include <sys/prctl.h> 141#include <sys/schedctl.h> 142#define PRIORITY NDPLOMIN 143#endif /* __sgi */ 144#endif /* _AIX */ 145 146 147#ifdef HAVE_MPCTL 148#include <sys/mpctl.h> 149#endif 150 151#if !defined(HAVE_GETADDRINFO) || !defined(HAVE_GETNAMEINFO) 152# include "missing/getaddrinfo.h" 153#endif 154 155 156#include "hist.h" 157 158/****************************************************************/ 159/* */ 160/* Local Include Files */ 161/* */ 162/****************************************************************/ 163#define NETLIB 164#include "netlib.h" 165#include "netsh.h" 166#include "netcpu.h" 167#include "netperf_version.h" 168 169/****************************************************************/ 170/* */ 171/* Global constants, macros and variables */ 172/* */ 
173/****************************************************************/ 174 175#if defined(WIN32) || defined(__VMS) 176struct timezone { 177 int dummy ; 178 } ; 179#ifndef __VMS 180SOCKET win_kludge_socket = INVALID_SOCKET; 181SOCKET win_kludge_socket2 = INVALID_SOCKET; 182#endif /* __VMS */ 183#endif /* WIN32 || __VMS */ 184 185#ifndef LONG_LONG_MAX 186#define LONG_LONG_MAX 9223372036854775807LL 187#endif /* LONG_LONG_MAX */ 188 189 /* older versions of netperf knew about the HP kernel IDLE counter. 190 this is now obsolete - in favor of either pstat(), times, or a 191 process-level looper process. we also now require support for the 192 "long" integer type. raj 4/95. */ 193 194int 195 lib_num_loc_cpus, /* the number of cpus in the system */ 196 lib_num_rem_cpus; /* how many we think are in the remote */ 197 198struct cpu_stats_struct 199 lib_local_cpu_stats, 200 lib_remote_cpu_stats; 201 202#define PAGES_PER_CHILD 2 203 204int lib_use_idle; 205int cpu_method; 206 207struct timeval time1, time2; 208struct timezone tz; 209float lib_elapsed, 210 lib_local_maxrate, 211 lib_remote_maxrate; 212 213float lib_local_per_cpu_util[MAXCPUS]; 214int lib_cpu_map[MAXCPUS]; 215 216int *request_array; 217int *response_array; 218 219/* INVALID_SOCKET == INVALID_HANDLE_VALUE == (unsigned int)(~0) == -1 */ 220SOCKET netlib_control = INVALID_SOCKET; 221SOCKET server_sock = INVALID_SOCKET; 222int control_family = AF_UNSPEC; 223 224/* global variables to hold the value for processor affinity */ 225int local_proc_affinity = -1,remote_proc_affinity = -1; 226 227/* these are to allow netperf to be run easily through those evil, 228 end-to-end breaking things known as firewalls */ 229char local_data_port[10]; 230char remote_data_port[10]; 231 232char *local_data_address=NULL; 233char *remote_data_address=NULL; 234 235char *local_sysname, *remote_sysname; 236char *local_release, *remote_release; 237char *local_version, *remote_version; 238char *local_machine, *remote_machine; 239 240int 
local_data_family=AF_UNSPEC; 241int remote_data_family=AF_UNSPEC; 242 243char *netperf_version; 244 245enum netperf_output_modes netperf_output_mode = HUMAN; 246 247/* in the past, I was overlaying a structure on an array of ints. now 248 I am going to have a "real" structure, and point an array of ints 249 at it. the real structure will be forced to the same alignment as 250 the type "double." this change will mean that pre-2.1 netperfs 251 cannot be mixed with 2.1 and later. raj 11/95 */ 252 253union netperf_request_struct netperf_request; 254union netperf_response_struct netperf_response; 255 256FILE *where; 257 258char libfmt = '?'; 259 260#ifdef WIN32 261HANDLE hAlarm = INVALID_HANDLE_VALUE; 262int timed_out=0; 263#endif 264 265int times_up; 266 267#ifdef WIN32 268 /* we use a getopt implementation from net.sources */ 269/* 270 * get option letter from argument vector 271 */ 272int 273 opterr = 1, /* should error messages be printed? */ 274 optind = 1, /* index into parent argv vector */ 275 optopt; /* character checked for validity */ 276char 277 *optarg; /* argument associated with option */ 278 279#define EMSG "" 280 281#endif /* WIN32 */ 282 283static int measuring_cpu; 284int 285netlib_get_page_size(void) { 286 287 /* not all systems seem to have the sysconf for page size. for 288 those which do not, we will assume that the page size is 8192 289 bytes. this should be more than enough to be sure that there is 290 no page or cache thrashing by looper processes on MP 291 systems. 
#ifdef WANT_INTERVALS
#ifdef WIN32
HANDLE WinTimer;    /* the waitable timer driving the interval pacing */
UINT   timerRes;    /* resolution requested via timeBeginPeriod() */

/* tear down the waitable interval timer and restore the system timer
   resolution */
void stop_itimer()
{
  CancelWaitableTimer(WinTimer);
  CloseHandle(WinTimer);
  timeEndPeriod(timerRes);
}
#else
static unsigned int usec_per_itvl;

/* disarm the POSIX interval timer by loading it with all zeros */
void
stop_itimer()
{
  struct itimerval new_interval;
  struct itimerval old_interval;

  new_interval.it_interval.tv_sec  = 0;
  new_interval.it_interval.tv_usec = 0;
  new_interval.it_value.tv_sec     = 0;
  new_interval.it_value.tv_usec    = 0;
  if (setitimer(ITIMER_REAL,&new_interval,&old_interval) != 0) {
    /* there was a problem arming the interval timer */
    perror("netperf: setitimer");
    exit(1);
  }
  return;
}
#endif /* WIN32 */
#endif /* WANT_INTERVALS */



#ifdef WIN32
/* complain about a bad option letter, but only if opterr says we may */
static void
error(char *pch)
{
  if (!opterr) {
    return;   /* without printing */
  }
  fprintf(stderr, "%s: %s: %c\n",
          (NULL != program) ? program : "getopt", pch, optopt);
}

/* get option letter from argument vector - minimal getopt() for the
   WIN32 build, taken from net.sources */
int
getopt(int argc, char **argv, char *ostr)
{
  static char *place = EMSG;  /* option letter processing */
  register char *oli;         /* option letter list index */

  if (!*place) {
    /* update scanning pointer */
    if (optind >= argc || *(place = argv[optind]) != '-' || !*++place) {
      return EOF;
    }
    if (*place == '-') {
      /* found "--" */
      ++optind;
      place = EMSG;   /* Added by shiva for Netperf */
      return EOF;
    }
  }

  /* option letter okay? */
  if ((optopt = (int)*place++) == (int)':'
      || !(oli = strchr(ostr, optopt))) {
    if (!*place) {
      ++optind;
    }
    error("illegal option");
    return BADCH;
  }

  if (*++oli != ':') {
    /* this option does not take an argument */
    optarg = NULL;
    if (!*place)
      ++optind;
  }
  else {
    /* this option requires an argument */
    if (*place) {
      optarg = place;           /* no white space */
    }
    else if (argc <= ++optind) {
      /* no arg */
      place = EMSG;
      error("option requires an argument");
      return BADCH;
    }
    else {
      optarg = argv[optind];    /* white space */
    }
    place = EMSG;
    ++optind;
  }
  return optopt;                /* return option letter */
}
#endif /* WIN32 */

/*----------------------------------------------------------------------------
 * WIN32 implementation of perror, does not deal very well with WSA errors
 * The stdlib.h version of perror only deals with the ancient XENIX error codes.
 *
 * +*+SAF Why can't all WSA errors go through GetLastError?  Most seem to...
 *--------------------------------------------------------------------------*/
419 *--------------------------------------------------------------------------*/ 420 421#ifdef WIN32 422void PrintWin32Error(FILE *stream, LPSTR text) 423{ 424 LPSTR szTemp; 425 DWORD dwResult; 426 DWORD dwError; 427 428 dwError = GetLastError(); 429 dwResult = FormatMessage( 430 FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM |FORMAT_MESSAGE_ARGUMENT_ARRAY, 431 NULL, 432 dwError, 433 LANG_NEUTRAL, 434 (LPTSTR)&szTemp, 435 0, 436 NULL ); 437 438 if (dwResult) 439 fprintf(stream, "%s: %s\n", text, szTemp); 440 else 441 fprintf(stream, "%s: error 0x%x\n", text, dwError); 442 fflush(stream); 443 444 if (szTemp) 445 LocalFree((HLOCAL)szTemp); 446} 447#endif /* WIN32 */ 448 449char * 450nsec_enabled_to_str(int enabled) { 451 switch (enabled) { 452 case NSEC_UNKNOWN: 453 return("Unknown"); 454 case NSEC_DISABLED: 455 return("Disabled"); 456 case NSEC_PERMISSIVE: 457 return("Permissive"); 458 case NSEC_ENFORCING: 459 return("Enforcing"); 460 default: 461 return("UNKNOWN MODE"); 462 } 463} 464 465char * nsec_type_to_str(int type) { 466 switch (type) { 467 case NSEC_TYPE_UNKNOWN: 468 return("Unknown"); 469 case NSEC_TYPE_SELINUX: 470 return("SELinux"); 471 default: 472 return("UNKNOWN TYPE"); 473 } 474} 475 476 477char * 478inet_ttos(int type) 479{ 480 switch (type) { 481 case SOCK_DGRAM: 482 return("SOCK_DGRAM"); 483 break; 484 case SOCK_STREAM: 485 return("SOCK_STREAM"); 486 break; 487#ifdef SOCK_DCCP 488 case SOCK_DCCP: 489 return("SOCK_DCCP"); 490#endif 491#ifdef SOCK_SEQPACKET 492 case SOCK_SEQPACKET: 493 return("SOCK_SEQPACKET"); 494#endif 495 default: 496 return("SOCK_UNKNOWN"); 497 } 498} 499 500 501 502 503char unknown[32]; 504 505char * 506inet_ptos(int protocol) { 507 switch (protocol) { 508 case IPPROTO_TCP: 509 return("IPPROTO_TCP"); 510 break; 511 case IPPROTO_UDP: 512 return("IPPROTO_UDP"); 513 break; 514#if defined(IPPROTO_SCTP) 515 case IPPROTO_SCTP: 516 return("IPPROTO_SCTP"); 517 break; 518#endif 519#if defined(IPPROTO_DCCP) 520 case 
IPPROTO_DCCP: 521 return "IPPROTO_DCCP"; 522 break; 523#endif 524#if defined(IPPROTO_UDPLITE) 525 case IPPROTO_UDPLITE: 526 return "IPPROTO_UDPLITE"; 527 break; 528#endif 529 default: 530 snprintf(unknown,sizeof(unknown),"IPPROTO_UNKNOWN(%d)",protocol); 531 return(unknown); 532 } 533} 534 535/* one of these days, this should not be required */ 536#ifndef AF_INET_SDP 537#define AF_INET_SDP 27 538#define PF_INET_SDP AF_INET_SDP 539#endif 540 541char * 542inet_ftos(int family) 543{ 544 switch(family) { 545 case AF_INET: 546 return("AF_INET"); 547#if defined(AF_INET6) 548 case AF_INET6: 549 return("AF_INET6"); 550#endif 551#if defined(AF_INET_SDP) 552 case AF_INET_SDP: 553 return("AF_INET_SDP"); 554#endif 555#if defined(AF_RDS) 556 case AF_RDS: 557 return("AF_RDS"); 558#endif 559 default: 560 return("AF_UNSPEC"); 561 } 562} 563 564int 565inet_nton(int af, const void *src, char *dst, int cnt) 566 567{ 568 569 switch (af) { 570 case AF_INET: 571 /* magic constants again... :) */ 572 if (cnt >= 4) { 573 memcpy(dst,src,4); 574 return 4; 575 } 576 else { 577 Set_errno(ENOSPC); 578 return(-1); 579 } 580 break; 581#if defined(AF_INET6) 582 case AF_INET6: 583 if (cnt >= 16) { 584 memcpy(dst,src,16); 585 return(16); 586 } 587 else { 588 Set_errno(ENOSPC); 589 return(-1); 590 } 591 break; 592#endif 593#if defined(AF_RDS) 594 case AF_RDS: 595 if (cnt >= 4) { 596 memcpy(dst,src,4); 597 return 4; 598 } 599#endif 600 default: 601 Set_errno(EAFNOSUPPORT); 602 return(-1); 603 } 604} 605 606double 607ntohd(double net_double) 608 609{ 610 /* we rely on things being nicely packed */ 611 union { 612 double whole_thing; 613 unsigned int words[2]; 614 unsigned char bytes[8]; 615 } conv_rec; 616 617 unsigned char scratch; 618 int i; 619 620 /* on those systems where ntohl is a no-op, we want to return the 621 original value, unchanged */ 622 623 if (ntohl(1L) == 1L) { 624 return(net_double); 625 } 626 627 conv_rec.whole_thing = net_double; 628 629 /* we know that in the message passing 
routines that ntohl will have 630 been called on the 32 bit quantities. we need to put those back 631 the way they belong before we swap */ 632 conv_rec.words[0] = htonl(conv_rec.words[0]); 633 conv_rec.words[1] = htonl(conv_rec.words[1]); 634 635 /* now swap */ 636 for (i=0; i<= 3; i++) { 637 scratch = conv_rec.bytes[i]; 638 conv_rec.bytes[i] = conv_rec.bytes[7-i]; 639 conv_rec.bytes[7-i] = scratch; 640 } 641 642#if defined(__FLOAT_WORD_ORDER) && defined(__BYTE_ORDER) 643 if (__FLOAT_WORD_ORDER != __BYTE_ORDER) { 644 /* Fixup mixed endian floating point machines */ 645 unsigned int scratch = conv_rec.words[0]; 646 conv_rec.words[0] = conv_rec.words[1]; 647 conv_rec.words[1] = scratch; 648 } 649#endif 650 651 return(conv_rec.whole_thing); 652 653} 654 655double 656htond(double host_double) 657 658{ 659 /* we rely on things being nicely packed */ 660 union { 661 double whole_thing; 662 unsigned int words[2]; 663 unsigned char bytes[8]; 664 } conv_rec; 665 666 unsigned char scratch; 667 int i; 668 669 /* on those systems where ntohl is a no-op, we want to return the 670 original value, unchanged */ 671 672 if (ntohl(1L) == 1L) { 673 return(host_double); 674 } 675 676 conv_rec.whole_thing = host_double; 677 678 /* now swap */ 679 for (i=0; i<= 3; i++) { 680 scratch = conv_rec.bytes[i]; 681 conv_rec.bytes[i] = conv_rec.bytes[7-i]; 682 conv_rec.bytes[7-i] = scratch; 683 } 684 685#if defined(__FLOAT_WORD_ORDER) && defined(__BYTE_ORDER) 686 if (__FLOAT_WORD_ORDER != __BYTE_ORDER) { 687 /* Fixup mixed endian floating point machines */ 688 unsigned int scratch = conv_rec.words[0]; 689 conv_rec.words[0] = conv_rec.words[1]; 690 conv_rec.words[1] = scratch; 691 } 692#endif 693 694 /* we know that in the message passing routines htonl will be called 695 on the 32 bit quantities. 
we need to set things up so that when 696 this happens, the proper order will go out on the network */ 697 conv_rec.words[0] = htonl(conv_rec.words[0]); 698 conv_rec.words[1] = htonl(conv_rec.words[1]); 699 700 return(conv_rec.whole_thing); 701 702} 703 704 705 706/* The original patch from Google used lrand48, but I have been 707 informed that is not easily available under Windows. So, rather 708 than have some #ifdefs here I'll just simplistically replace 709 lrand48 with rand(), which should be "good enough" at some point it 710 may be sufficient to just call rand() directly rather than call 711 this raj 20101130 */ 712 713unsigned int 714rand32(){ 715 return (unsigned int)rand() * 2 + rand() % 2; 716} 717 718/* this routine will set the ip address of the sockaddr in the 719 addrinfo to a random number in range, based on the address 720 family. for grins, we will sanity check the value of mask_len 721 against the address family. initial version from google, 722 enhancements by raj 20101129 */ 723void 724random_ip_address(struct addrinfo *res, int mask_len) 725{ 726 switch(res->ai_family) { 727 case AF_INET: { 728 struct sockaddr_in *foo = (struct sockaddr_in *)res->ai_addr; 729 unsigned int addr = ntohl(foo->sin_addr.s_addr); 730 unsigned int mask = ((unsigned int)1 << (32 - mask_len)) - 1; 731 732 if ((mask_len < 0) || (mask_len > 32)) { 733 fprintf(where, 734 "Mask length must be between 0 and 32 inclusive for AF_INET\n"); 735 fflush(where); 736 exit(-1); 737 } 738 739 addr = ntohl(foo->sin_addr.s_addr); 740 do { 741 addr = (addr & ~mask) | (rand32() & mask); 742 } while ((addr & 0xff) == 0xff); 743 foo->sin_addr.s_addr = htonl(addr); 744 break; 745 } 746#if defined(AF_INET6) 747 case AF_INET6: { 748 struct sockaddr_in6 *foo = (struct sockaddr_in6 *)res->ai_addr; 749 750 unsigned int i, len; 751 unsigned int *addr = (unsigned int *)&(foo->sin6_addr.s6_addr); 752 unsigned int mask; 753 754 if ((mask_len < 0) || (mask_len > 128)) { 755 fprintf(where, 756 "Mask 
length must be between 0 and 128 inclusive for AF_INET\n"); 757 fflush(where); 758 exit(-1); 759 } 760 761 for (i = 0; i < 4; i ++){ 762 addr[i] = ntohl(addr[i]); 763 len = mask_len - i * 32; 764 len = ((len < 32) ? len : 32); 765 len = ((len > 0) ? len : 0); 766 mask = ((unsigned int)1 << (32 - len)) - 1; 767 addr[i] = (addr[i] & ~mask) | (rand32() & mask); 768 addr[i] = htonl(addr[i]); 769 } 770 break; 771 } 772#endif 773 default: 774 fprintf(where, 775 "Unexpected Address Family of %u\n",res->ai_family); 776 fflush(where); 777 exit(-1); 778 } 779} 780 781#if defined(HAVE_SENDFILE) 782int netperf_sendfile(SOCKET send_socket, struct ring_elt *send_ring) { 783 784 int len; 785 int ret = 0; 786 787#if defined(__linux) || defined(__sun) 788 off_t scratch_offset; /* the linux sendfile() call will update 789 the offset variable, which is 790 something we do _not_ want to happen 791 to the value in the send_ring! so, we 792 have to use a scratch variable. */ 793#endif /* __linux || defined(__sun) */ 794 795#if defined (__sun) 796 size_t scratch_len; /* the sun sendfilev() needs a place to 797 tell us how many bytes were written, 798 even though it also returns the value */ 799 sendfilevec_t sv; 800#endif /* __sun */ 801 802 /* you can look at netlib.h for a description of the fields we 803 are passing to sendfile(). 08/2000 */ 804#if defined(__linux) 805 scratch_offset = send_ring->offset; 806 len=sendfile(send_socket, 807 send_ring->fildes, 808 &scratch_offset, /* modified after the call! */ 809 send_ring->length); 810#elif defined (__sun) 811 /* We must call with SFV_NOWAIT and a large file size (>= 16MB) 812 to get zero-copy, as well as compiling with 813 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 */ 814 sv.sfv_fd = send_ring->fildes; 815 sv.sfv_flag = SFV_NOWAIT; 816 sv.sfv_off = send_ring->offset; 817 sv.sfv_len = send_ring->length; 818 len = sendfilev(send_socket, &sv, 1, &scratch_len); 819#elif defined(__FreeBSD__) 820 /* so close to HP-UX and yet so far away... 
:) */ 821 ret = sendfile(send_ring->fildes, 822 send_socket, 823 send_ring->offset, 824 send_ring->length, 825 NULL, 826 (off_t *)&len, 827 send_ring->flags); 828#elif defined(USE_OSX) 829 len = send_ring->length; 830 ret = sendfile(send_ring->fildes, 831 send_socket, 832 send_ring->offset, 833 (off_t *)&len, 834 NULL, 835 send_ring->flags); 836#else /* original sendile HP-UX */ 837 len=sendfile(send_socket, 838 send_ring->fildes, 839 send_ring->offset, 840 send_ring->length, 841 send_ring->hdtrl, 842 send_ring->flags); 843#endif 844 845 /* for OSX and FreeBSD, a non-zero ret means something failed. 846 I would hope that the length fields are set to -1 or the 847 like, but at the moment I do not know I can count on 848 that. for other platforms, ret will be set to zero and we can 849 rely directly on len. raj 2013-05-01 */ 850 if (ret != 0) 851 return -1; 852 else 853 return len; 854 855} 856#endif 857 858 859/* one of these days, this should be abstracted-out just like the CPU 860 util stuff. raj 2005-01-27 */ 861int 862get_num_cpus() 863 864{ 865 866 /* on HP-UX, even when we use the looper procs we need the pstat */ 867 /* call */ 868 869 int temp_cpus; 870 871#ifdef __hpux 872#include <sys/pstat.h> 873 874 struct pst_dynamic psd; 875 876 if (pstat_getdynamic((struct pst_dynamic *)&psd, 877 (size_t)sizeof(psd), (size_t)1, 0) != -1) { 878 temp_cpus = psd.psd_proc_cnt; 879 } 880 else { 881 temp_cpus = 1; 882 } 883 884#else 885 /* MW: <unistd.h> was included for non-Windows systems above. */ 886 /* Thus if _SC_NPROC_ONLN is defined, we should be able to use sysconf. 
*/ 887#ifdef _SC_NPROCESSORS_ONLN 888 temp_cpus = sysconf(_SC_NPROCESSORS_ONLN); 889 890#ifdef USE_PERFSTAT 891 temp_cpus = perfstat_cpu(NULL,NULL, sizeof(perfstat_cpu_t), 0); 892#endif /* USE_PERFSTAT */ 893 894#else /* no _SC_NPROCESSORS_ONLN */ 895 896#ifdef WIN32 897 SYSTEM_INFO SystemInfo; 898 GetSystemInfo(&SystemInfo); 899 900 temp_cpus = SystemInfo.dwNumberOfProcessors; 901#else 902 /* we need to know some other ways to do this, or just fall-back on 903 a global command line option - raj 4/95 */ 904 temp_cpus = shell_num_cpus; 905#endif /* WIN32 */ 906#endif /* _SC_NPROCESSORS_ONLN */ 907#endif /* __hpux */ 908 909 if (temp_cpus > MAXCPUS) { 910 fprintf(where, 911 "Sorry, this system has more CPUs (%d) than I can handle (%d).\n" 912 "Please alter MAXCPUS in netlib.h and recompile.\n", 913 temp_cpus, 914 MAXCPUS); 915 fflush(where); 916 exit(1); 917 } 918 919 return(temp_cpus); 920 921} 922 923#ifdef WIN32 924#ifdef __GNUC__ 925 #define S64_SUFFIX(x) x##LL 926#else 927 #define S64_SUFFIX(x) x##i64 928#endif 929 930/* 931 * Number of 100 nanosecond units from 1/1/1601 to 1/1/1970 932 */ 933#define EPOCH_BIAS S64_SUFFIX(116444736000000000) 934 935/* 936 * Union to facilitate converting from FILETIME to unsigned __int64 937 */ 938typedef union { 939 unsigned __int64 ft_scalar; 940 FILETIME ft_struct; 941} FT; 942 943void 944gettimeofday( struct timeval *tv , struct timezone *not_used ) 945{ 946 FT nt_time; 947 __int64 UnixTime; /* microseconds since 1/1/1970 */ 948 949 GetSystemTimeAsFileTime( &(nt_time.ft_struct) ); 950 951 UnixTime = ((nt_time.ft_scalar - EPOCH_BIAS) / S64_SUFFIX(10)); 952 tv->tv_sec = (long)(time_t)(UnixTime / S64_SUFFIX(1000000)); 953 tv->tv_usec = (unsigned long)(UnixTime % S64_SUFFIX(1000000)); 954} 955#endif /* WIN32 */ 956 957 958 /* this routine will disable any running timer */ 959void 960stop_timer() 961{ 962#ifndef WIN32 963 alarm(0); 964#else 965 /* at some point we may need some win32 equivalent */ 966 if (hAlarm != (HANDLE) 
INVALID_HANDLE_VALUE) { 967 SetEvent(hAlarm); 968 } 969#endif /* WIN32 */ 970 971} 972 973 974 975/************************************************************************/ 976/* */ 977/* signal catcher */ 978/* */ 979/************************************************************************/ 980#ifndef WIN32 981void 982#if defined(__hpux) 983catcher(sig, code, scp) 984 int sig; 985 int code; 986 struct sigcontext *scp; 987#else 988catcher(int sig) 989#endif /* __hpux || __VMS */ 990{ 991 992#ifdef __hpux 993 if (debug > 2) { 994 fprintf(where,"caught signal %d ",sig); 995 if (scp) { 996 fprintf(where,"while in syscall %d\n", 997 scp->sc_syscall); 998 } 999 else { 1000 fprintf(where,"null scp\n"); 1001 } 1002 fflush(where); 1003 } 1004#endif /* RAJ_DEBUG */ 1005 1006 switch(sig) { 1007 1008 case SIGINT: 1009 times_up = 1; 1010 break; 1011 case SIGALRM: 1012 if (--test_len_ticks == 0) { 1013 /* the test is over */ 1014 if (times_up != 0) { 1015 fprintf(where,"catcher: timer popped with times_up != 0\n"); 1016 fflush(where); 1017 } 1018 times_up = 1; 1019#if defined(WANT_INTERVALS) && !defined(WANT_SPIN) 1020 stop_itimer(); 1021 /* we should also stop the normal test timer lest it fire at an 1022 inopportune moment - we do not know if we got here off the 1023 interval timer or the test timer... */ 1024 stop_timer(); 1025#endif /* WANT_INTERVALS */ 1026 break; 1027 } 1028 else { 1029#ifdef WANT_INTERVALS 1030#ifdef __hpux 1031 /* the test is not over yet and we must have been using the 1032 interval timer. if we were in SYS_SIGSUSPEND we want to 1033 re-start the system call. Otherwise, we want to get out of 1034 the sigsuspend call. I NEED TO KNOW HOW TO DO THIS FOR OTHER 1035 OPERATING SYSTEMS. If you know how, please let me know. 
rick 1036 jones <rick.jones2@hp.com> */ 1037 if (scp->sc_syscall != SYS_SIGSUSPEND) { 1038 if (debug > 2) { 1039 fprintf(where, 1040 "catcher: Time to send burst > interval!\n"); 1041 fflush(where); 1042 } 1043 scp->sc_syscall_action = SIG_RESTART; 1044 } 1045#endif /* __hpux */ 1046#else /* WANT_INTERVALS */ 1047 fprintf(where, 1048 "catcher: interval timer running unexpectedly!\n"); 1049 fflush(where); 1050 times_up = 1; 1051#endif /* WANT_INTERVALS */ 1052 break; 1053 } 1054 } 1055 return; 1056} 1057#endif /* WIN32 */ 1058 1059void 1060install_signal_catchers() 1061 1062{ 1063 /* just a simple little routine to catch a bunch of signals */ 1064 1065#ifndef WIN32 1066 struct sigaction action; 1067 int i; 1068 1069 fprintf(where,"installing catcher for all signals\n"); 1070 fflush(where); 1071 1072 sigemptyset(&(action.sa_mask)); 1073 action.sa_handler = catcher; 1074 1075#ifdef SA_INTERRUPT 1076 action.sa_flags = SA_INTERRUPT; 1077#else /* SA_INTERRUPT */ 1078 action.sa_flags = 0; 1079#endif /* SA_INTERRUPT */ 1080 1081 1082 for (i = 1; i <= NSIG; i++) { 1083 switch (i) { 1084 case SIGALRM: 1085 case SIGPROF: 1086 case SIGSTOP: 1087 case SIGKILL: 1088 break; 1089 default: 1090 if (sigaction(i,&action,NULL) != 0) { 1091 fprintf(where, 1092 "Could not install signal catcher for sig %d, errno %d\n", 1093 i, 1094 errno); 1095 fflush(where); 1096 1097 } 1098 } 1099 } 1100#else 1101 return; 1102#endif /* WIN32 */ 1103} 1104 1105 1106#ifdef WIN32 1107#define SIGALRM (14) 1108void 1109emulate_alarm( int seconds ) 1110{ 1111 DWORD ErrorCode; 1112 DWORD HandlesClosedFlags = 0; 1113 1114 /* Wait on this event for parm seconds. */ 1115 1116 ErrorCode = WaitForSingleObject(hAlarm, seconds*1000); 1117 if (ErrorCode == WAIT_FAILED) 1118 { 1119 perror("WaitForSingleObject failed"); 1120 exit(1); 1121 } 1122 1123 if (ErrorCode == WAIT_TIMEOUT) 1124 { 1125 /* WaitForSingleObject timed out; this means the timer 1126 wasn't canceled. 
*/ 1127 1128 times_up = 1; 1129 1130 /* Give the other threads time to notice that times_up has 1131 changed state before taking the harsh step of closing the 1132 sockets. */ 1133 timed_out=0; 1134 if (WaitForSingleObject(hAlarm, PAD_TIME/2*1000) == 1135 WAIT_TIMEOUT) { 1136 timed_out=1; 1137 /* We have yet to find a good way to fully emulate 1138 the effects of signals and getting EINTR from 1139 system calls under winsock, so what we do here is 1140 close the socket out from under the other thread. 1141 It is rather kludgy, but should be sufficient to 1142 get this puppy shipped. The concept can be 1143 attributed/blamed :) on Robin raj 1/96 */ 1144 1145 if (win_kludge_socket != INVALID_SOCKET) { 1146 HandlesClosedFlags |= 1; 1147 closesocket(win_kludge_socket); 1148 } 1149 if (win_kludge_socket2 != INVALID_SOCKET) { 1150 HandlesClosedFlags |= 2; 1151 closesocket(win_kludge_socket2); 1152 } 1153 } 1154 if(debug) { 1155 fprintf(where, 1156 "emulate_alarm - HandlesClosedFlags: %x\n", 1157 HandlesClosedFlags); 1158 fflush(where); 1159 } 1160 } 1161} 1162 1163 1164#endif /* WIN32 */ 1165 1166void 1167start_timer(int time) 1168{ 1169 1170#ifdef WIN32 1171 /*+*+SAF What if StartTimer is called twice without the first timer */ 1172 /*+*+SAF expiring? */ 1173 1174 DWORD thread_id ; 1175 HANDLE tHandle; 1176 1177 if (hAlarm == (HANDLE) INVALID_HANDLE_VALUE) 1178 { 1179 /* Create the Alarm event object */ 1180 hAlarm = CreateEvent( 1181 (LPSECURITY_ATTRIBUTES) NULL, /* no security */ 1182 FALSE, /* auto reset event */ 1183 FALSE, /* init. 
state = reset */ 1184 (void *)NULL); /* unnamed event object */ 1185 if (hAlarm == (HANDLE) INVALID_HANDLE_VALUE) 1186 { 1187 perror("CreateEvent failure"); 1188 exit(1); 1189 } 1190 } 1191 else 1192 { 1193 ResetEvent(hAlarm); 1194 } 1195 1196 1197 tHandle = CreateThread(0, 1198 0, 1199 (LPTHREAD_START_ROUTINE)emulate_alarm, 1200 (LPVOID)(ULONG_PTR)time, 1201 0, 1202 &thread_id ) ; 1203 CloseHandle(tHandle); 1204 1205#else /* not WIN32 */ 1206 1207struct sigaction action; 1208int ret; 1209 1210if (debug) { 1211 fprintf(where,"About to start a timer for %d seconds.\n",time); 1212 fflush(where); 1213} 1214 1215 action.sa_handler = catcher; 1216 1217#ifdef SA_INTERRUPT 1218 /* on some systems (SunOS 4.blah), system calls are restarted. we do */ 1219 /* not want that */ 1220 action.sa_flags = SA_INTERRUPT; 1221#else /* SA_INTERRUPT */ 1222 action.sa_flags = 0; 1223#endif /* SA_INTERRUPT */ 1224 1225 sigemptyset(&(action.sa_mask)); 1226 sigaddset(&(action.sa_mask),SIGALRM); 1227 if (sigaction(SIGALRM, &action, NULL) < 0) { 1228 fprintf(where, 1229 "start_timer: error installing alarm handler errno %d\n", 1230 errno); 1231 fflush(where); 1232 exit(-1); 1233 } 1234 1235 sigemptyset(&(action.sa_mask)); 1236 sigaddset(&(action.sa_mask),SIGINT); 1237 if (sigaction(SIGINT, &action, NULL) < 0) { 1238 fprintf(where, 1239 "start_timer: error installing SIGINT handler errno %d\n", 1240 errno); 1241 fflush(where); 1242 exit(-1); 1243 } 1244 1245 /* this is the easy case - just set the timer for so many seconds */ 1246 ret = alarm(time); 1247 if (ret != 0) { 1248 fprintf(where, 1249 "error starting alarm timer, ret %d errno %d\n", 1250 ret, 1251 errno); 1252 fflush(where); 1253 exit(-1); 1254 } 1255#endif /* WIN32 */ 1256 1257 test_len_ticks = 1; 1258 1259} 1260 1261 1262 1263#ifdef WANT_INTERVALS 1264/* this routine will enable the interval timer and set things up so 1265 that for a timed test the test will end at the proper time. 
 it
   should detect the presence of POSIX.4 timer_* routines one of these
   days */
void
start_itimer(unsigned int interval_len_msec )
{
#ifdef WIN32
  LARGE_INTEGER liDueTime;
  TIMECAPS ptc;
  MMRESULT mmr;

  /* make sure timer resolution is at least as small as interval length */
  timerRes=interval_len_msec;
  mmr=timeGetDevCaps(&ptc, sizeof (ptc));
  if (mmr==TIMERR_NOERROR){
    if (interval_len_msec<ptc.wPeriodMin){
      timerRes=ptc.wPeriodMin;
      fprintf(where, "Timer cannot be set to %dmsec. Minimum timer resolution: %d\n", interval_len_msec, ptc.wPeriodMin);
      fflush(where);
    }
  }
  /* timeBeginPeriod() affects a global Windows setting.
     Windows uses the lowest value (that is, highest resolution) requested by any process. */
  mmr=timeBeginPeriod(timerRes);
  /* Create a waitable timer. */
  WinTimer = CreateWaitableTimer(NULL, FALSE, "IntervalTimer");
  if (NULL == WinTimer)
    {
      fprintf(where, "CreateWaitableTimer failed (%d)\n", GetLastError());
      fflush(where);
      exit(1);
    }
  /* The time after which the state of the timer is to be set to
     signaled the first time, in 100 nanosecond intervals.  Negative
     values indicate relative time. */
  liDueTime.QuadPart=-10000LL*interval_len_msec;
  /* Set the timer to wait for interval_len_msec and periodically signal every interval_len_msec */
  if (!SetWaitableTimer(WinTimer, &liDueTime, interval_len_msec, NULL, NULL, TRUE))
    {
      fprintf(where,"SetWaitableTimer failed (%d)\n", GetLastError());
      fflush(where);
      exit(1);
    }
#else
  unsigned int ticks_per_itvl;

  struct itimerval new_interval;
  struct itimerval old_interval;

  /* if -DWANT_INTERVALS was used, we will use the ticking of the
     itimer to tell us when the test is over.  while the user will be
     specifying some number of milliseconds, we know that the interval
     timer is really in units of 1/HZ.  so, to prevent the test from
     running "long" it would be necessary to keep this in mind when
     calculating the number of itimer events */

  /* NOTE(review): this path reads the global interval_wate rather
     than the interval_len_msec parameter - presumably they hold the
     same value by the time we are called; confirm against callers. */
  ticks_per_itvl = ((interval_wate * sysconf(_SC_CLK_TCK) * 1000) /
                    1000000);

  /* never arm a zero-length interval; round up to one clock tick */
  if (ticks_per_itvl == 0) ticks_per_itvl = 1;

  /* how many usecs in each interval? */
  usec_per_itvl = ticks_per_itvl * (1000000 / sysconf(_SC_CLK_TCK));

  /* how many times will the timer pop before the test is over? */
  if (test_time > 0) {
    /* this was a timed test */
    test_len_ticks = (test_time * 1000000) / usec_per_itvl;
  }
  else {
    /* this was not a timed test, use MAXINT */
    test_len_ticks = INT_MAX;
  }

  if (debug) {
    fprintf(where,
            "setting the interval timer to %d sec %d usec test len %d ticks\n",
            usec_per_itvl / 1000000,
            usec_per_itvl % 1000000,
            test_len_ticks);
    fflush(where);
  }

  /* if this was not a timed test, then we really aught to enable the
     signal catcher raj 2/95 */

  new_interval.it_interval.tv_sec = usec_per_itvl / 1000000;
  new_interval.it_interval.tv_usec = usec_per_itvl % 1000000;
  new_interval.it_value.tv_sec = usec_per_itvl / 1000000;
  new_interval.it_value.tv_usec = usec_per_itvl % 1000000;
  if (setitimer(ITIMER_REAL,&new_interval,&old_interval) != 0) {
    /* there was a problem arming the interval timer */
    perror("netperf: setitimer");
    exit(1);
  }
 #endif /* WIN32*/
}
#endif /* WANT_INTERVALS */

/* build lib_cpu_map, the mapping from a contiguous, zero-based CPU id
   space to the system's actual CPU ids.  used by the netcpu_looper
   code; on systems without MPCTL the mapping is simply the
   identity. */
void
netlib_init_cpu_map() {

  int i;
#ifdef HAVE_MPCTL
  int num;
  i = 0;
  /* I go back and forth on whether this should be the system-wide set
     of calls, or if the processor set versions (sans the _SYS) should
     be used.
 at the moment I believe that the system-wide version
     should be used. raj 2006-04-03 */
  num = mpctl(MPC_GETNUMSPUS_SYS,0,0);
  lib_cpu_map[i] = mpctl(MPC_GETFIRSTSPU_SYS,0,0);
  for (i = 1;((i < num) && (i < MAXCPUS)); i++) {
    lib_cpu_map[i] = mpctl(MPC_GETNEXTSPU_SYS,lib_cpu_map[i-1],0);
  }
  /* from here, we set them all to -1 because if we launch more
     loopers than actual CPUs, well, I'm not sure why :) */
  for (; i < MAXCPUS; i++) {
    lib_cpu_map[i] = -1;
  }

#else
  /* we assume that there is indeed a contiguous mapping */
  for (i = 0; i < MAXCPUS; i++) {
    lib_cpu_map[i] = i;
  }
#endif
}



/****************************************************************/
/*                                                              */
/*      netlib_init()                                           */
/*                                                              */
/*      initialize the performance library...                   */
/*                                                              */
/****************************************************************/

void
netlib_init()
{
  int i;

  where = stdout;

  /* the request/response control blocks are also accessed as flat
     int arrays by the dump/convert routines */
  request_array = (int *)(&netperf_request);
  response_array = (int *)(&netperf_response);

  /* -1.0 marks "no measurement" for per-CPU utilization slots */
  for (i = 0; i < MAXCPUS; i++) {
    lib_local_per_cpu_util[i] = -1.0;
  }

  lib_local_cpu_stats.peak_cpu_id = -1;
  lib_local_cpu_stats.peak_cpu_util = -1.0;
  lib_remote_cpu_stats.peak_cpu_id = -1;
  lib_remote_cpu_stats.peak_cpu_util = -1.0;

  netperf_version = strdup(NETPERF_VERSION);

  /* on those systems where we know that CPU numbers may not start at
     zero and be contiguous, we provide a way to map from a
     contiguous, starting from 0 CPU id space to the actual CPU ids.
     at present this is only used for the netcpu_looper stuff because
     we ass-u-me that someone setting processor affinity from the
     netperf commandline will provide a "proper" CPU identifier. raj
     2006-04-03 */

  netlib_init_cpu_map();

  if (debug) {
    fprintf(where,
            "netlib_init: request_array at %p\n"
            "netlib_init: response_array at %p\n",
            request_array,
            response_array);
    fflush(where);
  }

  /* some functionality might want to use random numbers, so we should
     initialize the random number generator */
  srand(getpid());

}

/* this routine will conver the string into an unsigned integer. it is
   used primarily for the command-line options taking a number (such
   as the socket size) which could be rather large. If someone enters
   32M, then the number will be converted to 32 * 1024 * 1024. If
   they inter 32m, the number will be converted to 32 * 1000 * 1000.
   note that the suffix checks are strstr()s over the whole string and
   are not mutually exclusive, so a string containing several suffix
   characters is multiplied by each of them in turn. */
unsigned int
convert(char *string)

{
  unsigned int base;
  /* atoi() stops at the first non-digit, so any suffix is ignored
     here and handled by the strstr() checks below */
  base = atoi(string);
  if (strstr(string,"K")) {
    base *= 1024;
  }
  if (strstr(string,"M")) {
    base *= (1024 * 1024);
  }
  if (strstr(string,"G")) {
    base *= (1024 * 1024 * 1024);
  }
  if (strstr(string,"k")) {
    base *= (1000);
  }
  if (strstr(string,"m")) {
    base *= (1000 * 1000);
  }
  if (strstr(string,"g")) {
    base *= (1000 * 1000 * 1000);
  }
  return(base);
}

/* this routine is like convert, but it is used for an interval time
   specification instead of stuff like socket buffer or send sizes.
   it converts everything to microseconds for internal use. if there
   is an 'm' at the end it assumes the user provided milliseconds, s
   will imply seconds, u will imply microseconds. in the future n
   will imply nanoseconds but for now it will be ignored. if there is
   no suffix or an unrecognized suffix, it will be assumed the user
   provided milliseconds, which was the long-time netperf default.
one 1489 of these days, we should probably revisit that nanosecond business 1490 wrt the return value being just an int rather than a uint64_t or 1491 something. raj 2006-02-06 */ 1492 1493unsigned int 1494convert_timespec(char *string) { 1495 1496 unsigned int base; 1497 base = atoi(string); 1498 if (strstr(string,"m")) { 1499 base *= 1000; 1500 } 1501 else if (strstr(string,"u")) { 1502 base *= (1); 1503 } 1504 else if (strstr(string,"s")) { 1505 base *= (1000 * 1000); 1506 } 1507 else { 1508 base *= (1000); 1509 } 1510 return(base); 1511} 1512 1513 1514/* this routine will allocate a circular list of buffers for either 1515 send or receive operations. each of these buffers will be aligned 1516 and offset as per the users request. the circumference of this ring 1517 will be controlled by the setting of width. the buffers will be 1518 filled with data from the file specified in fill_file. if fill_file 1519 is an empty string, the buffers will be filled from "default_fill" 1520 which will be "netperf" so anyone sniffing the traffic will have a 1521 better idea what this traffic happens to be. */ 1522 1523struct ring_elt * 1524allocate_buffer_ring(int width, int buffer_size, int alignment, int offset) 1525{ 1526 1527 struct ring_elt *first_link = NULL; 1528 struct ring_elt *temp_link = NULL; 1529 struct ring_elt *prev_link; 1530 1531 int i; 1532 int malloc_size; 1533 int bytes_left; 1534 int bytes_read; 1535 int do_fill; 1536 1537 FILE *fill_source; 1538 char default_fill[] = "netperf"; 1539 int fill_cursor = 0; 1540 1541 malloc_size = buffer_size + alignment + offset; 1542 1543 /* did the user wish to have the buffers pre-filled with data from a */ 1544 /* particular source? 
*/ 1545 if (strcmp(local_fill_file,"") == 0) { 1546 do_fill = 0; 1547 fill_source = NULL; 1548 } 1549 else { 1550 do_fill = 1; 1551 fill_source = (FILE *)fopen(local_fill_file,"r"); 1552 if (fill_source == (FILE *)NULL) { 1553 fprintf(where,"Could not open requested fill file: %s\n", 1554 strerror(errno)); 1555 fflush(where); 1556 } 1557 } 1558 1559 assert(width >= 1); 1560 1561 prev_link = NULL; 1562 for (i = 1; i <= width; i++) { 1563 /* get the ring element */ 1564 temp_link = (struct ring_elt *)malloc(sizeof(struct ring_elt)); 1565 if (temp_link == NULL) { 1566 fprintf(where, 1567 "malloc(%u) failed!\n", 1568 (unsigned int)sizeof(struct ring_elt)); 1569 exit(-1); 1570 } 1571 temp_link->completion_ptr = NULL; 1572 /* remember the first one so we can close the ring at the end */ 1573 if (i == 1) { 1574 first_link = temp_link; 1575 } 1576 temp_link->buffer_base = (char *)malloc(malloc_size); 1577 if (temp_link->buffer_base == NULL) { 1578 fprintf(where, 1579 "malloc(%d) failed!\n", 1580 malloc_size); 1581 exit(-1); 1582 } 1583 1584#ifndef WIN32 1585 temp_link->buffer_ptr = (char *)(( (long)(temp_link->buffer_base) + 1586 (long)alignment - 1) & 1587 ~((long)alignment - 1)); 1588#else 1589 temp_link->buffer_ptr = (char *)(( (ULONG_PTR)(temp_link->buffer_base) + 1590 (ULONG_PTR)alignment - 1) & 1591 ~((ULONG_PTR)alignment - 1)); 1592#endif 1593 temp_link->buffer_ptr += offset; 1594 /* is where the buffer fill code goes. */ 1595 if (do_fill) { 1596 char *bufptr = temp_link->buffer_ptr; 1597 bytes_left = buffer_size; 1598 while (bytes_left) { 1599 if (((bytes_read = (int)fread(bufptr, 1600 1, 1601 bytes_left, 1602 fill_source)) == 0) && 1603 (feof(fill_source))){ 1604 rewind(fill_source); 1605 } 1606 bufptr += bytes_read; 1607 bytes_left -= bytes_read; 1608 } 1609 } 1610 else { 1611 /* use the default fill to ID our data traffic on the 1612 network. 
it ain't exactly pretty, but it should work */ 1613 int j; 1614 char *bufptr = temp_link->buffer_ptr; 1615 for (j = 0; j < buffer_size; j++) { 1616 bufptr[j] = default_fill[fill_cursor]; 1617 fill_cursor += 1; 1618 /* the Windows DDK compiler with an x86_64 target wants a cast 1619 here */ 1620 if (fill_cursor > (int)strlen(default_fill)) { 1621 fill_cursor = 0; 1622 } 1623 } 1624 1625 } 1626 temp_link->next = prev_link; 1627 prev_link = temp_link; 1628 } 1629 if (first_link) { /* SAF Prefast made me do it... */ 1630 first_link->next = temp_link; 1631 } 1632 1633 return(first_link); /* it's a circle, doesn't matter which we return */ 1634} 1635 1636/* this routine will dirty the first dirty_count bytes of the 1637 specified buffer and/or read clean_count bytes from the buffer. it 1638 will go N bytes at a time, the only question is how large should N 1639 be and if we should be going continguously, or based on some 1640 assumption of cache line size */ 1641 1642void 1643access_buffer(char *buffer_ptr,int length, int dirty_count, int clean_count) { 1644 1645 char *temp_buffer; 1646 char *limit; 1647 int i, dirty_totals; 1648 1649 temp_buffer = buffer_ptr; 1650 limit = temp_buffer + length; 1651 dirty_totals = 0; 1652 1653 for (i = 0; 1654 ((i < dirty_count) && (temp_buffer < limit)); 1655 i++) { 1656 *temp_buffer += (char)i; 1657 dirty_totals += *temp_buffer; 1658 temp_buffer++; 1659 } 1660 1661 for (i = 0; 1662 ((i < clean_count) && (temp_buffer < limit)); 1663 i++) { 1664 dirty_totals += *temp_buffer; 1665 temp_buffer++; 1666 } 1667 1668 if (debug > 100) { 1669 fprintf(where, 1670 "This was here to try to avoid dead-code elimination %d\n", 1671 dirty_totals); 1672 fflush(where); 1673 } 1674} 1675 1676 1677#ifdef HAVE_ICSC_EXS 1678 1679#include <sys/mman.h> 1680#include <sys/exs.h> 1681 1682/* this routine will allocate a circular list of buffers for either 1683 send or receive operations. 
each of these buffers will be aligned 1684 and offset as per the users request. the circumference of this ring 1685 will be controlled by the setting of send_width. the buffers will 1686 be filled with data from the file specified in local_fill_file. if 1687 local_fill_file is an empty string, the buffers will not be filled with 1688 any particular data */ 1689 1690struct ring_elt * 1691allocate_exs_buffer_ring (int width, int buffer_size, int alignment, int offset, exs_mhandle_t *mhandlep) 1692{ 1693 1694 struct ring_elt *first_link; 1695 struct ring_elt *temp_link; 1696 struct ring_elt *prev_link; 1697 1698 int i; 1699 int malloc_size; 1700 int bytes_left; 1701 int bytes_read; 1702 int do_fill; 1703 1704 FILE *fill_source; 1705 1706 int mmap_size; 1707 char *mmap_buffer, *mmap_buffer_aligned; 1708 1709 malloc_size = buffer_size + alignment + offset; 1710 1711 /* did the user wish to have the buffers pre-filled with data from a */ 1712 /* particular source? */ 1713 if (strcmp (local_fill_file, "") == 0) { 1714 do_fill = 0; 1715 fill_source = NULL; 1716 } else { 1717 do_fill = 1; 1718 fill_source = (FILE *) fopen (local_fill_file, "r"); 1719 if (fill_source == (FILE *) NULL) { 1720 perror ("Could not open requested fill file"); 1721 exit (1); 1722 } 1723 } 1724 1725 assert (width >= 1); 1726 1727 if (debug) { 1728 fprintf (where, 1729 "allocate_exs_buffer_ring: " 1730 "width=%d buffer_size=%d alignment=%d offset=%d\n", 1731 width, buffer_size, alignment, offset); 1732 } 1733 1734 /* allocate shared memory */ 1735 mmap_size = width * malloc_size; 1736 mmap_buffer = (char *) mmap ((caddr_t)NULL, mmap_size+NBPG-1, 1737 PROT_READ|PROT_WRITE, 1738 MAP_SHARED|MAP_ANONYMOUS, -1, 0); 1739 if (mmap_buffer == NULL) { 1740 perror ("allocate_exs_buffer_ring: mmap failed"); 1741 exit (1); 1742 } 1743 mmap_buffer_aligned = (char *) ((uintptr_t)mmap_buffer & ~(NBPG-1)); 1744 if (debug) { 1745 fprintf (where, 1746 "allocate_exs_buffer_ring: " 1747 "mmap buffer size=%d address=0x%p 
aligned=0x%p\n", 1748 mmap_size, mmap_buffer, mmap_buffer_aligned); 1749 } 1750 1751 /* register shared memory */ 1752 *mhandlep = exs_mregister ((void *)mmap_buffer_aligned, (size_t)mmap_size, 0); 1753 if (*mhandlep == EXS_MHANDLE_INVALID) { 1754 perror ("allocate_exs_buffer_ring: exs_mregister failed"); 1755 exit (1); 1756 } 1757 if (debug) { 1758 fprintf (where, "allocate_exs_buffer_ring: mhandle=%d\n", 1759 *mhandlep); 1760 } 1761 1762 /* allocate ring elements */ 1763 first_link = (struct ring_elt *) malloc (width * sizeof (struct ring_elt)); 1764 if (first_link == NULL) { 1765 printf ("malloc(%d) failed!\n", width * sizeof (struct ring_elt)); 1766 exit (1); 1767 } 1768 1769 /* initialize buffer ring */ 1770 prev_link = first_link + width - 1; 1771 1772 for (i = 0, temp_link = first_link; i < width; i++, temp_link++) { 1773 1774 temp_link->buffer_base = (char *) mmap_buffer_aligned + (i*malloc_size); 1775#ifndef WIN32 1776 temp_link->buffer_ptr = (char *) 1777 (((long)temp_link->buffer_base + (long)alignment - 1) & 1778 ~((long)alignment - 1)); 1779#else 1780 temp_link->buffer_ptr = (char *) 1781 (((ULONG_PTR)temp_link->buffer_base + (ULONG_PTR)alignment - 1) & 1782 ~((ULONG_PTR)alignment - 1)); 1783#endif 1784 temp_link->buffer_ptr += offset; 1785 1786 if (debug) { 1787 fprintf (where, "allocate_exs_buffer_ring: " 1788 "buffer: index=%d base=0x%p ptr=0x%p\n", 1789 i, temp_link->buffer_base, temp_link->buffer_ptr); 1790 } 1791 1792 /* is where the buffer fill code goes. 
*/ 1793 if (do_fill) { 1794 bytes_left = buffer_size; 1795 while (bytes_left) { 1796 if (((bytes_read = (int) fread (temp_link->buffer_ptr, 1797 1, 1798 bytes_left, 1799 fill_source)) == 0) && 1800 (feof (fill_source))) { 1801 rewind (fill_source); 1802 } 1803 bytes_left -= bytes_read; 1804 } 1805 } 1806 1807 /* do linking */ 1808 prev_link->next = temp_link; 1809 prev_link = temp_link; 1810 } 1811 1812 return (first_link); /* it is a circle, doesn't matter which we return */ 1813} 1814 1815#endif /* HAVE_ICSC_EXS */ 1816 1817 1818 1819#ifdef HAVE_SENDFILE 1820/* this routine will construct a ring of sendfile_ring_elt structs 1821 that the routine sendfile_tcp_stream() will use to get parameters 1822 to its calls to sendfile(). It will setup the ring to point at the 1823 file specified in the global -F option that is already used to 1824 pre-fill buffers in the send() case. 08/2000 1825 1826 if there is no file specified in a global -F option, we will create 1827 a tempoarary file and fill it with random data and use that 1828 instead. raj 2007-08-09 */ 1829 1830struct ring_elt * 1831alloc_sendfile_buf_ring(int width, 1832 int buffer_size, 1833 int alignment, 1834 int offset) 1835 1836{ 1837 1838 struct ring_elt *first_link = NULL; 1839 struct ring_elt *temp_link = NULL; 1840 struct ring_elt *prev_link; 1841 1842 int i; 1843 int fildes; 1844 struct stat statbuf; 1845 1846 /* if the user has not specified a file with the -F option, we will 1847 fail the test. otherwise, go ahead and try to open the 1848 file. 08/2000 */ 1849 if (strcmp(local_fill_file,"") == 0) { 1850 /* use an temp file for the fill file */ 1851 char temp_file[] = {"netperfXXXXXX\0"}; 1852 int *temp_buffer; 1853 1854 /* make sure we have at least an ints worth, even if the user is 1855 using an insane buffer size for a sendfile test. we are 1856 ass-u-me-ing that malloc will return something at least aligned 1857 on an int boundary... 
 */
    temp_buffer = (int *) malloc(buffer_size + sizeof(int));
    if (temp_buffer) {
      /* ok, we have the buffer we are going to write, lets get a
         temporary filename */
      fildes = mkstemp(temp_file);
      /* no need to call open because mkstemp did it */
      if (-1 != fildes) {
        int count;
        int *int_ptr;

        /* we initialize the random number generator in
           netlib_init() now. raj 20110111 */

        /* unlink the file so it goes poof when we
           exit. unless/until shown to be a problem we will
           blissfully ignore the return value. raj 2007-08-09 */
        unlink(temp_file);

        /* now fill-out the file with at least buffer_size * width bytes */
        for (count = 0; count < width; count++) {
          /* fill the buffer with random data. it doesn't have to be
             really random, just "random enough" :) we do this here rather
             than up above because we want each write to the file to be
             different random data */
          int_ptr = temp_buffer;
          /* the "<=" writes buffer_size/sizeof(int) + 1 ints, which
             is exactly the buffer_size + sizeof(int) bytes allocated
             above (when buffer_size is int-aligned) and matches the
             write() length below */
          for (i = 0; i <= buffer_size/sizeof(int); i++) {
            *int_ptr = rand();
            int_ptr++;
          }
          if (write(fildes,temp_buffer,buffer_size+sizeof(int)) !=
              buffer_size + sizeof(int)) {
            perror("allocate_sendfile_buf_ring: incomplete write");
            exit(-1);
          }
        }
      }
      else {
        perror("alloc_sendfile_buf_ring: could not allocate temp name");
        exit(-1);
      }
    }
    else {
      perror("alloc_sendfile_buf_ring: could not allocate buffer for file");
      exit(-1);
    }
  }
  else {
    /* the user pointed us at a file, so try it */
    fildes = open(local_fill_file , O_RDONLY);
    if (fildes == -1){
      perror("alloc_sendfile_buf_ring: Could not open requested file");
      exit(1);
    }
    /* make sure there is enough file there to allow us to make a
       complete ring. that way we do not need additional logic in the
       ring setup to deal with wrap-around issues. we might want that
       someday, but not just now. 08/2000 */
    if (stat(local_fill_file,&statbuf) != 0) {
      perror("alloc_sendfile_buf_ring: could not stat file");
      exit(1);
    }
    if (statbuf.st_size < (width * buffer_size)) {
      /* the file is too short */
      fprintf(stderr,
              "alloc_sendfile_buf_ring: specified file too small.\n"
              "file must be larger than send_width * send_size\n");
      fflush(stderr);
      exit(1);
    }
  }

  /* so, at this point we know that fildes is a descriptor which
     references a file of sufficient size for our nefarious
     porpoises. raj 2007-08-09 */

  prev_link = NULL;
  for (i = 1; i <= width; i++) {
    /* get the ring element. we should probably make sure the malloc()
       was successful, but for now we'll just let the code bomb
       mysteriously. 08/2000 */

    temp_link = (struct ring_elt *)
      malloc(sizeof(struct ring_elt));
    if (temp_link == NULL) {
      fprintf(where,
              "malloc(%u) failed!\n",
              (unsigned int) sizeof(struct ring_elt));
      exit(1);
    }

    /* remember the first one so we can close the ring at the end */

    if (i == 1) {
      first_link = temp_link;
    }

    /* now fill-in the fields of the structure with the apropriate
       stuff. just how should we deal with alignment and offset I
       wonder? until something better comes-up, I think we will just
       ignore them. 08/2000 */

    temp_link->fildes = fildes;      /* from which file do we send? */
    temp_link->offset = offset;      /* starting at which offset? */
    offset += buffer_size;           /* get ready for the next elt */
    temp_link->length = buffer_size; /* how many bytes to send */
    temp_link->hdtrl = NULL;         /* no header or trailer */
    temp_link->flags = 0;            /* no flags */

    /* is where the buffer fill code went. */

    temp_link->next = prev_link;
    prev_link = temp_link;
  }
  /* close the ring */
  first_link->next = temp_link;

  return(first_link); /* it's a dummy ring */
}

#endif /* HAVE_SENDFILE */



 /***********************************************************************/
 /*                                                                     */
 /*      dump_request()                                                 */
 /*                                                                     */
 /* display the contents of the request array to the user. it will     */
 /* display the contents in decimal, hex, and ascii, with four bytes   */
 /* per line.                                                          */
 /*                                                                     */
 /***********************************************************************/

void
dump_request()
{
int counter = 0;
fprintf(where,"request contents:\n");
/* walk the request as an int array, four ints per output line; the
   "-3" keeps the counter+3 accesses below in bounds */
for (counter = 0; counter < ((sizeof(netperf_request)/4)-3); counter += 4) {
  fprintf(where,"%d:\t%8x %8x %8x %8x \t|%4.4s| |%4.4s| |%4.4s| |%4.4s|\n",
          counter,
          request_array[counter],
          request_array[counter+1],
          request_array[counter+2],
          request_array[counter+3],
          (char *)&request_array[counter],
          (char *)&request_array[counter+1],
          (char *)&request_array[counter+2],
          (char *)&request_array[counter+3]);
}
fflush(where);
}



 /***********************************************************************/
 /*                                                                     */
 /*      dump_response()                                                */
 /*                                                                     */
 /* display the content of the response array to the user. it will     */
 /* display the contents in decimal, hex, and ascii, with four bytes   */
 /* per line.
 */
 /*                                                                     */
 /***********************************************************************/

void
dump_response()
{
int counter = 0;

fprintf(where,"response contents\n");
/* walk the response as an int array, four ints per output line; the
   "-3" keeps the counter+3 accesses below in bounds */
for (counter = 0; counter < ((sizeof(netperf_response)/4)-3); counter += 4) {
  fprintf(where,"%d:\t%8x %8x %8x %8x \t>%4.4s< >%4.4s< >%4.4s< >%4.4s<\n",
          counter,
          response_array[counter],
          response_array[counter+1],
          response_array[counter+2],
          response_array[counter+3],
          (char *)&response_array[counter],
          (char *)&response_array[counter+1],
          (char *)&response_array[counter+2],
          (char *)&response_array[counter+3]);
}
fflush(where);
}

 /*

   format_number()

   return a pointer to a formatted string containing the value passed
   translated into the units specified. It assumes that the base units
   are bytes. If the format calls for bits, it will use SI units (10^)
   if the format calls for bytes, it will use CS units (2^)... This
   routine should look familiar to uses of the latest ttcp...

   we would like to use "t" or "T" for transactions, but probably
   should leave those for terabits and terabytes respectively, so for
   transactions, we will use "x" which will, by default, do absolutely
   nothing to the result. why? so we don't have to special case code
   elsewhere such as in the TCP_RR-as-bidirectional test case.

   NOTE(review): the returned pointer refers to a static buffer, so
   this routine is not reentrant and each call clobbers the previous
   result.

 */


char *
format_number(double number)
{
  static char fmtbuf[64];

  /* libfmt is the global output-format selector set from the
     command-line; unknown values fall through to KBytes */
  switch (libfmt) {
  case 'B':
    snprintf(fmtbuf, sizeof(fmtbuf),  "%-7.2f" , number);
    break;
  case 'K':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f" , number / 1024.0);
    break;
  case 'M':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f", number / 1024.0 / 1024.0);
    break;
  case 'G':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f", number / 1024.0 / 1024.0 / 1024.0);
    break;
  case 'b':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f" , number * 8);
    break;
  case 'k':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f", number * 8 / 1000.0);
    break;
  case 'm':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f", number * 8 / 1000.0 / 1000.0);
    break;
  case 'g':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f", number * 8 / 1000.0 / 1000.0 / 1000.0);
    break;
  case 'x':
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f", number);
    break;
  default:
    snprintf(fmtbuf, sizeof(fmtbuf), "%-7.2f", number / 1024.0);
  }

  return fmtbuf;
}

/* map a CPU-measurement-method enum value to the single character
   used in the test banner output; '?' for anything unrecognized. */
char
format_cpu_method(int method)
{

  char method_char;

  switch (method) {
  case CPU_UNKNOWN:
    method_char = 'U';
    break;
  case HP_IDLE_COUNTER:
    method_char = 'I';
    break;
  case PSTAT:
    method_char = 'P';
    break;
  case KSTAT:
    method_char = 'K';
    break;
  case KSTAT_10:
    method_char = 'M';
    break;
  case PERFSTAT:
    method_char = 'E';
    break;
  case TIMES:       /* historical only, completely unsuitable
                       for netperf's purposes */
    method_char = 'T';
    break;
  case GETRUSAGE:   /* historical only, completely unsuitable
                       for netperf;s purposes */
    method_char = 'R';
    break;
  case LOOPER:
    method_char = 'L';
    break;
  case NT_METHOD:
    method_char = 'N';
    break;
  case PROC_STAT:
    method_char = 'S';
    break;
  case SYSCTL:
    method_char = 'C';
    break;
  case OSX:
    method_char = 'O';
    break;
  default:
    method_char = '?';
  }

  return method_char;

}

/* return the english name for the current value of the global libfmt
   output-format selector.  like format_number(), the return value
   points at a static buffer and is not reentrant. */
char *
format_units()
{
  static char unitbuf[64];

  switch (libfmt) {
  case 'B':
    strcpy(unitbuf, "Bytes");
    break;
  case 'K':
    strcpy(unitbuf, "KBytes");
    break;
  case 'M':
    strcpy(unitbuf, "MBytes");
    break;
  case 'G':
    strcpy(unitbuf, "GBytes");
    break;
  case 'b':
    strcpy(unitbuf, "10^0bits");
    break;
  case 'k':
    strcpy(unitbuf, "10^3bits");
    break;
  case 'm':
    strcpy(unitbuf, "10^6bits");
    break;
  case 'g':
    strcpy(unitbuf, "10^9bits");
    break;
  case 'x':
    strcpy(unitbuf, "Trans");
    break;
  case 'u':
    strcpy(unitbuf,"Usec");
    break;

  default:
    strcpy(unitbuf, "KBytes");
  }

  return unitbuf;
}


/****************************************************************/
/*                                                              */
/*      shutdown_control()                                      */
/*                                                              */
/* tear-down the control connection between me and the server.
*/ 2207/****************************************************************/ 2208 2209void 2210shutdown_control() 2211{ 2212 2213 char *buf = (char *)&netperf_response; 2214 int buflen = sizeof(netperf_response); 2215 2216 /* stuff for select, use fd_set for better compliance */ 2217 fd_set readfds; 2218 struct timeval timeout; 2219 2220 if (debug) { 2221 fprintf(where, 2222 "shutdown_control: shutdown of control connection requested.\n"); 2223 fflush(where); 2224 } 2225 2226 /* first, we say that we will be sending no more data on the */ 2227 /* connection */ 2228 if (shutdown(netlib_control,1) == SOCKET_ERROR) { 2229 Print_errno(where, 2230 "shutdown_control: error in shutdown"); 2231 fflush(where); 2232 exit(1); 2233 } 2234 2235 /* Now, we hang on a select waiting for the socket to become 2236 readable to receive the shutdown indication from the remote. this 2237 will be "just" like the recv_response() code 2238 2239 we only select once. it is assumed that if the response is split 2240 (which should not be happening, that we will receive the whole 2241 thing and not have a problem ;-) */ 2242 2243 FD_ZERO(&readfds); 2244 FD_SET(netlib_control,&readfds); 2245 timeout.tv_sec = 60; /* wait one minute then punt */ 2246 timeout.tv_usec = 0; 2247 2248 /* select had better return one, or there was either a problem or a 2249 timeout... */ 2250 if (select(FD_SETSIZE, 2251 &readfds, 2252 0, 2253 0, 2254 &timeout) != 1) { 2255 Print_errno(where, 2256 "shutdown_control: no response received"); 2257 fflush(where); 2258 exit(1); 2259 } 2260 2261 /* we now assume that the socket has come ready for reading */ 2262 recv(netlib_control, buf, buflen,0); 2263 2264} 2265 2266/* 2267 bind_to_specific_processor will bind the calling process to the 2268 processor in "processor" It has lots of ugly ifdefs to deal with 2269 all the different ways systems do processor affinity. this is a 2270 generalization of work initially done by stephen burger. 
raj 2271 2004/12/13 */ 2272 2273void 2274bind_to_specific_processor(int processor_affinity, int use_cpu_map) 2275{ 2276 2277 int mapped_affinity; 2278 2279 /* this is in place because the netcpu_looper processor affinity 2280 ass-u-me-s a contiguous CPU id space starting with 0. for the 2281 regular netperf/netserver affinity, we ass-u-me the user has used 2282 a suitable CPU id even when the space is not contiguous and 2283 starting from zero */ 2284 if (use_cpu_map) { 2285 mapped_affinity = lib_cpu_map[processor_affinity]; 2286 } 2287 else { 2288 mapped_affinity = processor_affinity; 2289 } 2290 2291#ifdef HAVE_MPCTL 2292 /* indeed, at some point it would be a good idea to check the return 2293 status and pass-along notification of error... raj 2004/12/13 */ 2294 mpctl(MPC_SETPROCESS_FORCE, mapped_affinity, getpid()); 2295#elif HAVE_PROCESSOR_BIND 2296#include <sys/types.h> 2297#include <sys/processor.h> 2298#include <sys/procset.h> 2299 processor_bind(P_PID,P_MYID,mapped_affinity,NULL); 2300#elif HAVE_BINDPROCESSOR 2301#include <sys/processor.h> 2302 /* this is the call on AIX. It takes a "what" of BINDPROCESS or 2303 BINDTHRAD, then "who" and finally "where" which is a CPU number 2304 or it seems PROCESSOR_CLASS_ANY there also seems to be a mycpu() 2305 call to return the current CPU assignment. this is all based on 2306 the sys/processor.h include file. from empirical testing, it 2307 would seem that the my_cpu() call returns the current CPU on 2308 which we are running rather than the CPU binding, so it's return 2309 value will not tell you if you are bound vs unbound. */ 2310 bindprocessor(BINDPROCESS,getpid(),(cpu_t)mapped_affinity); 2311#elif HAVE_SCHED_SETAFFINITY 2312#include <sched.h> 2313 /* in theory this should cover systems with more CPUs than bits in a 2314 long, without having to specify __USE_GNU. we "cheat" by taking 2315 defines from /usr/include/bits/sched.h, which we ass-u-me is 2316 included by <sched.h>. 
If they are not there we will just 2317 fall-back on what we had before, which is to use just the size of 2318 an unsigned long. raj 2006-09-14 */ 2319 2320#if defined(__CPU_SETSIZE) 2321#define NETPERF_CPU_SETSIZE __CPU_SETSIZE 2322#if defined(__CPU_SET_S) 2323#define NETPERF_CPU_SET(cpu, cpusetp) __CPU_SET_S(cpu, sizeof (cpu_set_t), cpusetp) 2324#define NETPERF_CPU_ZERO(cpusetp) __CPU_ZERO_S (sizeof (cpu_set_t), cpusetp) 2325#else 2326#define NETPERF_CPU_SET(cpu, cpusetp) __CPU_SET(cpu, cpusetp) 2327#define NETPERF_CPU_ZERO(cpusetp) __CPU_ZERO (cpusetp) 2328#endif 2329 typedef cpu_set_t netperf_cpu_set_t; 2330#else 2331#define NETPERF_CPU_SETSIZE sizeof(unsigned long) 2332#define NETPERF_CPU_SET(cpu, cpusetp) *cpusetp = 1 << cpu 2333#define NETPERF_CPU_ZERO(cpusetp) *cpusetp = (unsigned long)0 2334 typedef unsigned long netperf_cpu_set_t; 2335#endif 2336 2337 netperf_cpu_set_t netperf_cpu_set; 2338 unsigned int len = sizeof(netperf_cpu_set); 2339 2340 if (mapped_affinity < 8*sizeof(netperf_cpu_set)) { 2341 NETPERF_CPU_ZERO(&netperf_cpu_set); 2342 NETPERF_CPU_SET(mapped_affinity,&netperf_cpu_set); 2343 2344 if (sched_setaffinity(getpid(), len, &netperf_cpu_set)) { 2345 if (debug) { 2346 fprintf(stderr, "failed to set PID %d's CPU affinity errno %d\n", 2347 getpid(),errno); 2348 fflush(stderr); 2349 } 2350 } 2351 } 2352 else { 2353 if (debug) { 2354 fprintf(stderr, 2355 "CPU number larger than pre-compiled limits. Consider a recompile.\n"); 2356 fflush(stderr); 2357 } 2358 } 2359 2360#elif HAVE_BIND_TO_CPU_ID 2361 /* this is the one for Tru64 */ 2362#include <sys/types.h> 2363#include <sys/resource.h> 2364#include <sys/processor.h> 2365 2366 /* really should be checking a return code one of these days. 
raj 2367 2005/08/31 */ 2368 2369 bind_to_cpu_id(getpid(), mapped_affinity,0); 2370 2371#elif WIN32 2372 2373 { 2374 ULONG_PTR AffinityMask; 2375 ULONG_PTR ProcessAffinityMask; 2376 ULONG_PTR SystemAffinityMask; 2377 2378 if ((mapped_affinity < 0) || 2379 (mapped_affinity > MAXIMUM_PROCESSORS)) { 2380 fprintf(where, 2381 "Invalid processor_affinity specified: %d\n", mapped_affinity); fflush(where); 2382 return; 2383 } 2384 2385 if (!GetProcessAffinityMask( 2386 GetCurrentProcess(), 2387 &ProcessAffinityMask, 2388 &SystemAffinityMask)) 2389 { 2390 perror("GetProcessAffinityMask failed"); 2391 fflush(stderr); 2392 exit(1); 2393 } 2394 2395 AffinityMask = (ULONG_PTR)1 << mapped_affinity; 2396 2397 if (AffinityMask & ProcessAffinityMask) { 2398 if (!SetThreadAffinityMask( GetCurrentThread(), AffinityMask)) { 2399 perror("SetThreadAffinityMask failed"); 2400 fflush(stderr); 2401 } 2402 } else if (debug) { 2403 fprintf(where, 2404 "Processor affinity set to CPU# %d\n", mapped_affinity); 2405 fflush(where); 2406 } 2407 } 2408 2409#else 2410 if (debug) { 2411 fprintf(where, 2412 "Processor affinity not available for this platform!\n"); 2413 fflush(where); 2414 } 2415#endif 2416} 2417 2418 2419/* 2420 * Sets a socket to non-blocking operation. 2421 */ 2422int 2423set_nonblock (SOCKET sock) 2424{ 2425#ifdef WIN32 2426 unsigned long flags = 1; 2427 return (ioctlsocket(sock, FIONBIO, &flags) != SOCKET_ERROR); 2428#else 2429 return (fcntl(sock, F_SETFL, O_NONBLOCK) != -1); 2430#endif 2431} 2432 2433 2434 2435/* send a request, only converting the first n ints-worth of the 2436 test-specific data via htonl() before sending on the 2437 connection. the first two ints, which are before the test-specific 2438 portion are always converted. 
   raj 2008-02-05 */

/* send_request_n: transmit the global netperf_request on the control
   connection, converting the two header ints plus the first n ints of
   test-specific data to network byte order first.

   n  - number of test-specific ints to htonl(); n < 0 means convert
        the entire request array.  Oversized counts are silently
        truncated to the array size (warned at debug > 1).

   Exits the process if send() cannot push the whole structure.
   NOTE: the /4 arithmetic assumes 4-byte ints in the request array. */
void
send_request_n(int n)
{

  int counter,count;

  /* n < 0 => convert everything; otherwise the 2 header ints plus n
     test-specific ints */
  if (n < 0) count = sizeof(netperf_request)/4;
  else count = 2 + n;

  /* silently truncate if the caller called for more than we have */
  if (count > sizeof(netperf_request)/4) {
    if (debug > 1) {
      fprintf(where,
              "WARNING, htonl conversion count of %d was larger than netperf_request\n",
              count - 2);
      fflush(where);
    }
    count = sizeof(netperf_request)/4;
  }

  /* display the contents of the request if the debug level is high
     enough. otherwise, just send the darned thing ;-) */

  if (debug > 1) {
    fprintf(where,
            "entered send_request_n...contents before %d htonls:\n",
            count);
    dump_request();
  }

  /* pass the processor affinity request value to netserver this is a
     kludge and I know it. sgb 8/11/04. we keep this here to deal
     with there being two paths to this place - direct and via
     send_request() */

  netperf_request.content.dummy = remote_proc_affinity;

  /* put the entire request array into network order. We do this
     arbitrarily rather than trying to figure-out just how much of the
     request array contains real information. this should be simpler,
     and at any rate, the performance of sending control messages for
     this benchmark is not of any real concern. */

  for (counter = 0; counter < count; counter++) {
    request_array[counter] = htonl(request_array[counter]);
  }

  if (debug > 1) {
    fprintf(where,"send_request_n...contents after %d htonls:\n",
            count);
    dump_request();

    fprintf(where,
            "\nsend_request: about to send %u bytes from %p\n",
            (unsigned int) sizeof(netperf_request),
            &netperf_request);
    fflush(where);
  }

  /* the whole structure is always sent, regardless of how much of it
     was byte-swapped above; a short send is treated as fatal */
  if (send(netlib_control,
           (char *)&netperf_request,
           sizeof(netperf_request),
           0) != sizeof(netperf_request)) {
    perror("send_request: send call failure");

    exit(1);
  }
}

 /***********************************************************************/
 /*                                                                     */
 /*     send_request()                                                  */
 /*                                                                     */
 /* send a netperf request on the control socket to the remote half of  */
 /* the connection. to get us closer to intervendor interoperability,   */
 /* we will call htonl on each of the int that compose the message to   */
 /* be sent. the server-half of the connection will call the ntohl      */
 /* routine to undo any changes that may have been made...              */
 /*                                                                     */
 /***********************************************************************/

void
send_request()
{

  /* pass the processor affinity request value to netserver this is a
     kludge and I know it. sgb 8/11/04 */

  netperf_request.content.dummy = remote_proc_affinity;

  /* call send_request_n telling it to convert everything */

  send_request_n(-1);

}

/* send a response, only converting the first n ints-worth of the
   test-specific data via htonl() before sending on the
   connection. the first two ints, which are before the test-specific
   portion are always converted.
raj 2008-02-05 */ 2540 2541void 2542send_response_n(int n) 2543{ 2544 int counter, count; 2545 int bytes_sent; 2546 2547 if (n < 0) count = sizeof(netperf_request)/4; 2548 else count = 2 + n; 2549 2550 /* silently truncate if the caller called for more than we have */ 2551 if (count > sizeof(netperf_request)/4) { 2552 if (debug > 1) { 2553 fprintf(where, 2554 "WARNING, htonl conversion count of %d was larger than netperf_request\n", 2555 count - 2); 2556 fflush(where); 2557 } 2558 count = sizeof(netperf_request)/4; 2559 } 2560 2561 /* display the contents of the request if the debug level is high */ 2562 /* enough. otherwise, just send the darned thing ;-) */ 2563 2564 if (debug > 1) { 2565 fprintf(where, 2566 "send_response_n: contents of %u ints before %d htonl,\n", 2567 (unsigned int) sizeof(netperf_response)/4, 2568 count); 2569 dump_response(); 2570 } 2571 2572 /* put the entire response_array into network order. We do this 2573 arbitrarily rather than trying to figure-out just how much of the 2574 request array contains real information. this should be simpler, 2575 and at any rate, the performance of sending control messages for 2576 this benchmark is not of any real concern. 
*/ 2577 2578 for (counter = 0; counter < count; counter++) { 2579 response_array[counter] = htonl(response_array[counter]); 2580 } 2581 2582 if (debug > 1) { 2583 fprintf(where, 2584 "send_response_n: contents after htonl\n"); 2585 dump_response(); 2586 fprintf(where, 2587 "about to send %u bytes from %p\n", 2588 (unsigned int) sizeof(netperf_response), 2589 &netperf_response); 2590 fflush(where); 2591 } 2592 2593 /*KC*/ 2594 if ((bytes_sent = send(server_sock, 2595 (char *)&netperf_response, 2596 sizeof(netperf_response), 2597 0)) != sizeof(netperf_response)) { 2598 perror("send_response_n: send call failure"); 2599 fprintf(where, "BytesSent: %d\n", bytes_sent); 2600 exit(1); 2601 } 2602 2603} 2604 2605/***********************************************************************/ 2606 /* */ 2607 /* send_response() */ 2608 /* */ 2609 /* send a netperf response on the control socket to the remote half of */ 2610 /* the connection. to get us closer to intervendor interoperability, */ 2611 /* we will call htonl on each of the int that compose the message to */ 2612 /* be sent. the other half of the connection will call the ntohl */ 2613 /* routine to undo any changes that may have been made... */ 2614 /* */ 2615 /***********************************************************************/ 2616 2617void 2618send_response() 2619{ 2620 2621 send_response_n(-1); 2622 2623} 2624 2625/* go back and "undo" the ntohl that recv_request() did, starting with 2626 the specified point and going to the end of the request array */ 2627void 2628fixup_request_n(int n) 2629{ 2630 int i; 2631 int limit; 2632 2633 limit = sizeof(netperf_request) / 4; 2634 /* we must remember that the request_array also contains two ints of 2635 "other" stuff, so we start the fixup two in - at least I think we 2636 should. 
raj 2012-04-02 */ 2637 for (i = n + 2; i < limit; i++) { 2638 request_array[i] = htonl(request_array[i]); 2639 } 2640 if (debug > 1) { 2641 fprintf(where, 2642 "%s: request contents after fixup at the %d th int\n", 2643 __FUNCTION__, 2644 n); 2645 dump_request(); 2646 fflush(where); 2647 } 2648} 2649 2650/* receive a request, only converting the first n ints-worth of the 2651 test-specific data via htonl() before sending on the 2652 connection. the first two ints, which are before the test-specific 2653 portion are always converted. raj 2008-02-05 */ 2654 2655int 2656recv_request_timed_n(int n, int seconds) 2657{ 2658 int tot_bytes_recvd, 2659 bytes_recvd, 2660 bytes_left; 2661 char *buf = (char *)&netperf_request; 2662 int buflen = sizeof(netperf_request); 2663 int counter,count; 2664 2665 fd_set readfds; 2666 struct timeval timeout; 2667 2668 if (n < 0) count = sizeof(netperf_request)/4; 2669 else count = 2 + n; 2670 2671 /* silently truncate if the caller called for more than we have */ 2672 if (count > sizeof(netperf_request)/4) { 2673 if (debug > 1) { 2674 fprintf(where, 2675 "WARNING, htonl conversion count of %d was larger than netperf_request\n", 2676 count - 2); 2677 fflush(where); 2678 } 2679 count = sizeof(netperf_request)/4; 2680 } 2681 2682 /* for the time being, we rather rely on select decrementing timeout 2683 each time to preclude someone with nefarious intent from just 2684 dribbling data to us piecemeal. of course, who knows what 2685 someone with nefarious intent might come-up with. raj 2012-01-23 */ 2686 tot_bytes_recvd = 0; 2687 bytes_recvd = 0; /* nt_lint; bytes_recvd uninitialized if buflen == 0 */ 2688 bytes_left = buflen; 2689 timeout.tv_sec = seconds; 2690 timeout.tv_usec = 0; 2691 do { 2692 FD_ZERO(&readfds); 2693 FD_SET(server_sock,&readfds); 2694 if (select(FD_SETSIZE, 2695 &readfds, 2696 0, 2697 0, 2698 (seconds > 0) ? &timeout : NULL) != 1) { 2699 fprintf(where, 2700 "Issue receiving request on control connection. 
Errno %d (%s)\n", 2701 errno, 2702 strerror(errno)); 2703 fflush(where); 2704 close(server_sock); 2705 return -1; 2706 } 2707 2708 if ((bytes_recvd = recv(server_sock, buf, bytes_left, 0)) > 0) { 2709 tot_bytes_recvd += bytes_recvd; 2710 buf += bytes_recvd; 2711 bytes_left -= bytes_recvd; 2712 } 2713 } while ((tot_bytes_recvd != buflen) && 2714 (bytes_recvd > 0 )); 2715 2716 /* put the request into host order */ 2717 2718 for (counter = 0; counter < count; counter++) { 2719 request_array[counter] = ntohl(request_array[counter]); 2720 } 2721 2722 if (debug) { 2723 fprintf(where, 2724 "recv_request: received %d bytes of request.\n", 2725 tot_bytes_recvd); 2726 fflush(where); 2727 } 2728 2729 if (bytes_recvd == SOCKET_ERROR) { 2730 Print_errno(where, 2731 "recv_request: error on recv"); 2732 fflush(where); 2733 close(server_sock); 2734 return -1; 2735 } 2736 2737 if (bytes_recvd == 0) { 2738 /* the remote has shutdown the control connection, we should shut 2739 it down as well and return */ 2740 if (debug) { 2741 fprintf(where, 2742 "recv_request: remote requested shutdown of control\n"); 2743 fflush(where); 2744 } 2745 2746 close(server_sock); 2747 return 0; 2748 } 2749 2750 if (tot_bytes_recvd < buflen) { 2751 if (debug > 1) 2752 dump_request(); 2753 2754 fprintf(where, 2755 "recv_request: partial request received of %d bytes\n", 2756 tot_bytes_recvd); 2757 fflush(where); 2758 close(server_sock); 2759 return -1; 2760 } 2761 2762 if (debug > 1) { 2763 dump_request(); 2764 } 2765 2766 /* get the processor affinity request value from netperf this is a 2767 kludge and I know it. sgb 8/11/04 */ 2768 2769 local_proc_affinity = netperf_request.content.dummy; 2770 2771 if (local_proc_affinity != -1) { 2772 bind_to_specific_processor(local_proc_affinity,0); 2773 } 2774 2775 return buflen; 2776} 2777 2778/* receive a request, only converting the first n ints-worth of the 2779 test-specific data via htonl() before sending on the 2780 connection. 
the first two ints, which are before the test-specific 2781 portion are always converted. raj 2008-02-05 */ 2782 2783int 2784recv_request_n(int n) 2785{ 2786 2787 return recv_request_timed_n(n,0); 2788 2789} 2790 2791 /***********************************************************************/ 2792 /* */ 2793 /* recv_request() */ 2794 /* */ 2795 /* receive the remote's request on the control socket. we will put */ 2796 /* the entire response into host order before giving it to the */ 2797 /* calling routine. hopefully, this will go most of the way to */ 2798 /* insuring intervendor interoperability. if there are any problems, */ 2799 /* we will just punt the entire situation. */ 2800 /* */ 2801 /***********************************************************************/ 2802 2803int 2804recv_request() 2805{ 2806 2807 return recv_request_n(-1); 2808 2809} 2810 2811void 2812recv_response_timed_n(int addl_time, int n) 2813{ 2814 int tot_bytes_recvd, 2815 bytes_recvd = 0, 2816 bytes_left; 2817 char *buf = (char *)&netperf_response; 2818 int buflen = sizeof(netperf_response); 2819 int counter,count; 2820 2821 /* stuff for select, use fd_set for better compliance */ 2822 fd_set readfds; 2823 struct timeval timeout; 2824 2825 tot_bytes_recvd = 0; 2826 bytes_left = buflen; 2827 2828 if (n < 0) count = sizeof(netperf_request)/4; 2829 else count = 2 + n; 2830 2831 /* silently truncate if the caller called for more than we have */ 2832 if (count > sizeof(netperf_request)/4) { 2833 if (debug > 1) { 2834 fprintf(where, 2835 "WARNING, htonl conversion count of %d was larger than netperf_response\n", 2836 count - 2); 2837 fflush(where); 2838 } 2839 count = sizeof(netperf_request)/4; 2840 } 2841 2842 /* zero out the response structure */ 2843 2844 /* BUG FIX SJB 2/4/93 - should be < not <= */ 2845 for (counter = 0; 2846 counter < sizeof(netperf_response)/sizeof(int); 2847 counter++) { 2848 response_array[counter] = 0; 2849 } 2850 2851 /* we only select once. 
it is assumed that if the response is split 2852 (which should not be happening, that we will receive the whole 2853 thing and not have a problem ;-) */ 2854 2855 FD_ZERO(&readfds); 2856 FD_SET(netlib_control,&readfds); 2857 timeout.tv_sec = 120 + addl_time; /* wait at least two minutes 2858 before punting - the 2859 USE_LOOPER CPU stuff may 2860 cause remote's to have a bit 2861 longer time of it than 60 2862 seconds would allow. 2863 triggered by fix from Jeff 2864 Dwork. */ 2865 timeout.tv_usec = 0; 2866 2867 /* select had better return one, or there was either a problem or a */ 2868 /* timeout... */ 2869 2870 if ((counter = select(FD_SETSIZE, 2871 &readfds, 2872 0, 2873 0, 2874 &timeout)) != 1) { 2875 fprintf(where, 2876 "%s: no response received. errno %d counter %d\n", 2877 __FUNCTION__, 2878 errno, 2879 counter); 2880 exit(1); 2881 } 2882 2883 while ((tot_bytes_recvd != buflen) && 2884 ((bytes_recvd = recv(netlib_control, buf, bytes_left,0)) > 0 )) { 2885 tot_bytes_recvd += bytes_recvd; 2886 buf += bytes_recvd; 2887 bytes_left -= bytes_recvd; 2888 } 2889 2890 if (debug) { 2891 fprintf(where,"recv_response: received a %d byte response\n", 2892 tot_bytes_recvd); 2893 fflush(where); 2894 } 2895 2896 /* put the desired quantity of the response into host order */ 2897 2898 for (counter = 0; counter < count; counter++) { 2899 response_array[counter] = ntohl(response_array[counter]); 2900 } 2901 2902 if (bytes_recvd == SOCKET_ERROR) { 2903 perror("recv_response"); 2904 exit(1); 2905 } 2906 if (tot_bytes_recvd < buflen) { 2907 fprintf(stderr, 2908 "recv_response: partial response received: %d bytes\n", 2909 tot_bytes_recvd); 2910 fflush(stderr); 2911 if (debug > 1) 2912 dump_response(); 2913 exit(1); 2914 } 2915 if (debug > 1) { 2916 dump_response(); 2917 } 2918} 2919 2920/* 2921 2922 recv_response_timed() 2923 2924 receive the remote's response on the control socket. we will put the 2925 entire response into host order before giving it to the calling 2926 routine. 
  hopefully, this will go most of the way to ensuring
  intervendor interoperability. if there are any problems, we will
  just punt the entire situation.

  The call to select at the beginning is to get us out of hang
  situations where the remote gives-up but we don't find-out about
  it. This seems to happen only rarely, but it would be nice to be
  somewhat robust ;-)

  The "_timed" part is to allow the caller to add (or I suppose
  subtract) from the length of timeout on the select call. this was
  added since not all the CPU utilization mechanisms require a 40
  second calibration, and we used to have an arbitrary 40 second sleep
  in "calibrate_remote_cpu" - since we don't _always_ need that, we
  want to simply add 40 seconds to the select() timeout from that
  call, but don't want to change all the "recv_response" calls in the
  code right away. sooo, we push the functionality of the old
  recv_response() into a new recv_response_timed(addl_time) call,
  and have recv_response() call recv_response_timed(0).
  raj
  2005-05-16

 */


/* recv_response_timed: receive a full response, allowing addl_time
   extra seconds on the select timeout, converting everything */
void
recv_response_timed(int addl_time)
{

  /* -1 => convert all the test-specific data via ntohl */
  recv_response_timed_n(addl_time,-1);

}

/* recv_response: the classic entry point - default timeout, convert
   the whole response */
void
recv_response()
{
  /* 0 => no additional time, -1 => convert all test-specific data */
  recv_response_timed_n(0,-1);
}

/* recv_response_n: default timeout, convert only the first n
   test-specific ints */
void
recv_response_n(int n)
{
  recv_response_timed_n(0,n);
}




#if defined(USE_PSTAT) || defined (USE_SYSCTL)
/* hi_32: return the most-significant 32 bits of *big_int, accounting
   for host byte order via the htonl(1L) == 1L big-endian probe.
   K&R-style definition kept for the older platforms this supports. */
int
hi_32(big_int)
     long long *big_int;
{
  union overlay_u {
    long long dword;
    long      words[2];
  } *overlay;

  overlay = (union overlay_u *)big_int;
  /* on those systems which are byte swapped, we really wish to return
     words[1] - at least I think so - raj 4/95 */
  if (htonl(1L) == 1L) {
    /* we are a "normal" :) machine */
    return(overlay->words[0]);
  }
  else {
    return(overlay->words[1]);
  }
}

/* lo_32: return the least-significant 32 bits of *big_int; mirror
   image of hi_32 above. */
int
lo_32(big_int)
     long long *big_int;
{
  union overlay_u {
    long long dword;
    long      words[2];
  } *overlay;

  overlay = (union overlay_u *)big_int;
  /* on those systems which are byte swapped, we really wish to return
     words[0] - at least I think so - raj 4/95 */
  if (htonl(1L) == 1L) {
    /* we are a "normal" :) machine */
    return(overlay->words[1]);
  }
  else {
    return(overlay->words[0]);
  }
}

#endif /* USE_PSTAT || USE_SYSCTL */


/* libmain: trivial sanity-check entry point - emits a greeting and
   the current debug level to 'where' */
void libmain()
{
  fprintf(where,"hello world\n");
  fprintf(where,"debug: %d\n",debug);
}


/* get_sock_buffer: query the send or receive socket buffer size of
   sd (per 'which') into *effective_sizep; stores -1 when the query
   fails or SO_SNDBUF is not available on this platform. */
void
get_sock_buffer (SOCKET sd, enum sock_buffer which, int *effective_sizep)
{
#ifdef SO_SNDBUF
  int optname = (which == SEND_BUFFER) ? SO_SNDBUF : SO_RCVBUF;
  netperf_socklen_t sock_opt_len;

  sock_opt_len = sizeof(*effective_sizep);
  if (getsockopt(sd, SOL_SOCKET, optname, (char *)effective_sizep,
                 &sock_opt_len) < 0) {
    fprintf(where, "netperf: get_sock_buffer: getsockopt %s: errno %d\n",
            (which == SEND_BUFFER) ? "SO_SNDBUF" : "SO_RCVBUF", errno);
    fflush(where);
    *effective_sizep = -1;
  }

  if (debug) {
    fprintf(where, "netperf: get_sock_buffer: "
            "%s socket size determined to be %d\n",
            (which == SEND_BUFFER) ? "send" : "receive", *effective_sizep);
    fflush(where);
  }

#else
  *effective_sizep = -1;
#endif
}

/* set_sock_buffer: request a socket buffer size (requested_size < 0
   means leave the system default alone) then report the resulting
   size via get_sock_buffer().  A failed setsockopt is fatal. */
void
set_sock_buffer (SOCKET sd, enum sock_buffer which, int requested_size, int *effective_sizep)
{
#ifdef SO_SNDBUF

  int optname = (which == SEND_BUFFER) ? SO_SNDBUF : SO_RCVBUF;

  /* seems that under Windows, setting a value of zero is how one
     tells the stack you wish to enable copy-avoidance. Knuth only
     knows what it will do on other stacks, but it might be
     interesting to find-out, so we won't bother #ifdef'ing the change
     to allow asking for 0 bytes. Courtesy of SAF, 2007-05 raj
     2007-05-31 */
  if (requested_size >= 0) {
    if (setsockopt(sd, SOL_SOCKET, optname,
                   (char *)&requested_size, sizeof(int)) < 0) {
      fprintf(where, "netperf: set_sock_buffer: %s option: errno %d (%s)\n",
              (which == SEND_BUFFER) ? "SO_SNDBUF" : "SO_RCVBUF",
              errno,
              strerror(errno));
      fflush(where);
      exit(1);
    }
    if (debug > 1) {
      fprintf(where, "netperf: set_sock_buffer: %s of %d requested.\n",
              (which == SEND_BUFFER) ? "SO_SNDBUF" : "SO_RCVBUF",
              requested_size);
      fflush(where);
    }
  }

  /* the getsockopt() call that used to be here has been hoisted into
     its own routine to be used on those platforms where the socket
     buffer sizes might change from the beginning to the end of the
     run. raj 2008-01-15 */

  get_sock_buffer(sd, which, effective_sizep);

#else /* SO_SNDBUF */
  *effective_sizep = -1;
#endif /* SO_SNDBUF */
}

/* dump_addrinfo: pretty-print an entire getaddrinfo() result chain to
   dumploc for debugging - canonical name, flags, family/socktype/
   protocol and the raw sockaddr bytes of each entry. */
void
dump_addrinfo(FILE *dumploc, struct addrinfo *info,
              const char *host, char *port, int family)
{
  struct sockaddr *ai_addr;
  struct addrinfo *temp;
  temp=info;

  fprintf(dumploc,
          "getaddrinfo returned the following for host '%s' port '%s' "
          " family %s\n",
          host,
          port,
          inet_ftos(family));

  while (temp) {
    /* seems that Solaris 10 GA bits will not give a canonical name
       for ::0 or 0.0.0.0, and their fprintf() cannot deal with a null
       pointer, so we have to check for a null pointer. probably a
       safe thing to do anyway, eventhough it was not necessary on
       linux or hp-ux. raj 2005-02-09 */
    fprintf(dumploc,
            "\tcannonical name: '%s'\n"
            "\tflags: %x family: %s: socktype: %s protocol %s addrlen %d\n",
            (temp->ai_canonname) ? temp->ai_canonname : "(nil)",
            temp->ai_flags,
            inet_ftos(temp->ai_family),
            inet_ttos(temp->ai_socktype),
            inet_ptos(temp->ai_protocol),
            temp->ai_addrlen);
    ai_addr = temp->ai_addr;
    if (ai_addr != NULL) {
      int i;
      fprintf(dumploc,
              "\tsa_family: %s sadata:",
              inet_ftos(ai_addr->sa_family));
      /* IPv4 address bytes as decimal, everything else as hex */
      for (i = 0; i < (int) temp->ai_addrlen; i++) {
        fprintf(dumploc,
                (temp->ai_family == AF_INET) ? " %d" : " %.2x",
                (u_char)ai_addr->sa_data[i]);
      }
      fprintf(dumploc,"\n");
    }
    temp = temp->ai_next;
  }
  fflush(dumploc);
}

/* resolve_host: wrap getaddrinfo() for a TCP (SOCK_STREAM) endpoint,
   retrying up to 5 times (one second apart) on EAI_AGAIN.  Returns
   the addrinfo chain (caller owns it - freeaddrinfo) or NULL on
   failure, in which case a diagnostic has already been printed. */
struct addrinfo *
resolve_host(char *hostname,
             char *port,
             int  family)
{
  struct addrinfo hints;
  struct addrinfo *ai;
  int count;
  int error;

  if (debug) {
    fprintf(where,
            "resolve_host called with host '%s' port '%s' family %s\n",
            hostname,
            port,
            inet_ftos(family));
    fflush(where);
  }

  memset(&hints, 0, sizeof(hints));
  hints.ai_family = family;
  hints.ai_socktype = SOCK_STREAM;
  hints.ai_protocol = IPPROTO_TCP;
  hints.ai_flags = AI_CANONNAME | AI_ADDRCONFIG;
  count = 0;
  do {
    error = getaddrinfo((char *)hostname,
                        (char *)port,
                        &hints,
                        &ai);
    count += 1;
    if (error == EAI_AGAIN) {
      if (debug) {
        fprintf(where,"Sleeping on getaddrinfo EAI_AGAIN\n");
        fflush(where);
      }
      sleep(1);
    }
  } while ((error == EAI_AGAIN) && (count <= 5));

  if (error) {
    printf("%s: could not resolve host '%s' port '%s' af %s"
           "\n\tgetaddrinfo returned %d %s\n",
           __FUNCTION__,
           hostname,
           port,
           inet_ftos(family),
           error,
           gai_strerror(error));
    return(NULL);
  }

  if (debug) {
    dump_addrinfo(where, ai, hostname, port, family);
  }

  return (ai);
}

/*
  establish_control()

  set-up the control connection between netperf and the netserver so
  we can actually run some tests. if we cannot establish the control
  connection, that may or may not be a good thing, so we will let the
  caller decide what to do.

  to assist with pesky end-to-end-unfriendly things like firewalls, we
  allow the caller to specify both the remote hostname and port, and
  the local addressing info.
  i believe that in theory it is possible

  another, but for the time being, we are only going to take-in one
  requested address family parameter. this means that the only way
  (iirc) that we might get a mixed-mode connection would be if the
  address family is specified as AF_UNSPEC, and getaddrinfo() returns
  different families for the local and server names.

  the "names" can also be IP addresses in ASCII string form.

  raj 2003-02-27 */

/* establish_control_internal: try every (local, remote) address pair
   from the two getaddrinfo() chains - inner iteration over local
   addresses, outer over remote - until a bind+connect succeeds.
   On success returns the connected socket and sets the global
   control_family; on failure returns INVALID_SOCKET (and resets
   control_family to AF_UNSPEC when resolution succeeded but no pair
   connected). */
SOCKET
establish_control_internal(char *hostname,
                           char *port,
                           int  remfam,
                           char *localhost,
                           char *localport,
                           int  locfam)
{
  int not_connected;
  SOCKET control_sock;

  struct addrinfo *local_res;
  struct addrinfo *remote_res;
  struct addrinfo *local_res_temp;
  struct addrinfo *remote_res_temp;

  remote_res = resolve_host(hostname, port, remfam);
  if (!remote_res)
    return(INVALID_SOCKET);

  local_res = resolve_host(localhost, localport, locfam);
  if (!local_res)
    return(INVALID_SOCKET);

  if (debug) {
    fprintf(where,
            "establish_control called with host '%s' port '%s' remfam %s\n"
            "\t\tlocal '%s' port '%s' locfam %s\n",
            hostname,
            port,
            inet_ftos(remfam),
            localhost,
            localport,
            inet_ftos(locfam));
    fflush(where);
  }

  not_connected = 1;
  local_res_temp = local_res;
  remote_res_temp = remote_res;
  /* we want to loop through all the possibilities. looping on the
     local addresses will be handled within the while loop.  I suppose
     these is some more "C-expert" way to code this, but it has not
     lept to mind just yet :)  raj 2003-02024 */

  while (remote_res_temp != NULL) {

    /* I am guessing that we should use the address family of the
       local endpoint, and we will not worry about mixed family types
       - presumeably the stack or other transition mechanisms will be
       able to deal with that for us. famous last words :) raj
       2003-02-26 */
    control_sock = socket(local_res_temp->ai_family,
                          SOCK_STREAM,
                          0);
    if (control_sock == INVALID_SOCKET) {
      /* at some point we'll need a more generic "display error"
         message for when/if we use GUIs and the like. unlike a bind
         or connect failure, failure to allocate a socket is
         "immediately fatal" and so we return to the caller. raj
         2003-02-24 */
      if (debug) {
        perror("establish_control: unable to allocate control socket");
      }
      return(INVALID_SOCKET);
    }

    /* if we are going to control the local enpoint addressing, we
       need to call bind. of course, we should probably be setting one
       of the SO_REUSEmumble socket options? raj 2005-02-04 */
    if (bind(control_sock,
             local_res_temp->ai_addr,
             local_res_temp->ai_addrlen) == 0) {
      if (debug) {
        fprintf(where,
                "bound control socket to %s and %s\n",
                localhost,
                localport);
      }

      if (connect(control_sock,
                  remote_res_temp->ai_addr,
                  remote_res_temp->ai_addrlen) == 0) {
        /* we have successfully connected to the remote netserver */
        if (debug) {
          fprintf(where,
                  "successful connection to remote netserver at %s and %s\n",
                  hostname,
                  port);
        }
        not_connected = 0;
        /* this should get us out of the while loop */
        break;
      } else {
        /* the connect call failed */
        if (debug) {
          fprintf(where,
                  "establish_control: connect failed, errno %d %s\n"
                  "    trying next address combination\n",
                  errno,
                  strerror(errno));
          fflush(where);
        }
      }
    }
    else {
      /* the bind failed */
      if (debug) {
        fprintf(where,
                "establish_control: bind failed, errno %d %s\n"
                "    trying next address combination\n",
                errno,
                strerror(errno));
        fflush(where);
      }
    }

    /* advance to the next local address; when the local list is
       exhausted, wrap it and step to the next remote address */
    if ((local_res_temp = local_res_temp->ai_next) == NULL) {
      /* wrap the local and move to the next server, don't forget to
         close the current control socket. raj 2003-02-24 */
      local_res_temp = local_res;
      /* the outer while conditions will deal with the case when we
         get to the end of all the possible remote addresses. */
      remote_res_temp = remote_res_temp->ai_next;
      /* it is simplest here to just close the control sock. since
         this is not a performance critical section of code, we
         don't worry about overheads for socket allocation or
         close. raj 2003-02-24 */
    }
    /* a fresh socket is allocated at the top of each iteration, so
       the failed one is closed here (the success path breaks out
       above before reaching this close) */
    close(control_sock);
  }

  /* record the family actually used; local_res_temp is never NULL
     here because the local list is wrapped rather than exhausted */
  control_family = local_res_temp->ai_family;

  /* we no longer need the addrinfo stuff */
  freeaddrinfo(local_res);
  freeaddrinfo(remote_res);

  /* so, we are either connected or not */
  if (not_connected) {
    fprintf(where,
            "establish control: are you sure there is a netserver "
            "listening on %s at port %s?\n",
            hostname,
            port);
    fflush(where);
    control_family = AF_UNSPEC;
    return(INVALID_SOCKET);
  }
  /* at this point, we are connected. we probably want some sort of
     version check with the remote at some point. raj 2003-02-24 */
  return(control_sock);
}

/* establish_control: set the global netlib_control socket via
   establish_control_internal(); failure to connect is fatal. */
void
establish_control(char *hostname,
                  char *port,
                  int remfam,
                  char *localhost,
                  char *localport,
                  int locfam)

{

  netlib_control = establish_control_internal(hostname,
                                              port,
                                              remfam,
                                              localhost,
                                              localport,
                                              locfam);
  if (netlib_control == INVALID_SOCKET) {
    fprintf(where,
            "establish_control could not establish the control"
            " connection from %s port %s address family %s to %s"
            " port %s address family %s\n",
            localhost,localport,inet_ftos(locfam),
            hostname,port,inet_ftos(remfam));
    fflush(where);
    exit(INVALID_SOCKET);
  }
}




 /***********************************************************************/
 /*                                                                     */
 /*     get_id()                                                        */
 /*                                                                     */
 /* Return a string to the calling routine that contains the            */
 /* identifying information for the host we are running on. This        */
 /* information will then either be displayed locally, or returned to   */
 /* a remote caller for display there.
                                                                        */
 /*                                                                     */
 /***********************************************************************/

/* Build, in a static buffer, a one-line identification string for the
   host we are running on (OS name, node name, release, version,
   machine type - each padded to 15 columns on non-Windows systems).
   The returned pointer refers to static storage: it is overwritten by
   the next call and must not be freed (not thread-safe). */
char *
get_id()
{
  static char id_string[80];
#ifdef WIN32
  char system_name[MAX_COMPUTERNAME_LENGTH+1] ;
  DWORD name_len = MAX_COMPUTERNAME_LENGTH + 1 ;
#else
  struct utsname system_name;
#endif /* WIN32 */

#ifdef WIN32
  SYSTEM_INFO SystemInfo;
  GetSystemInfo( &SystemInfo ) ;
  /* fall back to a placeholder name rather than failing outright */
  if ( !GetComputerName(system_name , &name_len) )
    strcpy(system_name , "no_name") ;
#else
  if (uname(&system_name) <0) {
    perror("identify_local: uname");
    exit(1);
  }
#endif /* WIN32 */

  snprintf(id_string, sizeof(id_string),
#ifdef WIN32
	   /* NOTE(review): GetVersion() & 0xFF00 leaves the minor
	      version shifted left by eight bits, so the printed
	      "major.minor" value looks inflated - confirm intended. */
	   "%-15s%-15s%d.%d%d",
	   "Windows NT",
	   system_name ,
	   GetVersion() & 0xFF ,
	   GetVersion() & 0xFF00 ,
	   SystemInfo.dwProcessorType

#else
	   "%-15s%-15s%-15s%-15s%-15s",
	   system_name.sysname,
	   system_name.nodename,
	   system_name.release,
	   system_name.version,
	   system_name.machine
#endif /* WIN32 */
	   );
  return (id_string);
}



 /***********************************************************************/
 /*                                                                     */
 /*      identify_local()                                               */
 /*                                                                     */
 /* Display identifying information about the local host to the user.  */
 /* At first release, this information will be the same as that which  */
 /* is returned by the uname -a command, with the exception of the     */
 /* idnumber field, which seems to be a non-POSIX item, and hence      */
 /* non-portable.                                                      */
 /*                                                                     */
 /***********************************************************************/

void
identify_local()
{

char *local_id;

local_id = get_id();

fprintf(where,"Local Information \n\
Sysname        Nodename       Release        Version        Machine\n");

fprintf(where,"%s\n",
	local_id);

}



 /***********************************************************************/
 /*                                                                     */
 /*      identify_remote()                                              */
 /*                                                                     */
 /* Display identifying information about the remote host to the user. */
 /* At first release, this information will be the same as that which  */
 /* is returned by the uname -a command, with the exception of the     */
 /* idnumber field, which seems to be a non-POSIX item, and hence      */
 /* non-portable.  A request is sent to the remote side, which will    */
 /* return a string containing the utsname information in a            */
 /* pre-formatted form, which is then displayed after the header.      */
 /*                                                                     */
 /***********************************************************************/

void
identify_remote()
{

  /* NOTE(review): remote_id is never updated from the response, so
     only the header line carries information - confirm whether the
     reply payload was ever meant to be copied into it. */
  char *remote_id="";

  /* send a request for node info to the remote */
  netperf_request.content.request_type = NODE_IDENTIFY;

  send_request();

  /* and now wait for the reply to come back */

  recv_response();

  if (netperf_response.content.serv_errno) {
    Set_errno(netperf_response.content.serv_errno);
    perror("identify_remote: on remote");
    exit(1);
  }

  fprintf(where,"Remote Information \n\
Sysname        Nodename       Release        Version        Machine\n");

  fprintf(where,"%s",
	  remote_id);
}

/* Take the starting wall-clock timestamp for a test and, when CPU
   utilization is being measured, initialize and start the configured
   CPU-measurement mechanism.  Stores into the file-scope time1/tz. */
void
cpu_start(int measure_cpu)
{

  gettimeofday(&time1,
	       &tz);

  if (measure_cpu) {
    cpu_util_init();
    measuring_cpu = 1;
    cpu_method = get_cpu_method();
    cpu_start_internal();
  }
}


/* Take the ending timestamp (into time2), stop CPU measurement if it
   was started, and return the elapsed wall-clock time in seconds via
   *elapsed.  Also records it in the file-scope lib_elapsed. */
void
cpu_stop(int measure_cpu, float *elapsed)

{

  int	sec,
	usec;

  if (measure_cpu) {
    cpu_stop_internal();
    cpu_util_terminate();
  }

  gettimeofday(&time2,
	       &tz);

  /* borrow a second so the microsecond subtraction cannot go negative */
  if (time2.tv_usec < time1.tv_usec) {
    time2.tv_usec += 1000000;
    time2.tv_sec  -= 1;
  }

  sec  = time2.tv_sec - time1.tv_sec;
  usec = time2.tv_usec - time1.tv_usec;
  lib_elapsed = (float)sec + ((float)usec/(float)1000000.0);
#ifdef WIN32
  /* back-out the padding added around the timer expiration on Windows */
  if (timed_out) lib_elapsed-=PAD_TIME/2;
#endif
  *elapsed = lib_elapsed;

}


/* Convert a raw unit count over an interval (seconds) into the
   throughput implied by the global libfmt format character:
   K/M/G are binary (1024-based) bytes, k/m/g are decimal bits,
   x/b/B leave the units untouched, and 'u' returns microseconds
   per transaction instead of a rate. */
double
calc_thruput_interval(double units_received,double elapsed)

{
  double	divisor;

  /* We will calculate the thruput in libfmt units/second */
  switch (libfmt) {
  case 'K':
    divisor = 1024.0;
    break;
  case 'M':
    divisor = 1024.0 * 1024.0;
    break;
  case 'G':
    divisor = 1024.0 * 1024.0 * 1024.0;
    break;
  case 'k':
    /* bytes in, kilobits/s out - hence the divide by 8 */
    divisor = 1000.0 / 8.0;
    break;
  case 'm':
    divisor = 1000.0 * 1000.0 / 8.0;
    break;
  case 'g':
    divisor = 1000.0 * 1000.0 * 1000.0 / 8.0;
    break;
  case 'x':
  case 'b':
  case 'B':
    divisor = 1.0;
    break;
  case 'u':
    /* latency in microseconds.  a bit squirrely, but we don't want to
       really muck with things for the default return statement.
       invert transactions per second and multiply to get microseconds
       per transaction */
    return (1 / (units_received / elapsed)) * 1000000.0;

  default:
    /* silently fall back to KB/s; the _omni variant warns instead */
    divisor = 1024.0;
  }

  return (units_received / divisor / elapsed);

}

/* Convenience wrapper: throughput over the whole measured interval
   recorded in lib_elapsed by cpu_stop(). */
double
calc_thruput(double units_received)

{
  return(calc_thruput_interval(units_received,lib_elapsed));
}

/* these "_omni" versions are ones which understand 'x' as a unit,
   meaning transactions/s.
   we have a separate routine rather than
   convert the existing routine so we don't have to go and change
   _all_ the nettest_foo.c files at one time.  raj 2007-06-08 */

/* Identical unit handling to calc_thruput_interval() except that an
   unknown libfmt character produces a warning on 'where' before the
   KB/s fallback is applied. */
double
calc_thruput_interval_omni(double units_received,double elapsed)

{
  double	divisor;

  /* We will calculate the thruput in libfmt units/second */
  switch (libfmt) {
  case 'K':
    divisor = 1024.0;
    break;
  case 'M':
    divisor = 1024.0 * 1024.0;
    break;
  case 'G':
    divisor = 1024.0 * 1024.0 * 1024.0;
    break;
  case 'k':
    divisor = 1000.0 / 8.0;
    break;
  case 'm':
    divisor = 1000.0 * 1000.0 / 8.0;
    break;
  case 'g':
    divisor = 1000.0 * 1000.0 * 1000.0 / 8.0;
    break;
  case 'x':
  case 'b':
  case 'B':
    divisor = 1.0;
    break;
  case 'u':
    /* latency in microseconds.  a bit squirrely, but we don't want to
       really muck with things for the default return statement.
       invert transactions per second and multiply to get microseconds
       per transaction */
    return (1 / (units_received / elapsed)) * 1000000.0;

  default:
    fprintf(where,
	    "WARNING calc_throughput_internal_omni: unknown units %c\n",
	    libfmt);
    fflush(where);
    divisor = 1024.0;
  }

  return (units_received / divisor / elapsed);

}

/* wrapper over the full measured interval stored in lib_elapsed */
double
calc_thruput_omni(double units_received)

{
  return(calc_thruput_interval_omni(units_received,lib_elapsed));
}





/* Compute overall local CPU utilization for the elapsed interval and,
   as a side effect, record the most-utilized CPU (id and utilization)
   into lib_local_cpu_stats.  Returns the overall utilization. */
float
calc_cpu_util(float elapsed_time)
{
  float temp_util;
  int i;
  temp_util = calc_cpu_util_internal(elapsed_time);

  /* now, what was the most utilized CPU and its utilization? */
  for (i = 0; i < MAXCPUS; i++) {
    if (lib_local_per_cpu_util[i] > lib_local_cpu_stats.peak_cpu_util) {
      lib_local_cpu_stats.peak_cpu_util = lib_local_per_cpu_util[i];
      lib_local_cpu_stats.peak_cpu_id = lib_cpu_map[i];
    }
  }

  return temp_util;
}

/* Compute service demand - microseconds of CPU time consumed per unit
   of work.  Zero elapsed_time or cpu_utilization means "use the global
   values measured by this library" (lib_elapsed and
   lib_local_cpu_stats.cpu_util); num_cpus == 0 means lib_num_loc_cpus. */
float
calc_service_demand_internal(double unit_divisor,
			     double units_sent,
			     float elapsed_time,
			     float cpu_utilization,
			     int num_cpus)

{

  double service_demand;
  double thruput;

  if (debug) {
    fprintf(where,
	    "calc_service_demand called:  units_sent = %f\n"
	    "                            elapsed_time = %f\n"
	    "                            cpu_util = %f\n"
	    "                            num cpu = %d\n",
	    units_sent,
	    elapsed_time,
	    cpu_utilization,
	    num_cpus);
    fflush(where);
  }

  if (num_cpus == 0) num_cpus = lib_num_loc_cpus;

  if (elapsed_time == 0.0) {
    elapsed_time = lib_elapsed;
  }
  if (cpu_utilization == 0.0) {
    cpu_utilization = lib_local_cpu_stats.cpu_util;
  }

  thruput = (units_sent /
	     (double) unit_divisor /
	     (double) elapsed_time);

  /* on MP systems, it is necessary to multiply the service demand by
     the number of CPUs.  at least, I believe that to be the case:)
     raj 10/95 */

  /* thruput has a "per second" component.  if we were using 100%
     (100.0) of the CPU in a second, that would be 1 second, or 1
     millisecond, so we multiply cpu_utilization by 10 to go to
     milliseconds, or 10,000 to go to microseconds.  With revision
     2.1, the service demand measure goes to microseconds per unit.
     raj 12/95 */
  service_demand = (cpu_utilization*10000.0/thruput) *
    (float) num_cpus;

  if (debug) {
    fprintf(where,
	    "calc_service_demand using:   units_sent = %f\n"
	    "                            elapsed_time = %f\n"
	    "                            cpu_util = %f\n"
	    "                            num cpu = %d\n"
	    "calc_service_demand got:     thruput = %f\n"
	    "                            servdem = %f\n",
	    units_sent,
	    elapsed_time,
	    cpu_utilization,
	    num_cpus,
	    thruput,
	    service_demand);
    fflush(where);
  }
  return (float)service_demand;
}

/* classic entry point: units are always counted in KB (divisor 1024) */
float calc_service_demand(double units_sent,
			  float elapsed_time,
			  float cpu_utilization,
			  int num_cpus)

{

  double unit_divisor = (double)1024.0;

  return(calc_service_demand_internal(unit_divisor,
				      units_sent,
				      elapsed_time,
				      cpu_utilization,
				      num_cpus));
}

/* use the value of libfmt to determine the unit_divisor - 'x' means
   transactions, which are not scaled; everything else stays KB */
float calc_service_demand_fmt(double units_sent,
			      float elapsed_time,
			      float cpu_utilization,
			      int num_cpus)

{
  double unit_divisor;

  if ('x' == libfmt) unit_divisor = 1.0;
  else unit_divisor = 1024.0;

  return(calc_service_demand_internal(unit_divisor,
				      units_sent,
				      elapsed_time,
				      cpu_utilization,
				      num_cpus));
}



/* Determine the local CPU "maxrate" used to convert raw counters into
   utilization.  A positive local_cpu_rate is taken on faith from the
   user; otherwise we calibrate when a counter-based method is compiled
   in, or return 0.0 to mean "use times()/getrusage()".  Also records
   the number of local CPUs into lib_num_loc_cpus. */
float
calibrate_local_cpu(float local_cpu_rate)
{

  lib_num_loc_cpus = get_num_cpus();

  lib_use_idle = 0;
#ifdef USE_LOOPER
  cpu_util_init();
  lib_use_idle = 1;
#endif /* USE_LOOPER */

  if (local_cpu_rate > 0) {
    /* The user thinks that he knows what the cpu rate is.  We assume
       that all the processors of an MP system are essentially the
       same - for this reason we do not have a per processor maxrate.
       if the machine has processors which are different in
       performance, the CPU utilization will be skewed.  raj 4/95 */
    lib_local_maxrate = local_cpu_rate;
  }
  else {
    /* if neither USE_LOOPER nor USE_PSTAT are defined, we return a
       0.0 to indicate that times or getrusage should be used.  raj
       4/95 */
    lib_local_maxrate = (float)0.0;
#if defined(USE_PROC_STAT) || defined(USE_LOOPER) || defined(USE_PSTAT) || defined(USE_KSTAT) || defined(USE_PERFSTAT) || defined(USE_SYSCTL)
    lib_local_maxrate = calibrate_idle_rate(4,10);
#endif
  }
  return lib_local_maxrate;
}


/* Ask the remote netserver to calibrate its CPU and return its
   maxrate; also picks up the remote CPU count into lib_num_rem_cpus.
   A remote error is silently mapped to 0.0 for compatibility with
   older netservers. */
float
calibrate_remote_cpu()
{
  float remrate;

  netperf_request.content.request_type = CPU_CALIBRATE;
  send_request();
  /* we know that calibration will last at least 40 seconds, so go to
     sleep for that long so the 60 second select in recv_response will
     not pop.  raj 7/95 */

  /* we know that CPU calibration may last as long as 40 seconds, so
     make sure we "select" for at least that long while looking for
     the response.  raj 2005-05-16 */
  recv_response_timed(40);

  if (netperf_response.content.serv_errno) {
    /* initially, silently ignore remote errors and pass back a zero
       to the caller.  this should allow us to mix rev 1.0 and rev 1.1
       netperfs... */
    return((float)0.0);
  }
  else {
    /* the rate is the first word of the test_specific data, the
       remote CPU count is the word right after it */
    bcopy((char *)netperf_response.content.test_specific_data,
	  (char *)&remrate,
	  sizeof(remrate));
    bcopy((char *)netperf_response.content.test_specific_data + sizeof(remrate),
	  (char *)&lib_num_rem_cpus,
	  sizeof(lib_num_rem_cpus));
/*    remrate = (float) netperf_response.content.test_specific_data[0]; */
    return(remrate);
  }
}



#ifndef WIN32
/* WIN32 requires that at least one of the file sets to select be
   non-null.  Since the msec_sleep routine is only called by
   nettest_dlpi & nettest_unix, let's duck this issue.
*/

/* Sleep for (at least) msecs milliseconds using select() with no file
   descriptors.  Returns 1 if the select was interrupted by a signal,
   0 on a clean timeout; any other select failure is fatal. */
int
msec_sleep( int msecs )
{
  int rval ;

  struct timeval timeout;

  timeout.tv_sec = msecs / 1000;
  timeout.tv_usec = (msecs - (msecs/1000) *1000) * 1000;
  if ((rval = select(0,
		     0,
		     0,
		     0,
		     &timeout))) {
    if ( SOCKET_EINTR(rval) ) {
      return(1);
    }
    perror("msec_sleep: select");
    exit(1);
  }
  return(0);
}
#endif /* WIN32 */

#if defined(WANT_INTERVALS) || defined(WANT_DEMO)

int demo_mode;                    /* are we actually in demo mode? 0 ==
				     not in demo mode; 1 == classic
				     unit based demo mode; 2 == always
				     timestamp demo mode */
double demo_interval = 1000000.0; /* what is the desired interval to
				     display interval results. default
				     is one second in units of
				     microseconds */
double demo_units = 0.0;          /* what is our current best guess as
				     to how many work units must be
				     done to be near the desired
				     reporting interval? */

double units_this_tick;           /* work units accumulated since the
				     last interim report */
#endif

#ifdef WANT_DEMO
/* the two timestamps between which each interim interval is measured;
   the pointers are swapped rather than copying the timestamps */
#ifdef HAVE_GETHRTIME
static hrtime_t demo_one;
static hrtime_t demo_two;
static hrtime_t *demo_one_ptr = &demo_one;
static hrtime_t *demo_two_ptr = &demo_two;
static hrtime_t *temp_demo_ptr = &demo_one;
#elif defined(WIN32)
static LARGE_INTEGER demo_one;
static LARGE_INTEGER demo_two;
static LARGE_INTEGER *demo_one_ptr = &demo_one;
static LARGE_INTEGER *demo_two_ptr = &demo_two;
static LARGE_INTEGER *temp_demo_ptr = &demo_one;
#else
static struct timeval demo_one;
static struct timeval demo_two;
static struct timeval *demo_one_ptr = &demo_one;
static struct timeval *demo_two_ptr = &demo_two;
static struct timeval *temp_demo_ptr = &demo_one;
#endif

/* take the timestamp that opens the first/next demo interval */
void demo_first_timestamp() {
  HIST_timestamp(demo_one_ptr);
}

/* discard any accumulated interim units and restart the interval */
void demo_reset() {
  if (debug) {
    fprintf(where,
	    "Resetting interim results\n");
    fflush(where);
  }

  units_this_tick = 0;
  demo_first_timestamp();
}

/* for a _STREAM test, "a" should be lss_size and "b" should be
   rsr_size. for a _MAERTS test, "a" should be lsr_size and "b" should
   be rss_size. raj 2005-04-06 */
void demo_stream_setup(uint32_t a, uint32_t b) {
  if ((demo_mode) && (demo_units == 0)) {
    /* take our default value of demo_units to be the larger of
       twice the remote's SO_RCVBUF or twice our SO_SNDBUF */
    if (a > b) {
      demo_units = 2*a;
    }
    else {
      demo_units = 2*b;
    }
  }
}

/* Emit one interim result line in the currently selected output mode
   (human-readable, CSV or keyval).  actual_interval is in
   microseconds.  NOTE(review): now.tv_sec is printed with %ld; time_t
   is not long on every platform - confirm this is acceptable for the
   supported targets. */
#ifdef WIN32
__forceinline void demo_interval_display(double actual_interval)
#else
  inline void demo_interval_display(double actual_interval)
#endif
{
  static int count = 0;      /* index used only by the KEYVAL format */
  struct timeval now;

  gettimeofday(&now,NULL);
  switch (netperf_output_mode) {
  case HUMAN:
    fprintf(where,
	    "Interim result: %7.2f %s/s over %.3f seconds ending at %ld.%.3ld\n",
	    calc_thruput_interval(units_this_tick,
				  actual_interval/1000000.0),
	    format_units(),
	    actual_interval/1000000.0,
	    now.tv_sec,
	    (long) now.tv_usec/1000);
    break;
  case CSV:
    fprintf(where,
	    "%7.2f,%s/s,%.3f,%ld.%.3ld\n",
	    calc_thruput_interval(units_this_tick,
				  actual_interval/1000000.0),
	    format_units(),
	    actual_interval/1000000.0,
	    now.tv_sec,
	    (long) now.tv_usec/1000);
    break;
  case KEYVAL:
    fprintf(where,
	    "NETPERF_INTERIM_RESULT[%d]=%.2f\n"
	    "NETPERF_UNITS[%d]=%s/s\n"
	    "NETPERF_INTERVAL[%d]=%.3f\n"
	    "NETPERF_ENDING[%d]=%ld.%.3ld\n",
	    count,
	    calc_thruput_interval(units_this_tick,
				  actual_interval/1000000.0),
	    count,
	    format_units(),
	    count,
	    actual_interval/1000000.0,
	    count,
	    now.tv_sec,
	    (long) now.tv_usec/1000);
    count += 1;
    break;
  default:
    fprintf(where,
	    "Hey Ricky you not fine, theres a bug at demo time. Hey Ricky!");
    fflush(where);
    exit(-1);
  }
  fflush(where);
}

/* this has gotten long enough to warrant being an inline function
   rather than a macro, and it has been enough years since all the
   important compilers have supported such a construct so it should
   not be a big deal. raj 2012-01-23 */

/* Account 'units' of completed work against the current demo interval
   and emit an interim result once demo_interval microseconds have
   passed.  In mode 1 a timestamp is taken only when the unit estimate
   demo_units is reached (and the estimate is re-tuned each time); in
   mode 2 every call timestamps. */
#ifdef WIN32
/* It would seem that the Microsoft compiler will not inline across
   source files.  So there is little point in having an inline
   directive in that situation.  Of course that makes me wonder if an
   inline directive has to appear in netlib.h... */
void demo_interval_tick(uint32_t units)
#else
  inline void demo_interval_tick(uint32_t units)
#endif
{
  double actual_interval = 0.0;

  switch (demo_mode) {
  case 0:
    return;
  case 1:  /* use the unit accumulation first */
    units_this_tick += units;
    if (units_this_tick >= demo_units) {
      /* time to possibly update demo_units and maybe output an
	 interim result */
      HIST_timestamp(demo_two_ptr);
      actual_interval = delta_micro(demo_one_ptr,demo_two_ptr);
      /* we always want to fine-tune demo_units here whether we emit
	 an interim result or not.  if we are short, this will
	 lengthen demo_units.  if we are long, this will shorten it */
      demo_units = demo_units * (demo_interval / actual_interval);
    }
    else
      return;
    break;
  case 2: /* always timestamp */
    units_this_tick += units;
    HIST_timestamp(demo_two_ptr);
    actual_interval = delta_micro(demo_one_ptr,demo_two_ptr);

    break;
  default:
    fprintf(where,
	    "Unexpected value of demo_mode of %d. Please report this as a bug.\n",
	    demo_mode);
    fflush(where);
    exit(-1);
  }




  /* units == 0 will be when we have completed a test.  we want to
     emit a final interim result if there is anything to report */
  if (actual_interval >= demo_interval) {
    /* time to emit an interim result, giving the current time to the
       millisecond for compatibility with RRD */
    demo_interval_display(actual_interval);
    units_this_tick = 0.0;
    /* now get a new starting timestamp.  we could be clever
       and swap pointers - the math we do probably does not
       take all that long, but for now this will suffice */
    temp_demo_ptr = demo_one_ptr;
    demo_one_ptr = demo_two_ptr;
    demo_two_ptr = temp_demo_ptr;

  }
}

/* flush any units accumulated since the last interim report, however
   short the final interval may be */
void demo_interval_final() {
  double actual_interval;

  switch (demo_mode) {
  case 0:
    return;
  case 1:
  case 2:
    if (units_this_tick > 0.0) {
      HIST_timestamp(demo_two_ptr);
      actual_interval = delta_micro(demo_one_ptr,demo_two_ptr);
      demo_interval_display(actual_interval);
      units_this_tick = 0.0;
    }
  }
}

/* per-test-type wrappers around demo_interval_tick */
void demo_stream_interval(uint32_t units) {
  demo_interval_tick(units);
}

/* for request/response tests demo_units is simply whatever the test
   hands us - no buffer-size heuristic */
void demo_rr_setup(uint32_t a) {
  if ((demo_mode) && (demo_units == 0)) {
    /* take whatever we are given */
    demo_units = a;
  }
}

void demo_rr_interval(uint32_t units) {
  demo_interval_tick(units);
}

#endif

/* hist.c

   Given a time difference in microseconds, increment one of 61
   different buckets:

   0 - 9 in increments of 1 usec
   0 - 9 in increments of 10 usecs
   0 - 9 in increments of 100 usecs
   1 - 9 in increments of 1 msec
   1 - 9 in increments of 10 msecs
   1 - 9 in increments of 100 msecs
   1 - 9 in increments of 1 sec
   1 - 9 in increments of 10 sec
   > 100 secs

   This will allow any time to be recorded to within an accuracy of
   10%, and provides a compact representation for capturing the
   distribution of a large number of time differences (e.g.
   request-response latencies).

   Colin Low  10/6/93
   Rick Jones 2004-06-15 extend to unit and ten usecs
*/

/* #include "sys.h" */

/*#define HIST_TEST*/

/* Allocate and zero a histogram able to track up to max_outstanding
   concurrent start-timestamps (a ring of start times sized one larger
   than requested so the ring never appears full).  Exits on allocation
   failure.  NOTE(review): the "#elif HAVE_GET_HRT" below is a bare
   test, unlike "defined(HAVE_GET_HRT)" used elsewhere - confirm the
   macro is always defined to a non-zero value when present. */
HIST
HIST_new_n(int max_outstanding) {
  HIST h;

  if((h = (HIST) malloc(sizeof(struct histogram_struct))) == NULL) {
    perror("HIST_new_n - histogram_struct malloc failed");
    exit(1);
  }
  HIST_clear(h);

  /* we never want to have a full queue, so will trade a little space
     for that. one day we may still have to check for a full queue */
  h->limit = max_outstanding + 1;

  /* now allocate the time_ones based on h->limit */
#ifdef HAVE_GETHRTIME
  h->time_ones = (hrtime_t *) malloc(h->limit * sizeof(hrtime_t));
#elif HAVE_GET_HRT
  h->time_ones = (hrt_t *) malloc(h->limit * sizeof(hrt_t));
#elif defined(WIN32)
  h->time_ones = (LARGE_INTEGER *) malloc(h->limit *
					  sizeof(LARGE_INTEGER));
#else
  h->time_ones = (struct timeval *) malloc(h->limit *
					   sizeof(struct timeval));
#endif /* HAVE_GETHRTIME */
  if (h->time_ones == NULL) {
    perror("HIST_new_n - time_ones malloc failed");
    exit(1);
  }

  return h;
}

/* classic entry point - a histogram with a single outstanding
   timestamp */
HIST
HIST_new(void){
  return HIST_new_n(0);
}


/* Zero every bucket and statistic of the histogram.  Note this also
   resets limit/count and NULLs time_ones, so it must only be called
   on a histogram whose ring buffer is (re)initialized afterwards, as
   HIST_new_n does. */
void
HIST_clear(HIST h){
  int i;
  for(i = 0; i < HIST_NUM_OF_BUCKET; i++){
    h->unit_usec[i] = 0;
    h->ten_usec[i] = 0;
    h->hundred_usec[i] = 0;
    h->unit_msec[i] = 0;
    h->ten_msec[i] = 0;
    h->hundred_msec[i] = 0;
    h->unit_sec[i] = 0;
    h->ten_sec[i] = 0;
  }
  h->ridiculous = 0;
  h->total = 0;
  h->sum = 0;
  h->sumsquare = 0;
  h->hmin = 0;
  h->hmax = 0;
  h->limit = 0;
  h->count = 0;
  h->producer = 0;
  h->consumer = 0;
  h->time_ones = NULL;
}

/* drop any in-flight start timestamps without touching the recorded
   distribution */
void
HIST_purge(HIST h) {
  h->count = 0;
  h->producer = 0;
  h->consumer = 0;
}

/* Record one time difference (microseconds) in the appropriate decade
   bucket, updating total/sum/min/max (and sum of squares when
   keep_statistics is set).  Negative deltas are counted as
   "ridiculous" and otherwise ignored. */
void
HIST_add(register HIST h, int time_delta){
  register float val;
  register int base = HIST_NUM_OF_BUCKET / 10;

  /* check for < 0 added via VMware ESX patches. */

  /* hoisted up to the top because we do not want to count any
     ridiculous values in the actual statistics. right?  raj
     2011-07-28 */
  if (time_delta < 0) {
    h->ridiculous++;
    return;
  }

  /* seed min/max from the first sample */
  if (!h->total)
    h->hmin = h->hmax = time_delta;
  h->total++;
  h->sum += time_delta;
  /* am I just being paranoid about the overhead of pow() when we
     aren't all that interested in the statistics derived from it?
     raj 20100914 */
  if (keep_statistics) {
    h->sumsquare += pow(time_delta, 2);
  }
  h->hmin = ((h->hmin < time_delta) ? h->hmin : time_delta);
  h->hmax = ((h->hmax > time_delta) ? h->hmax : time_delta);
  /* walk down the decades: divide by 10 until the value fits in the
     current row's 0-9 range, then bucket it */
  val = (float) time_delta;
  if(val < 10) h->unit_usec[(int)(val * base)]++;
  else {
    val /= 10;
    if(val < 10) h->ten_usec[(int)(val * base)]++;
    else {
      val /= 10;
      if(val < 10) h->hundred_usec[(int)(val * base)]++;
      else {
	val /= 10;
	if(val < 10) h->unit_msec[(int)(val * base)]++;
	else {
	  val /= 10;
	  if(val < 10) h->ten_msec[(int)(val * base)]++;
	  else {
	    val /= 10;
	    if(val < 10) h->hundred_msec[(int)(val * base)]++;
	    else {
	      val /= 10;
	      if(val < 10) h->unit_sec[(int)(val * base)]++;
	      else {
		val /= 10;
		if(val < 10) h->ten_sec[(int)(val * base)]++;
		else h->ridiculous++;
	      }
	    }
	  }
	}
      }
    }
  }
}

/* Print one decade row of the histogram as ten per-digit sums.
   NOTE(review): the fd parameter is accepted but never used - output
   always goes to the global 'where'; confirm whether fd was meant to
   be honored. */
void
output_row(FILE *fd, char *title, int *row){
  register int i;
  register int j;
  register int base = HIST_NUM_OF_BUCKET / 10;
  register int sum;
  fprintf(where,"%s", title);
  for(i = 0; i < 10; i++){
    sum = 0;
    for (j = i * base; j < (i + 1) * base; j++) {
      sum += row[j];
    }
    fprintf(where,": %4d", sum);
  }
  fprintf(where,"\n");
}

/* total number of samples recorded in one decade row */
int
sum_row(int *row) {
  int sum = 0;
  int i;
  for (i = 0; i < HIST_NUM_OF_BUCKET; i++) sum += row[i];
  return(sum);
}

/* Print the whole histogram, one row per decade, followed by the
   ridiculous-value and total counters (and raw statistics when
   debugging). */
void
HIST_report(HIST h){
#ifndef OLD_HISTOGRAM
  output_row(stdout, "UNIT_USEC     ", h->unit_usec);
  output_row(stdout, "TEN_USEC      ", h->ten_usec);
  output_row(stdout, "HUNDRED_USEC  ", h->hundred_usec);
#else
  /* pre-2004 histograms lumped everything below a tenth of a
     millisecond into one bucket */
  h->hundred_usec[0] += sum_row(h->unit_usec);
  h->hundred_usec[0] += sum_row(h->ten_usec);
  output_row(stdout, "TENTH_MSEC    ", h->hundred_usec);
#endif
  output_row(stdout, "UNIT_MSEC     ", h->unit_msec);
  output_row(stdout, "TEN_MSEC      ", h->ten_msec);
  output_row(stdout, "HUNDRED_MSEC  ", h->hundred_msec);
  output_row(stdout, "UNIT_SEC      ", h->unit_sec);
  output_row(stdout, "TEN_SEC       ", h->ten_sec);
  fprintf(where,">100_SECS: %d\n", h->ridiculous);
  fprintf(where,"HIST_TOTAL:      %d\n", h->total);
  if (debug) {
    fprintf(where,
	    "sum %"PRIi64", sumsquare %f, limit %d count %d\n",
	    h->sum,
	    h->sumsquare,
	    h->limit,
	    h->count);
  }
}

/* Scan one decade row looking for the bucket in which the running
   sample count crosses num; returns the interpolated time value in
   microseconds (scaled by 'scale'), or 0 when this row does not reach
   num.  last/current carry the running totals across rows. */
int
HIST_search_bucket(int *unit, int num, int *last, int *current, double scale){
  int base = HIST_NUM_OF_BUCKET / 10;
  int i;
  for (i = 0; i < HIST_NUM_OF_BUCKET; i++){
    *last = *current;
    *current += unit[i];
    if (*current >= num)
      /* linear interpolation within the crossing bucket */
      return (int)((i + (double)(num - *last)/(*current - *last)) * scale/base);
  }
  return 0;
}

/* Return (in microseconds) the requested percentile of the recorded
   distribution, searching decade rows from finest to coarsest; when
   the percentile falls past the last row, 1e8 usec is returned. */
int
HIST_get_percentile(HIST h, const double percentile){
  double win_kludge = percentile * (double) h->total;
  int num = (int) win_kludge;
  int last = 0;
  int current = 0;
  int result;

  if (!num)
    return 0;

  /* search in unit usec range */
  result = HIST_search_bucket(h->unit_usec, num, &last, &current, 1e0);
  if (result)
    return result;

  /* search in ten usec range */
  result = HIST_search_bucket(h->ten_usec, num, &last, &current, 1e1);
  if (result)
    return result;

  /* search in hundred usec range */
  result = HIST_search_bucket(h->hundred_usec, num, &last, &current, 1e2);
  if (result)
    return result;

  /* search in unit msec range */
  result = HIST_search_bucket(h->unit_msec, num, &last, &current, 1e3);
  if (result)
    return result;

  /* search in ten msec range */
  result = HIST_search_bucket(h->ten_msec, num, &last, &current, 1e4);
  if (result)
    return result;

  /* search in hundred msec range */
  result = HIST_search_bucket(h->hundred_msec, num, &last, &current, 1e5);
  if (result)
    return result;

  /* search in unit sec range */
  result = HIST_search_bucket(h->unit_sec, num, &last, &current, 1e6);
  if (result)
    return result;

  /* search in ten sec range */
  result = HIST_search_bucket(h->ten_sec, num, &last, &current, 1e7);
  if (result)
    return result;

  return (int)(1e8);
}


/* Report min, max, mean and (population) standard deviation of the
   recorded time deltas; all zeros when no samples were recorded. */
void
HIST_get_stats(HIST h, int *min, int *max, double *mean, double *stddev){
  *min = h->hmin;
  *max = h->hmax;
  if (h->total){
    *mean = (double)h->sum / (double)h->total;
    /* var = E[x^2] - E[x]^2, computed from the running sums */
    *stddev = (h->sumsquare * h->total - pow((double)h->sum, 2)) /
      pow(h->total, 2);
    *stddev = sqrt(*stddev);
  }
  else{
    *mean = 0;
    *stddev = 0;
  }
}


/* with the advent of sit-and-spin intervals support, we might as well
   make these things available all the time, not just for demo or
   histogram modes.
   raj 2006-02-06 */

/* Platform-specific pair: HIST_timestamp() snaps a high-resolution
   timestamp, delta_micro() returns end - begin in microseconds. */
#ifdef HAVE_GETHRTIME

void
HIST_timestamp(hrtime_t *timestamp)
{
  *timestamp = gethrtime();
}

int
delta_micro(hrtime_t *begin, hrtime_t *end)
{
  long nsecs;
  nsecs = (*end) - (*begin);
  return(nsecs/1000);
}

#elif defined(HAVE_GET_HRT)
#include "hrt.h"

void
HIST_timestamp(hrt_t *timestamp)
{
  *timestamp = get_hrt();
}

int
delta_micro(hrt_t *begin, hrt_t *end)
{

  return((int)get_hrt_delta(*end,*begin));

}
#elif defined(WIN32)
void HIST_timestamp(LARGE_INTEGER *timestamp)
{
  QueryPerformanceCounter(timestamp);
}

int delta_micro(LARGE_INTEGER *begin, LARGE_INTEGER *end)
{
  LARGE_INTEGER DeltaTimestamp;
  static LARGE_INTEGER TickHz = {{0,0}};

  /* the performance-counter frequency is queried once and cached */
  if (TickHz.QuadPart == 0)
    {
      QueryPerformanceFrequency(&TickHz);
    }

  /*+*+ Rick; this will overflow after ~2000 seconds, is that
    good enough?  Spencer: Yes, that should be more than good
    enough for histogram support */

  DeltaTimestamp.QuadPart = (end->QuadPart - begin->QuadPart) *
    1000000/TickHz.QuadPart;
  assert((DeltaTimestamp.HighPart == 0) &&
	 ((int)DeltaTimestamp.LowPart >= 0));

  return (int)DeltaTimestamp.LowPart;
}

#else

void
HIST_timestamp(struct timeval *timestamp)
{
  gettimeofday(timestamp,NULL);
}

 /* return the difference (in microseconds) between two timeval
    timestamps.  NOTE(review): this version normalizes *end in place
    (borrowing a second), so the caller's end timestamp may be
    modified - harmless for the swap-and-reuse pattern above, but
    worth knowing. */
int
delta_micro(struct timeval *begin,struct timeval *end)

{

  int usecs, secs;

  if (end->tv_usec < begin->tv_usec) {
    /* borrow a second from the tv_sec */
    end->tv_usec += 1000000;
    end->tv_sec--;
  }
  usecs = end->tv_usec - begin->tv_usec;
  secs  = end->tv_sec - begin->tv_sec;

  usecs += (secs * 1000000);

  return(usecs);

}
#endif /* HAVE_GETHRTIME */

/* Push a starting timestamp onto the histogram's ring of outstanding
   operations.  A full ring only produces a warning - the oldest slot
   will be silently overwritten as producer wraps past consumer. */
void
HIST_timestamp_start(HIST h) {

  if (NULL == h) {
    fprintf(where,"HIST_timestamp_start called with NULL histogram\n");
    fflush(where);
    exit(-1);
  }
  if (h->count == h->limit) {
    fprintf(where,"HIST_timestamp_start called with full time_ones\n");
  }

  HIST_timestamp(&(h->time_ones[h->producer]));
  h->producer += 1;
  h->producer %= h->limit;
  h->count += 1;


}

/* snap an ending timestamp and add the delta to the histogram */
void
HIST_timestamp_stop_add(HIST h) {

  if (NULL == h) {
    fprintf(where,"HIST_timestamp_stop called with NULL histogram\n");
    fflush(where);
    exit(-1);
  }

  if (h->consumer == h->producer) {
    fprintf(where,
	    "HIST_timestamp_stop called with empty time_ones consumer %d producer %d\n",
	    h->consumer,
	    h->producer);
    fflush(where);
    exit(-1);
  }
  /* take our stopping timestamp */
  HIST_timestamp(&(h->time_two));

  /* now add it, consuming the oldest outstanding start timestamp */
  HIST_add(h,delta_micro(&(h->time_ones[h->consumer]),&(h->time_two)));
  h->consumer += 1;
  h->consumer %= h->limit;
  h->count -= 1;

}



/* these routines for confidence intervals are courtesy of IBM.  They
   have been modified slightly for more general usage beyond TCP/UDP
   tests. raj 11/94  I would suspect that this code carries an IBM
   copyright that is much the same as that for the original HP netperf
   code */
int confidence_iterations; /* for iterations */

/* running sums/means/variances of the per-iteration measurements used
   by the confidence-interval machinery; the *_confid values hold the
   requested half-width (negative means "not requested") */
double
  result_confid=-10.0,
  loc_cpu_confid=-10.0,
  rem_cpu_confid=-10.0,

  measured_sum_result=0.0,
  measured_square_sum_result=0.0,
  measured_mean_result=0.0,
  measured_var_result=0.0,

  measured_sum_local_cpu=0.0,
  measured_square_sum_local_cpu=0.0,
  measured_mean_local_cpu=0.0,
  measured_var_local_cpu=0.0,

  measured_sum_remote_cpu=0.0,
  measured_square_sum_remote_cpu=0.0,
  measured_mean_remote_cpu=0.0,
  measured_var_remote_cpu=0.0,

  measured_sum_local_service_demand=0.0,
  measured_square_sum_local_service_demand=0.0,
  measured_mean_local_service_demand=0.0,
  measured_var_local_service_demand=0.0,

  measured_sum_remote_service_demand=0.0,
  measured_square_sum_remote_service_demand=0.0,
  measured_mean_remote_service_demand=0.0,
  measured_var_remote_service_demand=0.0,

  measured_sum_local_time=0.0,
  measured_square_sum_local_time=0.0,
  measured_mean_local_time=0.0,
  measured_var_local_time=0.0,

  measured_mean_remote_time=0.0,

  measured_fails,
  measured_local_results,
  confidence=-10.0;
/*  interval=0.1; */

/************************************************************************/
/*                                                                      */
/*      Constants for Confidence Intervals                              */
/*                                                                      */
/************************************************************************/

/* reset all the accumulated confidence-interval statistics before a
   new set of iterations */
void
raj 11/94 */ 4705double 4706confid(int level, int freedom) 4707{ 4708double t99[35],t95[35]; 4709 4710 t95[1]=12.706; 4711 t95[2]= 4.303; 4712 t95[3]= 3.182; 4713 t95[4]= 2.776; 4714 t95[5]= 2.571; 4715 t95[6]= 2.447; 4716 t95[7]= 2.365; 4717 t95[8]= 2.306; 4718 t95[9]= 2.262; 4719 t95[10]= 2.228; 4720 t95[11]= 2.201; 4721 t95[12]= 2.179; 4722 t95[13]= 2.160; 4723 t95[14]= 2.145; 4724 t95[15]= 2.131; 4725 t95[16]= 2.120; 4726 t95[17]= 2.110; 4727 t95[18]= 2.101; 4728 t95[19]= 2.093; 4729 t95[20]= 2.086; 4730 t95[21]= 2.080; 4731 t95[22]= 2.074; 4732 t95[23]= 2.069; 4733 t95[24]= 2.064; 4734 t95[25]= 2.060; 4735 t95[26]= 2.056; 4736 t95[27]= 2.052; 4737 t95[28]= 2.048; 4738 t95[29]= 2.045; 4739 t95[30]= 2.042; 4740 4741 t99[1]=63.657; 4742 t99[2]= 9.925; 4743 t99[3]= 5.841; 4744 t99[4]= 4.604; 4745 t99[5]= 4.032; 4746 t99[6]= 3.707; 4747 t99[7]= 3.499; 4748 t99[8]= 3.355; 4749 t99[9]= 3.250; 4750 t99[10]= 3.169; 4751 t99[11]= 3.106; 4752 t99[12]= 3.055; 4753 t99[13]= 3.012; 4754 t99[14]= 2.977; 4755 t99[15]= 2.947; 4756 t99[16]= 2.921; 4757 t99[17]= 2.898; 4758 t99[18]= 2.878; 4759 t99[19]= 2.861; 4760 t99[20]= 2.845; 4761 t99[21]= 2.831; 4762 t99[22]= 2.819; 4763 t99[23]= 2.807; 4764 t99[24]= 2.797; 4765 t99[25]= 2.787; 4766 t99[26]= 2.779; 4767 t99[27]= 2.771; 4768 t99[28]= 2.763; 4769 t99[29]= 2.756; 4770 t99[30]= 2.750; 4771 4772 if(level==95){ 4773 return(t95[freedom]); 4774 } else if(level==99){ 4775 return(t99[freedom]); 4776 } else{ 4777 return(0); 4778 } 4779} 4780 4781void 4782calculate_confidence(int confidence_iterations, 4783 float time, 4784 double result, 4785 float loc_cpu, 4786 float rem_cpu, 4787 float loc_sd, 4788 float rem_sd) 4789{ 4790 4791 if (debug) { 4792 fprintf(where, 4793 "calculate_confidence: itr %d; time %f; res %f\n" 4794 " lcpu %f; rcpu %f\n" 4795 " lsdm %f; rsdm %f\n", 4796 confidence_iterations, 4797 time, 4798 result, 4799 loc_cpu, 4800 rem_cpu, 4801 loc_sd, 4802 rem_sd); 4803 fflush(where); 4804 } 4805 4806 /* the test time */ 
4807 measured_sum_local_time += 4808 (double) time; 4809 measured_square_sum_local_time += 4810 (double) time*time; 4811 measured_mean_local_time = 4812 (double) measured_sum_local_time/confidence_iterations; 4813 measured_var_local_time = 4814 (double) measured_square_sum_local_time/confidence_iterations 4815 -measured_mean_local_time*measured_mean_local_time; 4816 4817 /* the test result */ 4818 measured_sum_result += 4819 (double) result; 4820 measured_square_sum_result += 4821 (double) result*result; 4822 measured_mean_result = 4823 (double) measured_sum_result/confidence_iterations; 4824 measured_var_result = 4825 (double) measured_square_sum_result/confidence_iterations 4826 -measured_mean_result*measured_mean_result; 4827 4828 /* local cpu utilization */ 4829 measured_sum_local_cpu += 4830 (double) loc_cpu; 4831 measured_square_sum_local_cpu += 4832 (double) loc_cpu*loc_cpu; 4833 measured_mean_local_cpu = 4834 (double) measured_sum_local_cpu/confidence_iterations; 4835 measured_var_local_cpu = 4836 (double) measured_square_sum_local_cpu/confidence_iterations 4837 -measured_mean_local_cpu*measured_mean_local_cpu; 4838 4839 /* remote cpu util */ 4840 measured_sum_remote_cpu += 4841 (double) rem_cpu; 4842 measured_square_sum_remote_cpu+= 4843 (double) rem_cpu*rem_cpu; 4844 measured_mean_remote_cpu = 4845 (double) measured_sum_remote_cpu/confidence_iterations; 4846 measured_var_remote_cpu = 4847 (double) measured_square_sum_remote_cpu/confidence_iterations 4848 -measured_mean_remote_cpu*measured_mean_remote_cpu; 4849 4850 /* local service demand */ 4851 measured_sum_local_service_demand += 4852 (double) loc_sd; 4853 measured_square_sum_local_service_demand+= 4854 (double) loc_sd*loc_sd; 4855 measured_mean_local_service_demand = 4856 (double) measured_sum_local_service_demand/confidence_iterations; 4857 measured_var_local_service_demand = 4858 (double) measured_square_sum_local_service_demand/confidence_iterations 4859 
-measured_mean_local_service_demand*measured_mean_local_service_demand; 4860 4861 /* remote service demand */ 4862 measured_sum_remote_service_demand += 4863 (double) rem_sd; 4864 measured_square_sum_remote_service_demand+= 4865 (double) rem_sd*rem_sd; 4866 measured_mean_remote_service_demand = 4867 (double) measured_sum_remote_service_demand/confidence_iterations; 4868 measured_var_remote_service_demand = 4869 (double) measured_square_sum_remote_service_demand/confidence_iterations 4870 -measured_mean_remote_service_demand*measured_mean_remote_service_demand; 4871 4872 if(confidence_iterations>1){ 4873 result_confid= (double) interval - 4874 2.0 * confid(confidence_level,confidence_iterations-1)* 4875 sqrt(measured_var_result/(confidence_iterations-1.0)) / 4876 measured_mean_result; 4877 4878 loc_cpu_confid= (double) interval - 4879 2.0 * confid(confidence_level,confidence_iterations-1)* 4880 sqrt(measured_var_local_cpu/(confidence_iterations-1.0)) / 4881 measured_mean_local_cpu; 4882 4883 rem_cpu_confid= (double) interval - 4884 2.0 * confid(confidence_level,confidence_iterations-1)* 4885 sqrt(measured_var_remote_cpu/(confidence_iterations-1.0)) / 4886 measured_mean_remote_cpu; 4887 4888 if(debug){ 4889 printf("Conf_itvl %2d: results:%4.1f%% loc_cpu:%4.1f%% rem_cpu:%4.1f%%\n", 4890 confidence_iterations, 4891 (interval-result_confid)*100.0, 4892 (interval-loc_cpu_confid)*100.0, 4893 (interval-rem_cpu_confid)*100.0); 4894 } 4895 4896 /* if the user has requested that we only wait for the result to 4897 be confident rather than the result and CPU util(s) then do 4898 so. 
raj 2007-08-08 */ 4899 if (!result_confidence_only) { 4900 confidence = min(min(result_confid,loc_cpu_confid),rem_cpu_confid); 4901 } 4902 else { 4903 confidence = result_confid; 4904 } 4905 } 4906} 4907 4908 /* here ends the IBM code */ 4909 4910void 4911retrieve_confident_values(float *elapsed_time, 4912 double *thruput, 4913 float *local_cpu_utilization, 4914 float *remote_cpu_utilization, 4915 float *local_service_demand, 4916 float *remote_service_demand) 4917 4918{ 4919 *elapsed_time = (float)measured_mean_local_time; 4920 *thruput = measured_mean_result; 4921 *local_cpu_utilization = (float)measured_mean_local_cpu; 4922 *remote_cpu_utilization = (float)measured_mean_remote_cpu; 4923 *local_service_demand = (float)measured_mean_local_service_demand; 4924 *remote_service_demand = (float)measured_mean_remote_service_demand; 4925} 4926 4927double 4928get_result_confid() 4929{ 4930 return (double) (100.0 * (interval - result_confid)); 4931} 4932 4933double 4934get_loc_cpu_confid() 4935{ 4936 return (double) (100.0 * (interval - loc_cpu_confid)); 4937} 4938 4939double 4940get_rem_cpu_confid() 4941{ 4942 return (double) (100.0 * (interval - rem_cpu_confid)); 4943} 4944 4945/* display_confidence() is called when we could not achieve the 4946 desired confidence in the results. it will print the achieved 4947 confidence to "where" raj 11/94 */ 4948void 4949display_confidence() 4950 4951{ 4952 fprintf(where, 4953 "!!! WARNING\n" 4954 "!!! Desired confidence was not achieved within " 4955 "the specified iterations.\n" 4956 "!!! This implies that there was variability in " 4957 "the test environment that\n" 4958 "!!! must be investigated before going further.\n" 4959 "!!! Confidence intervals: Throughput : %4.3f%%\n" 4960 "!!! Local CPU util : %4.3f%%\n" 4961 "!!! Remote CPU util : %4.3f%%\n\n", 4962 100.0 * (interval - result_confid), 4963 100.0 * (interval - loc_cpu_confid), 4964 100.0 * (interval - rem_cpu_confid)); 4965} 4966