15eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
25eef716561e904a54f3803f01cd4fd615a6809d9mbligh * Copyright (c) 2004 SuSE, Inc.  All Rights Reserved.
35eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
45eef716561e904a54f3803f01cd4fd615a6809d9mbligh * This program is free software; you can redistribute it and/or modify it
55eef716561e904a54f3803f01cd4fd615a6809d9mbligh * under the terms of version 2 of the GNU General Public License as
65eef716561e904a54f3803f01cd4fd615a6809d9mbligh * published by the Free Software Foundation.
75eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
85eef716561e904a54f3803f01cd4fd615a6809d9mbligh * This program is distributed in the hope that it would be useful, but
95eef716561e904a54f3803f01cd4fd615a6809d9mbligh * WITHOUT ANY WARRANTY; without even the implied warranty of
105eef716561e904a54f3803f01cd4fd615a6809d9mbligh * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
115eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
125eef716561e904a54f3803f01cd4fd615a6809d9mbligh * Further, this software is distributed without any warranty that it is
135eef716561e904a54f3803f01cd4fd615a6809d9mbligh * free of the rightful claim of any third person regarding infringement
145eef716561e904a54f3803f01cd4fd615a6809d9mbligh * or the like.  Any license provided herein, whether implied or
155eef716561e904a54f3803f01cd4fd615a6809d9mbligh * otherwise, applies only to this software file.  Patent licenses, if
165eef716561e904a54f3803f01cd4fd615a6809d9mbligh * any, provided herein do not apply to combinations of this program with
175eef716561e904a54f3803f01cd4fd615a6809d9mbligh * other software, or any other product whatsoever.
185eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
195eef716561e904a54f3803f01cd4fd615a6809d9mbligh * You should have received a copy of the GNU General Public License along
205eef716561e904a54f3803f01cd4fd615a6809d9mbligh * with this program; if not, write the Free Software Foundation, Inc., 59
215eef716561e904a54f3803f01cd4fd615a6809d9mbligh * Temple Place - Suite 330, Boston MA 02111-1307, USA.
225eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
235eef716561e904a54f3803f01cd4fd615a6809d9mbligh * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
245eef716561e904a54f3803f01cd4fd615a6809d9mbligh * Mountain View, CA  94043, or:
255eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
265eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
275eef716561e904a54f3803f01cd4fd615a6809d9mbligh * aio-stress
285eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
295eef716561e904a54f3803f01cd4fd615a6809d9mbligh * will open or create each file on the command line, and start a series
305eef716561e904a54f3803f01cd4fd615a6809d9mbligh * of aio to it.
315eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
325eef716561e904a54f3803f01cd4fd615a6809d9mbligh * aio is done in a rotating loop.  first file1 gets 8 requests, then
335eef716561e904a54f3803f01cd4fd615a6809d9mbligh * file2, then file3 etc.  As each file finishes writing, it is switched
345eef716561e904a54f3803f01cd4fd615a6809d9mbligh * to reads
355eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
365eef716561e904a54f3803f01cd4fd615a6809d9mbligh * io buffers are aligned in case you want to do raw io
375eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
 * compile with gcc -Wall -o aio-stress aio-stress.c -laio -lpthread
395eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
405eef716561e904a54f3803f01cd4fd615a6809d9mbligh * run aio-stress -h to see the options
415eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
425eef716561e904a54f3803f01cd4fd615a6809d9mbligh * Please mail Chris Mason (mason@suse.com) with bug reports or patches
435eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
445eef716561e904a54f3803f01cd4fd615a6809d9mbligh#define _FILE_OFFSET_BITS 64
455eef716561e904a54f3803f01cd4fd615a6809d9mbligh#define PROG_VERSION "0.21"
465eef716561e904a54f3803f01cd4fd615a6809d9mbligh#define NEW_GETEVENTS
475eef716561e904a54f3803f01cd4fd615a6809d9mbligh
485eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <stdio.h>
495eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <errno.h>
505eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <assert.h>
515eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <stdlib.h>
525eef716561e904a54f3803f01cd4fd615a6809d9mbligh
535eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <sys/types.h>
545eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <sys/stat.h>
555eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <fcntl.h>
565eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <unistd.h>
575eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <sys/time.h>
585eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <libaio.h>
595eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <sys/ipc.h>
605eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <sys/shm.h>
615eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <sys/mman.h>
625eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <string.h>
635eef716561e904a54f3803f01cd4fd615a6809d9mbligh#include <pthread.h>
645eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/* values stored in io_unit.busy (see grab_iou() and finish_io()) */
#define IO_FREE 0
#define IO_PENDING 1
/* sentinel used as the default value of the global 'iterations' */
#define RUN_FOREVER -1

/*
 * Fallback for headers that do not expose O_DIRECT.
 * NOTE(review): the numeric value of O_DIRECT differs between Linux
 * architectures -- confirm this constant matches the build target.
 */
#ifndef O_DIRECT
#define O_DIRECT         040000 /* direct disk access hint */
#endif

/*
 * Kinds of io an io_oper can perform (stored in io_oper.rw).  stage_name()
 * maps them to "write", "read", "random write" and "random read".
 * LAST_STAGE is presumably a count/sentinel rather than a real stage --
 * its use is not visible here.
 */
enum {
    WRITE,
    READ,
    RWRITE,
    RREAD,
    LAST_STAGE,
};

/* presumably the possible values of the global 'use_shm' -- TODO confirm */
#define USE_MALLOC 0
#define USE_SHM 1
#define USE_SHMFS 2
845eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/*
 * various globals, these are effectively read only by the time the threads
 * are started
 *
 * Most of their consumers are outside this part of the file; the notes
 * below only state what the initializers and the visible code show.
 */
long stages = 0;
unsigned long page_size_mask;
int o_direct = 0;
int o_sync = 0;
int latency_stats = 0;
int completion_latency_stats = 0;
int io_iter = 8;
int iterations = RUN_FOREVER;		/* defaults to the RUN_FOREVER sentinel */
int max_io_submit = 0;
long rec_len = 64 * 1024;		/* default: 64 KiB */
int depth = 64;
int num_threads = 1;
int num_contexts = 1;
off_t context_offset = 2 * 1024 * 1024;	/* default: 2 MiB */
int fsync_stages = 1;
int use_shm = 0;	/* presumably one of the USE_* values -- TODO confirm */
int shm_id;
char *unaligned_buffer = NULL;
char *aligned_buffer = NULL;
int padded_reclen = 0;
int stonewall = 1;
int verify = 0;		/* nonzero: check_finished_io() compares READ data with verify_buf */
char *verify_buf = NULL;	/* expected contents for that comparison */
int unlink_files = 0;

/* forward declarations; the full definitions appear further down */
struct io_unit;
struct thread_info;

/* pthread mutexes and other globals for keeping the threads in sync */
pthread_cond_t stage_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t stage_mutex = PTHREAD_MUTEX_INITIALIZER;
int threads_ending = 0;
int threads_starting = 0;
struct timeval global_stage_start_time;
struct thread_info *global_thread_info;
1245eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/*
 * latencies during io_submit are measured, these are the
 * granularities for deviations
 *
 * calc_latency() scales each sample from seconds to milliseconds and
 * credits the first bucket whose bound in deviations[] is greater than
 * the sample.  Samples at or above the last bound land in no bucket and
 * are only visible through total_io.
 */
#define DEVIATIONS 6
int deviations[DEVIATIONS] = { 100, 250, 500, 1000, 5000, 10000 };
/* running latency statistics; print_lat() reports them and then zeroes them */
struct io_latency {
    double max;		/* largest sample recorded */
    double min;		/* smallest sample recorded; 0 doubles as "not set" */
    double total_io;	/* number of samples recorded */
    double total_lat;	/* sum of all samples */
    double deviations[DEVIATIONS];	/* per-bucket sample counts */
};
1385eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/*
 * container for a series of operations to a file
 *
 * io_opers are chained through next/prev into circular doubly linked
 * lists (see oper_list_add() / oper_list_del()).
 */
struct io_oper {
    /* already open file descriptor, valid for whatever operation you want */
    int fd;

    /* starting byte of the operation */
    off_t start;

    /* ending byte of the operation */
    off_t end;

    /* size of the read/write buffer */
    int reclen;

    /* max number of pending requests before a wait is triggered */
    int depth;

    /* current number of pending requests */
    int num_pending;

    /* last error, zero if there were none */
    int last_err;

    /* total number of errors hit. */
    int num_err;

    /* read,write, random, etc (one of WRITE, READ, RWRITE, RREAD) */
    int rw;

    /* number of ios that will get sent to aio */
    int total_ios;

    /* number of ios we've already sent; oper_mb_trans() multiplies by reclen */
    int started_ios;

    /* last offset used in an io operation */
    off_t last_offset;

    /* stonewalled = 1 when we got cut off before submitting all our ios */
    int stonewalled;

    /* list management: a lone element points at itself in both directions */
    struct io_oper *next;
    struct io_oper *prev;

    /* reference point from which print_time() measures the run time */
    struct timeval start_time;

    /* name printed in the timing and verify messages */
    char *file_name;
};
1885eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/* a single io, and all the tracking needed for it */
struct io_unit {
    /*
     * note, iocb must go first!
     * NOTE(review): presumably so an iocb pointer can be converted back to
     * its io_unit -- the code doing that is not visible here; confirm.
     */
    struct iocb iocb;

    /* pointer to parent io operation struct */
    struct io_oper *io_oper;

    /* aligned buffer */
    char *buf;

    /* size of the aligned buffer (record size) */
    int buf_size;

    /*
     * state of this io unit (free, pending, done); the visible code only
     * ever stores IO_FREE or IO_PENDING
     */
    int busy;

    /* result of last operation; check_finished_io() compares it to buf_size */
    long res;

    /* next unit on the thread's free list (finish_io() pushes onto free_ious) */
    struct io_unit *next;

    struct timeval io_start_time;		/* time of io_submit */
};
2135eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/*
 * Bookkeeping for one worker (identified by tid): its aio context, its pool
 * of io units, the io_opers it is working on and its latency statistics.
 */
struct thread_info {
    io_context_t io_ctx;
    pthread_t tid;

    /* allocated array of io_unit structs */
    struct io_unit *ios;

    /* list of io units available for io */
    struct io_unit *free_ious;

    /* number of io units in the ios array */
    int num_global_ios;

    /* number of io units in flight */
    int num_global_pending;

    /* preallocated array of iocb pointers, only used in run_active */
    struct iocb **iocbs;

    /* preallocated array of events */
    struct io_event *events;

    /* size of the events array */
    int num_global_events;

    /* latency stats for io_submit; reported by print_latency() */
    struct io_latency io_submit_latency;

    /* list of operations still in progress, and of those finished */
    struct io_oper *active_opers;
    struct io_oper *finished_opers;

    /* number of files this thread is doing io on */
    int num_files;

    /* how much io this thread did in the last stage */
    double stage_mb_trans;

    /*
     * latency completion stats i/o time from io_submit until io_getevents;
     * updated by finish_io(), reported by print_completion_latency()
     */
    struct io_latency io_completion_latency;
};
2555eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/*
 * Elapsed time from *start_tv to *stop_tv, in seconds, as a double.
 * An interval that would come out negative is reported as 0.
 */
static double time_since(struct timeval *start_tv, struct timeval *stop_tv)
{
    double whole = stop_tv->tv_sec - start_tv->tv_sec;
    double micro = stop_tv->tv_usec - start_tv->tv_usec;
    double elapsed;

    /* borrow a second when the microsecond difference went negative */
    if (whole > 0 && micro < 0) {
        whole -= 1;
        micro += 1000000;
    }

    elapsed = whole + micro / (double)1000000;
    return elapsed < 0 ? 0 : elapsed;
}
2745eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/*
 * Seconds elapsed between *start_tv and the current time of day,
 * computed with time_since().
 */
static double time_since_now(struct timeval *start_tv)
{
    struct timeval now;

    gettimeofday(&now, NULL);

    return time_since(start_tv, &now);
}
2845eef716561e904a54f3803f01cd4fd615a6809d9mbligh
2855eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
2865eef716561e904a54f3803f01cd4fd615a6809d9mbligh * Add latency info to latency struct
2875eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
2885eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void calc_latency(struct timeval *start_tv, struct timeval *stop_tv,
2895eef716561e904a54f3803f01cd4fd615a6809d9mbligh			struct io_latency *lat)
2905eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
2915eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double delta;
2925eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
2935eef716561e904a54f3803f01cd4fd615a6809d9mbligh    delta = time_since(start_tv, stop_tv);
2945eef716561e904a54f3803f01cd4fd615a6809d9mbligh    delta = delta * 1000;
2955eef716561e904a54f3803f01cd4fd615a6809d9mbligh
2965eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (delta > lat->max)
2975eef716561e904a54f3803f01cd4fd615a6809d9mbligh    	lat->max = delta;
2985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!lat->min || delta < lat->min)
2995eef716561e904a54f3803f01cd4fd615a6809d9mbligh    	lat->min = delta;
3005eef716561e904a54f3803f01cd4fd615a6809d9mbligh    lat->total_io++;
3015eef716561e904a54f3803f01cd4fd615a6809d9mbligh    lat->total_lat += delta;
3025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = 0 ; i < DEVIATIONS ; i++) {
3035eef716561e904a54f3803f01cd4fd615a6809d9mbligh        if (delta < deviations[i]) {
3045eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    lat->deviations[i]++;
3055eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
3065eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
3075eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
3085eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
3095eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3105eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void oper_list_add(struct io_oper *oper, struct io_oper **list)
3115eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
3125eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!*list) {
3135eef716561e904a54f3803f01cd4fd615a6809d9mbligh        *list = oper;
3145eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->prev = oper->next = oper;
3155eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return;
3165eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
3175eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->prev = (*list)->prev;
3185eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->next = *list;
3195eef716561e904a54f3803f01cd4fd615a6809d9mbligh    (*list)->prev->next = oper;
3205eef716561e904a54f3803f01cd4fd615a6809d9mbligh    (*list)->prev = oper;
3215eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return;
3225eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
3235eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3245eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void oper_list_del(struct io_oper *oper, struct io_oper **list)
3255eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
3265eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if ((*list)->next == (*list)->prev && *list == (*list)->next) {
3275eef716561e904a54f3803f01cd4fd615a6809d9mbligh        *list = NULL;
3285eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return;
3295eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
3305eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->prev->next = oper->next;
3315eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->next->prev = oper->prev;
3325eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (*list == oper)
3335eef716561e904a54f3803f01cd4fd615a6809d9mbligh        *list = oper->next;
3345eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
3355eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3365eef716561e904a54f3803f01cd4fd615a6809d9mbligh/* worker func to check error fields in the io unit */
3375eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic int check_finished_io(struct io_unit *io) {
3385eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
3395eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (io->res != io->buf_size) {
3405eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3415eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 struct stat s;
3425eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 fstat(io->io_oper->fd, &s);
3435eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3445eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 /*
3455eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		  * If file size is large enough for the read, then this short
3465eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		  * read is an error.
3475eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		  */
3485eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 if ((io->io_oper->rw == READ || io->io_oper->rw == RREAD) &&
3495eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		     s.st_size > (io->iocb.u.c.offset + io->res)) {
3505eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3515eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 		 fprintf(stderr, "io err %lu (%s) op %d, off %Lu size %d\n",
3525eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 		 		 io->res, strerror(-io->res), io->iocb.aio_lio_opcode,
3535eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 		 		 io->iocb.u.c.offset, io->buf_size);
3545eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 		 io->io_oper->last_err = io->res;
3555eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 		 io->io_oper->num_err++;
3565eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 		 return -1;
3575eef716561e904a54f3803f01cd4fd615a6809d9mbligh  		 }
3585eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
3595eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (verify && io->io_oper->rw == READ) {
3605eef716561e904a54f3803f01cd4fd615a6809d9mbligh        if (memcmp(io->buf, verify_buf, io->io_oper->reclen)) {
3615eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    fprintf(stderr, "verify error, file %s offset %Lu contents (offset:bad:good):\n",
3625eef716561e904a54f3803f01cd4fd615a6809d9mbligh	            io->io_oper->file_name, io->iocb.u.c.offset);
3635eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3645eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    for (i = 0 ; i < io->io_oper->reclen ; i++) {
3655eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        if (io->buf[i] != verify_buf[i]) {
3665eef716561e904a54f3803f01cd4fd615a6809d9mbligh		    fprintf(stderr, "%d:%c:%c ", i, io->buf[i], verify_buf[i]);
3675eef716561e904a54f3803f01cd4fd615a6809d9mbligh		}
3685eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    }
3695eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    fprintf(stderr, "\n");
3705eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
3715eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3725eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
3735eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
3745eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
3755eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3765eef716561e904a54f3803f01cd4fd615a6809d9mbligh/* worker func to check the busy bits and get an io unit ready for use */
3775eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic int grab_iou(struct io_unit *io, struct io_oper *oper) {
3785eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (io->busy == IO_PENDING)
3795eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return -1;
3805eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3815eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io->busy = IO_PENDING;
3825eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io->res = 0;
3835eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io->io_oper = oper;
3845eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
3855eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
3865eef716561e904a54f3803f01cd4fd615a6809d9mbligh
3875eef716561e904a54f3803f01cd4fd615a6809d9mblighchar *stage_name(int rw) {
3885eef716561e904a54f3803f01cd4fd615a6809d9mbligh    switch(rw) {
3895eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case WRITE:
3905eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return "write";
3915eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case READ:
3925eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return "read";
3935eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case RWRITE:
3945eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return "random write";
3955eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case RREAD:
3965eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return "random read";
3975eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
3985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return "unknown";
3995eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4005eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4015eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic inline double oper_mb_trans(struct io_oper *oper) {
4025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return ((double)oper->started_ios * (double)oper->reclen) /
4035eef716561e904a54f3803f01cd4fd615a6809d9mbligh                (double)(1024 * 1024);
4045eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4055eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4065eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void print_time(struct io_oper *oper) {
4075eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double runtime;
4085eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double tput;
4095eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double mb;
4105eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4115eef716561e904a54f3803f01cd4fd615a6809d9mbligh    runtime = time_since_now(&oper->start_time);
4125eef716561e904a54f3803f01cd4fd615a6809d9mbligh    mb = oper_mb_trans(oper);
4135eef716561e904a54f3803f01cd4fd615a6809d9mbligh    tput = mb / runtime;
4145eef716561e904a54f3803f01cd4fd615a6809d9mbligh    fprintf(stderr, "%s on %s (%.2f MB/s) %.2f MB in %.2fs\n",
4155eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    stage_name(oper->rw), oper->file_name, tput, mb, runtime);
4165eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4175eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4185eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void print_lat(char *str, struct io_latency *lat) {
4195eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double avg = lat->total_lat / lat->total_io;
4205eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
4215eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double total_counted = 0;
4225eef716561e904a54f3803f01cd4fd615a6809d9mbligh    fprintf(stderr, "%s min %.2f avg %.2f max %.2f\n\t",
4235eef716561e904a54f3803f01cd4fd615a6809d9mbligh            str, lat->min, avg, lat->max);
4245eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4255eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = 0 ; i < DEVIATIONS ; i++) {
4265eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, " %.0f < %d", lat->deviations[i], deviations[i]);
4275eef716561e904a54f3803f01cd4fd615a6809d9mbligh	total_counted += lat->deviations[i];
4285eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
4295eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (total_counted && lat->total_io - total_counted)
4305eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, " < %.0f", lat->total_io - total_counted);
4315eef716561e904a54f3803f01cd4fd615a6809d9mbligh    fprintf(stderr, "\n");
4325eef716561e904a54f3803f01cd4fd615a6809d9mbligh    memset(lat, 0, sizeof(*lat));
4335eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4345eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4355eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void print_latency(struct thread_info *t)
4365eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
4375eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_latency *lat = &t->io_submit_latency;
4385eef716561e904a54f3803f01cd4fd615a6809d9mbligh    print_lat("latency", lat);
4395eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4405eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4415eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void print_completion_latency(struct thread_info *t)
4425eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
4435eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_latency *lat = &t->io_completion_latency;
4445eef716561e904a54f3803f01cd4fd615a6809d9mbligh    print_lat("completion latency", lat);
4455eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4465eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4475eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
4485eef716561e904a54f3803f01cd4fd615a6809d9mbligh * updates the fields in the io operation struct that belongs to this
4495eef716561e904a54f3803f01cd4fd615a6809d9mbligh * io unit, and make the io unit reusable again
4505eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
4515eef716561e904a54f3803f01cd4fd615a6809d9mblighvoid finish_io(struct thread_info *t, struct io_unit *io, long result,
4525eef716561e904a54f3803f01cd4fd615a6809d9mbligh		struct timeval *tv_now) {
4535eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_oper *oper = io->io_oper;
4545eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4555eef716561e904a54f3803f01cd4fd615a6809d9mbligh    calc_latency(&io->io_start_time, tv_now, &t->io_completion_latency);
4565eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io->res = result;
4575eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io->busy = IO_FREE;
4585eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io->next = t->free_ious;
4595eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->free_ious = io;
4605eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->num_pending--;
4615eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->num_global_pending--;
4625eef716561e904a54f3803f01cd4fd615a6809d9mbligh    check_finished_io(io);
4635eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->num_pending == 0 &&
4645eef716561e904a54f3803f01cd4fd615a6809d9mbligh       (oper->started_ios == oper->total_ios || oper->stonewalled))
4655eef716561e904a54f3803f01cd4fd615a6809d9mbligh    {
4665eef716561e904a54f3803f01cd4fd615a6809d9mbligh        print_time(oper);
4675eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
4685eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4695eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4705eef716561e904a54f3803f01cd4fd615a6809d9mblighint read_some_events(struct thread_info *t) {
4715eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_unit *event_io;
4725eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_event *event;
4735eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int nr;
4745eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
4755eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int min_nr = io_iter;
4765eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct timeval stop_time;
4775eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4785eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->num_global_pending < io_iter)
4795eef716561e904a54f3803f01cd4fd615a6809d9mbligh        min_nr = t->num_global_pending;
4805eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4815eef716561e904a54f3803f01cd4fd615a6809d9mbligh#ifdef NEW_GETEVENTS
4825eef716561e904a54f3803f01cd4fd615a6809d9mbligh    nr = io_getevents(t->io_ctx, min_nr, t->num_global_events, t->events,NULL);
4835eef716561e904a54f3803f01cd4fd615a6809d9mbligh#else
4845eef716561e904a54f3803f01cd4fd615a6809d9mbligh    nr = io_getevents(t->io_ctx, t->num_global_events, t->events, NULL);
4855eef716561e904a54f3803f01cd4fd615a6809d9mbligh#endif
4865eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (nr <= 0)
4875eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return nr;
4885eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4895eef716561e904a54f3803f01cd4fd615a6809d9mbligh    gettimeofday(&stop_time, NULL);
4905eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = 0 ; i < nr ; i++) {
4915eef716561e904a54f3803f01cd4fd615a6809d9mbligh	event = t->events + i;
4925eef716561e904a54f3803f01cd4fd615a6809d9mbligh	event_io = (struct io_unit *)((unsigned long)event->obj);
4935eef716561e904a54f3803f01cd4fd615a6809d9mbligh	finish_io(t, event_io, event->res, &stop_time);
4945eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
4955eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return nr;
4965eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
4975eef716561e904a54f3803f01cd4fd615a6809d9mbligh
4985eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
4995eef716561e904a54f3803f01cd4fd615a6809d9mbligh * finds a free io unit, waiting for pending requests if required.  returns
5005eef716561e904a54f3803f01cd4fd615a6809d9mbligh * null if none could be found
5015eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
5025eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic struct io_unit *find_iou(struct thread_info *t, struct io_oper *oper)
5035eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
5045eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_unit *event_io;
5055eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int nr;
5065eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5075eef716561e904a54f3803f01cd4fd615a6809d9mblighretry:
5085eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->free_ious) {
5095eef716561e904a54f3803f01cd4fd615a6809d9mbligh        event_io = t->free_ious;
5105eef716561e904a54f3803f01cd4fd615a6809d9mbligh	t->free_ious = t->free_ious->next;
5115eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (grab_iou(event_io, oper)) {
5125eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    fprintf(stderr, "io unit on free list but not free\n");
5135eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    abort();
5145eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
5155eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return event_io;
5165eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
5175eef716561e904a54f3803f01cd4fd615a6809d9mbligh    nr = read_some_events(t);
5185eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (nr > 0)
5195eef716561e904a54f3803f01cd4fd615a6809d9mbligh    	goto retry;
5205eef716561e904a54f3803f01cd4fd615a6809d9mbligh    else
5215eef716561e904a54f3803f01cd4fd615a6809d9mbligh    	fprintf(stderr, "no free ious after read_some_events\n");
5225eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return NULL;
5235eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
5245eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5255eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
5265eef716561e904a54f3803f01cd4fd615a6809d9mbligh * wait for all pending requests for this io operation to finish
5275eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
5285eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic int io_oper_wait(struct thread_info *t, struct io_oper *oper) {
5295eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_event event;
5305eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_unit *event_io;
5315eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5325eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper == NULL) {
5335eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return 0;
5345eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
5355eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5365eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->num_pending == 0)
5375eef716561e904a54f3803f01cd4fd615a6809d9mbligh        goto done;
5385eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5395eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* this func is not speed sensitive, no need to go wild reading
5405eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * more than one event at a time
5415eef716561e904a54f3803f01cd4fd615a6809d9mbligh     */
5425eef716561e904a54f3803f01cd4fd615a6809d9mbligh#ifdef NEW_GETEVENTS
5435eef716561e904a54f3803f01cd4fd615a6809d9mbligh    while(io_getevents(t->io_ctx, 1, 1, &event, NULL) > 0) {
5445eef716561e904a54f3803f01cd4fd615a6809d9mbligh#else
5455eef716561e904a54f3803f01cd4fd615a6809d9mbligh    while(io_getevents(t->io_ctx, 1, &event, NULL) > 0) {
5465eef716561e904a54f3803f01cd4fd615a6809d9mbligh#endif
5475eef716561e904a54f3803f01cd4fd615a6809d9mbligh	struct timeval tv_now;
5485eef716561e904a54f3803f01cd4fd615a6809d9mbligh        event_io = (struct io_unit *)((unsigned long)event.obj);
5495eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5505eef716561e904a54f3803f01cd4fd615a6809d9mbligh	gettimeofday(&tv_now, NULL);
5515eef716561e904a54f3803f01cd4fd615a6809d9mbligh	finish_io(t, event_io, event.res, &tv_now);
5525eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5535eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (oper->num_pending == 0)
5545eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
5555eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
5565eef716561e904a54f3803f01cd4fd615a6809d9mblighdone:
5575eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->num_err) {
5585eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "%u errors on oper, last %u\n",
5595eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        oper->num_err, oper->last_err);
5605eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
5615eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
5625eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
5635eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5645eef716561e904a54f3803f01cd4fd615a6809d9mblighoff_t random_byte_offset(struct io_oper *oper) {
5655eef716561e904a54f3803f01cd4fd615a6809d9mbligh    off_t num;
5665eef716561e904a54f3803f01cd4fd615a6809d9mbligh    off_t rand_byte = oper->start;
5675eef716561e904a54f3803f01cd4fd615a6809d9mbligh    off_t range;
5685eef716561e904a54f3803f01cd4fd615a6809d9mbligh    off_t offset = 1;
5695eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5705eef716561e904a54f3803f01cd4fd615a6809d9mbligh    range = (oper->end - oper->start) / (1024 * 1024);
5715eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if ((page_size_mask+1) > (1024 * 1024))
5725eef716561e904a54f3803f01cd4fd615a6809d9mbligh        offset = (page_size_mask+1) / (1024 * 1024);
5735eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (range < offset)
5745eef716561e904a54f3803f01cd4fd615a6809d9mbligh        range = 0;
5755eef716561e904a54f3803f01cd4fd615a6809d9mbligh    else
5765eef716561e904a54f3803f01cd4fd615a6809d9mbligh        range -= offset;
5775eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5785eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* find a random mb offset */
5795eef716561e904a54f3803f01cd4fd615a6809d9mbligh    num = 1 + (int)((double)range * rand() / (RAND_MAX + 1.0 ));
5805eef716561e904a54f3803f01cd4fd615a6809d9mbligh    rand_byte += num * 1024 * 1024;
5815eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5825eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* find a random byte offset */
5835eef716561e904a54f3803f01cd4fd615a6809d9mbligh    num = 1 + (int)((double)(1024 * 1024) * rand() / (RAND_MAX + 1.0));
5845eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5855eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* page align */
5865eef716561e904a54f3803f01cd4fd615a6809d9mbligh    num = (num + page_size_mask) & ~page_size_mask;
5875eef716561e904a54f3803f01cd4fd615a6809d9mbligh    rand_byte += num;
5885eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5895eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (rand_byte + oper->reclen > oper->end) {
5905eef716561e904a54f3803f01cd4fd615a6809d9mbligh	rand_byte -= oper->reclen;
5915eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
5925eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return rand_byte;
5935eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
5945eef716561e904a54f3803f01cd4fd615a6809d9mbligh
5955eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
5965eef716561e904a54f3803f01cd4fd615a6809d9mbligh * build an aio iocb for an operation, based on oper->rw and the
5975eef716561e904a54f3803f01cd4fd615a6809d9mbligh * last offset used.  This finds the struct io_unit that will be attached
5985eef716561e904a54f3803f01cd4fd615a6809d9mbligh * to the iocb, and things are ready for submission to aio after this
5995eef716561e904a54f3803f01cd4fd615a6809d9mbligh * is called.
6005eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
6015eef716561e904a54f3803f01cd4fd615a6809d9mbligh * returns null on error
6025eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
6035eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic struct io_unit *build_iocb(struct thread_info *t, struct io_oper *oper)
6045eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
6055eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_unit *io;
6065eef716561e904a54f3803f01cd4fd615a6809d9mbligh    off_t rand_byte;
6075eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6085eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io = find_iou(t, oper);
6095eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!io) {
6105eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "unable to find io unit\n");
6115eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return NULL;
6125eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
6135eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6145eef716561e904a54f3803f01cd4fd615a6809d9mbligh    switch(oper->rw) {
6155eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case WRITE:
6165eef716561e904a54f3803f01cd4fd615a6809d9mbligh        io_prep_pwrite(&io->iocb,oper->fd, io->buf, oper->reclen,
6175eef716561e904a54f3803f01cd4fd615a6809d9mbligh	               oper->last_offset);
6185eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->last_offset += oper->reclen;
6195eef716561e904a54f3803f01cd4fd615a6809d9mbligh	break;
6205eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case READ:
6215eef716561e904a54f3803f01cd4fd615a6809d9mbligh        io_prep_pread(&io->iocb,oper->fd, io->buf, oper->reclen,
6225eef716561e904a54f3803f01cd4fd615a6809d9mbligh	              oper->last_offset);
6235eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->last_offset += oper->reclen;
6245eef716561e904a54f3803f01cd4fd615a6809d9mbligh	break;
6255eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case RREAD:
6265eef716561e904a54f3803f01cd4fd615a6809d9mbligh	rand_byte = random_byte_offset(oper);
6275eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->last_offset = rand_byte;
6285eef716561e904a54f3803f01cd4fd615a6809d9mbligh        io_prep_pread(&io->iocb,oper->fd, io->buf, oper->reclen,
6295eef716561e904a54f3803f01cd4fd615a6809d9mbligh	              rand_byte);
6305eef716561e904a54f3803f01cd4fd615a6809d9mbligh        break;
6315eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case RWRITE:
6325eef716561e904a54f3803f01cd4fd615a6809d9mbligh	rand_byte = random_byte_offset(oper);
6335eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->last_offset = rand_byte;
6345eef716561e904a54f3803f01cd4fd615a6809d9mbligh        io_prep_pwrite(&io->iocb,oper->fd, io->buf, oper->reclen,
6355eef716561e904a54f3803f01cd4fd615a6809d9mbligh	              rand_byte);
6365eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6375eef716561e904a54f3803f01cd4fd615a6809d9mbligh        break;
6385eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
6395eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6405eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return io;
6415eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
6425eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6435eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
6445eef716561e904a54f3803f01cd4fd615a6809d9mbligh * wait for any pending requests, and then free all ram associated with
6455eef716561e904a54f3803f01cd4fd615a6809d9mbligh * an operation.  returns the last error the operation hit (zero means none)
6465eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
6475eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic int
6485eef716561e904a54f3803f01cd4fd615a6809d9mblighfinish_oper(struct thread_info *t, struct io_oper *oper)
6495eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
6505eef716561e904a54f3803f01cd4fd615a6809d9mbligh    unsigned long last_err;
6515eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6525eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io_oper_wait(t, oper);
6535eef716561e904a54f3803f01cd4fd615a6809d9mbligh    last_err = oper->last_err;
6545eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->num_pending > 0) {
6555eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "oper num_pending is %d\n", oper->num_pending);
6565eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
6575eef716561e904a54f3803f01cd4fd615a6809d9mbligh    close(oper->fd);
6585eef716561e904a54f3803f01cd4fd615a6809d9mbligh    free(oper);
6595eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return last_err;
6605eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
6615eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6625eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
6635eef716561e904a54f3803f01cd4fd615a6809d9mbligh * allocates an io operation and fills in all the fields.  returns
6645eef716561e904a54f3803f01cd4fd615a6809d9mbligh * null on error
6655eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
6665eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic struct io_oper *
6675eef716561e904a54f3803f01cd4fd615a6809d9mblighcreate_oper(int fd, int rw, off_t start, off_t end, int reclen, int depth,
6685eef716561e904a54f3803f01cd4fd615a6809d9mbligh            int iter, char *file_name)
6695eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
6705eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_oper *oper;
6715eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6725eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper = malloc (sizeof(*oper));
6735eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!oper) {
6745eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "unable to allocate io oper\n");
6755eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return NULL;
6765eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
6775eef716561e904a54f3803f01cd4fd615a6809d9mbligh    memset(oper, 0, sizeof(*oper));
6785eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6795eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->depth = depth;
6805eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->start = start;
6815eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->end = end;
6825eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->last_offset = oper->start;
6835eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->fd = fd;
6845eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->reclen = reclen;
6855eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->rw = rw;
6865eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->total_ios = (oper->end - oper->start) / oper->reclen;
6875eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper->file_name = file_name;
6885eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6895eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return oper;
6905eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
6915eef716561e904a54f3803f01cd4fd615a6809d9mbligh
6925eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
6935eef716561e904a54f3803f01cd4fd615a6809d9mbligh * does setup on num_ios worth of iocbs, but does not actually
6945eef716561e904a54f3803f01cd4fd615a6809d9mbligh * start any io
6955eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
6965eef716561e904a54f3803f01cd4fd615a6809d9mblighint build_oper(struct thread_info *t, struct io_oper *oper, int num_ios,
6975eef716561e904a54f3803f01cd4fd615a6809d9mbligh               struct iocb **my_iocbs)
6985eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
6995eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
7005eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_unit *io;
7015eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->started_ios == 0)
7035eef716561e904a54f3803f01cd4fd615a6809d9mbligh	gettimeofday(&oper->start_time, NULL);
7045eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7055eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (num_ios == 0)
7065eef716561e904a54f3803f01cd4fd615a6809d9mbligh        num_ios = oper->total_ios;
7075eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7085eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if ((oper->started_ios + num_ios) > oper->total_ios)
7095eef716561e904a54f3803f01cd4fd615a6809d9mbligh        num_ios = oper->total_ios - oper->started_ios;
7105eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7115eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for( i = 0 ; i < num_ios ; i++) {
7125eef716561e904a54f3803f01cd4fd615a6809d9mbligh	io = build_iocb(t, oper);
7135eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (!io) {
7145eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    return -1;
7155eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
7165eef716561e904a54f3803f01cd4fd615a6809d9mbligh	my_iocbs[i] = &io->iocb;
7175eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
7185eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return num_ios;
7195eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
7205eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7215eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
7225eef716561e904a54f3803f01cd4fd615a6809d9mbligh * runs through the iocbs in the array provided and updates
7235eef716561e904a54f3803f01cd4fd615a6809d9mbligh * counters in the associated oper struct
7245eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
7255eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic void update_iou_counters(struct iocb **my_iocbs, int nr,
7265eef716561e904a54f3803f01cd4fd615a6809d9mbligh	struct timeval *tv_now)
7275eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
7285eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_unit *io;
7295eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
7305eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = 0 ; i < nr ; i++) {
7315eef716561e904a54f3803f01cd4fd615a6809d9mbligh	io = (struct io_unit *)(my_iocbs[i]);
7325eef716561e904a54f3803f01cd4fd615a6809d9mbligh	io->io_oper->num_pending++;
7335eef716561e904a54f3803f01cd4fd615a6809d9mbligh	io->io_oper->started_ios++;
7345eef716561e904a54f3803f01cd4fd615a6809d9mbligh	io->io_start_time = *tv_now;	/* set time of io_submit */
7355eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
7365eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
7375eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7385eef716561e904a54f3803f01cd4fd615a6809d9mbligh/* starts some io for a given file, returns zero if all went well */
7395eef716561e904a54f3803f01cd4fd615a6809d9mblighint run_built(struct thread_info *t, int num_ios, struct iocb **my_iocbs)
7405eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
7415eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int ret;
7425eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct timeval start_time;
7435eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct timeval stop_time;
7445eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7455eef716561e904a54f3803f01cd4fd615a6809d9mblighresubmit:
7465eef716561e904a54f3803f01cd4fd615a6809d9mbligh    gettimeofday(&start_time, NULL);
7475eef716561e904a54f3803f01cd4fd615a6809d9mbligh    ret = io_submit(t->io_ctx, num_ios, my_iocbs);
7485eef716561e904a54f3803f01cd4fd615a6809d9mbligh    gettimeofday(&stop_time, NULL);
7495eef716561e904a54f3803f01cd4fd615a6809d9mbligh    calc_latency(&start_time, &stop_time, &t->io_submit_latency);
7505eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7515eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (ret != num_ios) {
7525eef716561e904a54f3803f01cd4fd615a6809d9mbligh	/* some ios got through */
7535eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (ret > 0) {
7545eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    update_iou_counters(my_iocbs, ret, &stop_time);
7555eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    my_iocbs += ret;
7565eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    t->num_global_pending += ret;
7575eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    num_ios -= ret;
7585eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
7595eef716561e904a54f3803f01cd4fd615a6809d9mbligh	/*
7605eef716561e904a54f3803f01cd4fd615a6809d9mbligh	 * we've used all the requests allocated in aio_init, wait and
7615eef716561e904a54f3803f01cd4fd615a6809d9mbligh	 * retry
7625eef716561e904a54f3803f01cd4fd615a6809d9mbligh	 */
7635eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (ret > 0 || ret == -EAGAIN) {
7645eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    int old_ret = ret;
7655eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    if ((ret = read_some_events(t) > 0)) {
7665eef716561e904a54f3803f01cd4fd615a6809d9mbligh		goto resubmit;
7675eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    } else {
7685eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    	fprintf(stderr, "ret was %d and now is %d\n", ret, old_ret);
7695eef716561e904a54f3803f01cd4fd615a6809d9mbligh		abort();
7705eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    }
7715eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
7725eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7735eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "ret %d (%s) on io_submit\n", ret, strerror(-ret));
7745eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return -1;
7755eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
7765eef716561e904a54f3803f01cd4fd615a6809d9mbligh    update_iou_counters(my_iocbs, ret, &stop_time);
7775eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->num_global_pending += ret;
7785eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
7795eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
7805eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7815eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
7825eef716561e904a54f3803f01cd4fd615a6809d9mbligh * changes oper->rw to the next in a command sequence, or returns zero
7835eef716561e904a54f3803f01cd4fd615a6809d9mbligh * to say this operation is really, completely done for
7845eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
7855eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic int restart_oper(struct io_oper *oper) {
7865eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int new_rw  = 0;
7875eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->last_err)
7885eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return 0;
7895eef716561e904a54f3803f01cd4fd615a6809d9mbligh
7905eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* this switch falls through */
7915eef716561e904a54f3803f01cd4fd615a6809d9mbligh    switch(oper->rw) {
7925eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case WRITE:
7935eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (stages & (1 << READ))
7945eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    new_rw = READ;
7955eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case READ:
7965eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (!new_rw && stages & (1 << RWRITE))
7975eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    new_rw = RWRITE;
7985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    case RWRITE:
7995eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (!new_rw && stages & (1 << RREAD))
8005eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    new_rw = RREAD;
8015eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
8025eef716561e904a54f3803f01cd4fd615a6809d9mbligh
8035eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (new_rw) {
8045eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->started_ios = 0;
8055eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->last_offset = oper->start;
8065eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->stonewalled = 0;
8075eef716561e904a54f3803f01cd4fd615a6809d9mbligh
8085eef716561e904a54f3803f01cd4fd615a6809d9mbligh	/*
8095eef716561e904a54f3803f01cd4fd615a6809d9mbligh	 * we're restarting an operation with pending requests, so the
8105eef716561e904a54f3803f01cd4fd615a6809d9mbligh	 * timing info won't be printed by finish_io.  Printing it here
8115eef716561e904a54f3803f01cd4fd615a6809d9mbligh	 */
8125eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (oper->num_pending)
8135eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    print_time(oper);
8145eef716561e904a54f3803f01cd4fd615a6809d9mbligh
8155eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper->rw = new_rw;
8165eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return 1;
8175eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
8185eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
8195eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
8205eef716561e904a54f3803f01cd4fd615a6809d9mbligh
8215eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic int oper_runnable(struct io_oper *oper) {
8225eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct stat buf;
8235eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int ret;
8245eef716561e904a54f3803f01cd4fd615a6809d9mbligh
8255eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* first context is always runnable, if started_ios > 0, no need to
8265eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * redo the calculations
8275eef716561e904a54f3803f01cd4fd615a6809d9mbligh     */
8285eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->started_ios || oper->start == 0)
8295eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return 1;
8305eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /*
8315eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * only the sequential phases force delays in starting */
8325eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (oper->rw >= RWRITE)
8335eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return 1;
8345eef716561e904a54f3803f01cd4fd615a6809d9mbligh    ret = fstat(oper->fd, &buf);
8355eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (ret < 0) {
8365eef716561e904a54f3803f01cd4fd615a6809d9mbligh        perror("fstat");
8375eef716561e904a54f3803f01cd4fd615a6809d9mbligh	exit(1);
8385eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
8395eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (S_ISREG(buf.st_mode) && buf.st_size < oper->start)
8405eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return 0;
8415eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 1;
8425eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
8435eef716561e904a54f3803f01cd4fd615a6809d9mbligh
8445eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
8455eef716561e904a54f3803f01cd4fd615a6809d9mbligh * runs through all the io operations on the active list, and starts
8465eef716561e904a54f3803f01cd4fd615a6809d9mbligh * a chunk of io on each.  If any io operations are completely finished,
8475eef716561e904a54f3803f01cd4fd615a6809d9mbligh * it either switches them to the next stage or puts them on the
8485eef716561e904a54f3803f01cd4fd615a6809d9mbligh * finished list.
8495eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
8505eef716561e904a54f3803f01cd4fd615a6809d9mbligh * this function stops after max_io_submit iocbs are sent down the
8515eef716561e904a54f3803f01cd4fd615a6809d9mbligh * pipe, even if it has not yet touched all the operations on the
8525eef716561e904a54f3803f01cd4fd615a6809d9mbligh * active list.  Any operations that have finished are moved onto
8535eef716561e904a54f3803f01cd4fd615a6809d9mbligh * the finished_opers list.
8545eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
8555eef716561e904a54f3803f01cd4fd615a6809d9mblighstatic int run_active_list(struct thread_info *t,
8565eef716561e904a54f3803f01cd4fd615a6809d9mbligh			 int io_iter,
8575eef716561e904a54f3803f01cd4fd615a6809d9mbligh			 int max_io_submit)
8585eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
8595eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_oper *oper;
8605eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_oper *built_opers = NULL;
8615eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct iocb **my_iocbs = t->iocbs;
8625eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int ret = 0;
8635eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int num_built = 0;
8645eef716561e904a54f3803f01cd4fd615a6809d9mbligh
8655eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper = t->active_opers;
8665eef716561e904a54f3803f01cd4fd615a6809d9mbligh    while(oper) {
8675eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (!oper_runnable(oper)) {
8685eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper = oper->next;
8695eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    if (oper == t->active_opers)
8705eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        break;
8715eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    continue;
8725eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
8735eef716561e904a54f3803f01cd4fd615a6809d9mbligh	ret = build_oper(t, oper, io_iter, my_iocbs);
8745eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (ret >= 0) {
8755eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    my_iocbs += ret;
8765eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    num_built += ret;
8775eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_del(oper, &t->active_opers);
8785eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_add(oper, &built_opers);
8795eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper = t->active_opers;
8805eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    if (num_built + io_iter > max_io_submit)
8815eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        break;
8825eef716561e904a54f3803f01cd4fd615a6809d9mbligh	} else
8835eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
8845eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
8855eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (num_built) {
8865eef716561e904a54f3803f01cd4fd615a6809d9mbligh	ret = run_built(t, num_built, t->iocbs);
8875eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (ret < 0) {
8885eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    fprintf(stderr, "error %d on run_built\n", ret);
8895eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    exit(1);
8905eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
8915eef716561e904a54f3803f01cd4fd615a6809d9mbligh	while(built_opers) {
8925eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper = built_opers;
8935eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_del(oper, &built_opers);
8945eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_add(oper, &t->active_opers);
8955eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    if (oper->started_ios == oper->total_ios) {
8965eef716561e904a54f3803f01cd4fd615a6809d9mbligh		oper_list_del(oper, &t->active_opers);
8975eef716561e904a54f3803f01cd4fd615a6809d9mbligh		oper_list_add(oper, &t->finished_opers);
8985eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    }
8995eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
9005eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9015eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
9025eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
9035eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9045eef716561e904a54f3803f01cd4fd615a6809d9mblighvoid drop_shm() {
9055eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int ret;
9065eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct shmid_ds ds;
9075eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (use_shm != USE_SHM)
9085eef716561e904a54f3803f01cd4fd615a6809d9mbligh        return;
9095eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9105eef716561e904a54f3803f01cd4fd615a6809d9mbligh    ret = shmctl(shm_id, IPC_RMID, &ds);
9115eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (ret) {
9125eef716561e904a54f3803f01cd4fd615a6809d9mbligh        perror("shmctl IPC_RMID");
9135eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9145eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
9155eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9165eef716561e904a54f3803f01cd4fd615a6809d9mblighvoid aio_setup(io_context_t *io_ctx, int n)
9175eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
9185eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int res = io_queue_init(n, io_ctx);
9195eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (res != 0) {
9205eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "io_queue_setup(%d) returned %d (%s)\n",
9215eef716561e904a54f3803f01cd4fd615a6809d9mbligh		n, res, strerror(-res));
9225eef716561e904a54f3803f01cd4fd615a6809d9mbligh	exit(3);
9235eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9245eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
9255eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9265eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
9275eef716561e904a54f3803f01cd4fd615a6809d9mbligh * allocate io operation and event arrays for a given thread
9285eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
9295eef716561e904a54f3803f01cd4fd615a6809d9mblighint setup_ious(struct thread_info *t,
9305eef716561e904a54f3803f01cd4fd615a6809d9mbligh              int num_files, int depth,
9315eef716561e904a54f3803f01cd4fd615a6809d9mbligh	      int reclen, int max_io_submit) {
9325eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
9335eef716561e904a54f3803f01cd4fd615a6809d9mbligh    size_t bytes = num_files * depth * sizeof(*t->ios);
9345eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9355eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->ios = malloc(bytes);
9365eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!t->ios) {
9375eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "unable to allocate io units\n");
9385eef716561e904a54f3803f01cd4fd615a6809d9mbligh	return -1;
9395eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9405eef716561e904a54f3803f01cd4fd615a6809d9mbligh    memset(t->ios, 0, bytes);
9415eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9425eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = 0 ; i < depth * num_files; i++) {
9435eef716561e904a54f3803f01cd4fd615a6809d9mbligh	t->ios[i].buf = aligned_buffer;
9445eef716561e904a54f3803f01cd4fd615a6809d9mbligh	aligned_buffer += padded_reclen;
9455eef716561e904a54f3803f01cd4fd615a6809d9mbligh	t->ios[i].buf_size = reclen;
9465eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (verify)
9475eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    memset(t->ios[i].buf, 'b', reclen);
9485eef716561e904a54f3803f01cd4fd615a6809d9mbligh	else
9495eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    memset(t->ios[i].buf, 0, reclen);
9505eef716561e904a54f3803f01cd4fd615a6809d9mbligh	t->ios[i].next = t->free_ious;
9515eef716561e904a54f3803f01cd4fd615a6809d9mbligh	t->free_ious = t->ios + i;
9525eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9535eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (verify) {
9545eef716561e904a54f3803f01cd4fd615a6809d9mbligh        verify_buf = aligned_buffer;
9555eef716561e904a54f3803f01cd4fd615a6809d9mbligh        memset(verify_buf, 'b', reclen);
9565eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9575eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9585eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->iocbs = malloc(sizeof(struct iocb *) * max_io_submit);
9595eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!t->iocbs) {
9605eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "unable to allocate iocbs\n");
9615eef716561e904a54f3803f01cd4fd615a6809d9mbligh	goto free_buffers;
9625eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9635eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9645eef716561e904a54f3803f01cd4fd615a6809d9mbligh    memset(t->iocbs, 0, max_io_submit * sizeof(struct iocb *));
9655eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9665eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->events = malloc(sizeof(struct io_event) * depth * num_files);
9675eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!t->events) {
9685eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "unable to allocate ram for events\n");
9695eef716561e904a54f3803f01cd4fd615a6809d9mbligh	goto free_buffers;
9705eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
9715eef716561e904a54f3803f01cd4fd615a6809d9mbligh    memset(t->events, 0, num_files * sizeof(struct io_event)*depth);
9725eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9735eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->num_global_ios = num_files * depth;
9745eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t->num_global_events = t->num_global_ios;
9755eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
9765eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9775eef716561e904a54f3803f01cd4fd615a6809d9mblighfree_buffers:
9785eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->ios)
9795eef716561e904a54f3803f01cd4fd615a6809d9mbligh        free(t->ios);
9805eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->iocbs)
9815eef716561e904a54f3803f01cd4fd615a6809d9mbligh        free(t->iocbs);
9825eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->events)
9835eef716561e904a54f3803f01cd4fd615a6809d9mbligh        free(t->events);
9845eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return -1;
9855eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
9865eef716561e904a54f3803f01cd4fd615a6809d9mbligh
9875eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
9885eef716561e904a54f3803f01cd4fd615a6809d9mbligh * The buffers used for file data are allocated as a single big
9895eef716561e904a54f3803f01cd4fd615a6809d9mbligh * malloc, and then each thread and operation takes a piece and uses
9905eef716561e904a54f3803f01cd4fd615a6809d9mbligh * that for file data.  This lets us do a large shm or bigpages alloc
9915eef716561e904a54f3803f01cd4fd615a6809d9mbligh * and without trying to find a special place in each thread to map the
9925eef716561e904a54f3803f01cd4fd615a6809d9mbligh * buffers to
9935eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
9945eef716561e904a54f3803f01cd4fd615a6809d9mblighint setup_shared_mem(int num_threads, int num_files, int depth,
9955eef716561e904a54f3803f01cd4fd615a6809d9mbligh                     int reclen, int max_io_submit)
9965eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
9975eef716561e904a54f3803f01cd4fd615a6809d9mbligh    char *p = NULL;
9985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    size_t total_ram;
9995eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10005eef716561e904a54f3803f01cd4fd615a6809d9mbligh    padded_reclen = (reclen + page_size_mask) / (page_size_mask+1);
10015eef716561e904a54f3803f01cd4fd615a6809d9mbligh    padded_reclen = padded_reclen * (page_size_mask+1);
10025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    total_ram = num_files * depth * padded_reclen + num_threads;
10035eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (verify)
10045eef716561e904a54f3803f01cd4fd615a6809d9mbligh    	total_ram += padded_reclen;
10055eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10065eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (use_shm == USE_MALLOC) {
10075eef716561e904a54f3803f01cd4fd615a6809d9mbligh	p = malloc(total_ram + page_size_mask);
10085eef716561e904a54f3803f01cd4fd615a6809d9mbligh    } else if (use_shm == USE_SHM) {
10095eef716561e904a54f3803f01cd4fd615a6809d9mbligh        shm_id = shmget(IPC_PRIVATE, total_ram, IPC_CREAT | 0700);
10105eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (shm_id < 0) {
10115eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    perror("shmget");
10125eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    drop_shm();
10135eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    goto free_buffers;
10145eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
10155eef716561e904a54f3803f01cd4fd615a6809d9mbligh	p = shmat(shm_id, (char *)0x50000000, 0);
10165eef716561e904a54f3803f01cd4fd615a6809d9mbligh        if ((long)p == -1) {
10175eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    perror("shmat");
10185eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    goto free_buffers;
10195eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
10205eef716561e904a54f3803f01cd4fd615a6809d9mbligh	/* won't really be dropped until we shmdt */
10215eef716561e904a54f3803f01cd4fd615a6809d9mbligh	drop_shm();
10225eef716561e904a54f3803f01cd4fd615a6809d9mbligh    } else if (use_shm == USE_SHMFS) {
10235eef716561e904a54f3803f01cd4fd615a6809d9mbligh        char mmap_name[16]; /* /dev/shm/ + null + XXXXXX */
10245eef716561e904a54f3803f01cd4fd615a6809d9mbligh	int fd;
10255eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10265eef716561e904a54f3803f01cd4fd615a6809d9mbligh	strcpy(mmap_name, "/dev/shm/XXXXXX");
10275eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fd = mkstemp(mmap_name);
10285eef716561e904a54f3803f01cd4fd615a6809d9mbligh        if (fd < 0) {
10295eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    perror("mkstemp");
10305eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    goto free_buffers;
10315eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
10325eef716561e904a54f3803f01cd4fd615a6809d9mbligh	unlink(mmap_name);
10335eef716561e904a54f3803f01cd4fd615a6809d9mbligh	ftruncate(fd, total_ram);
10345eef716561e904a54f3803f01cd4fd615a6809d9mbligh	shm_id = fd;
10355eef716561e904a54f3803f01cd4fd615a6809d9mbligh	p = mmap((char *)0x50000000, total_ram,
10365eef716561e904a54f3803f01cd4fd615a6809d9mbligh	         PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
10375eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10385eef716561e904a54f3803f01cd4fd615a6809d9mbligh        if (p == MAP_FAILED) {
10395eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    perror("mmap");
10405eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    goto free_buffers;
10415eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
10425eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
10435eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!p) {
10445eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "unable to allocate buffers\n");
10455eef716561e904a54f3803f01cd4fd615a6809d9mbligh	goto free_buffers;
10465eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
10475eef716561e904a54f3803f01cd4fd615a6809d9mbligh    unaligned_buffer = p;
10485eef716561e904a54f3803f01cd4fd615a6809d9mbligh    p = (char*)((intptr_t) (p + page_size_mask) & ~page_size_mask);
10495eef716561e904a54f3803f01cd4fd615a6809d9mbligh    aligned_buffer = p;
10505eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
10515eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10525eef716561e904a54f3803f01cd4fd615a6809d9mblighfree_buffers:
10535eef716561e904a54f3803f01cd4fd615a6809d9mbligh    drop_shm();
10545eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (unaligned_buffer)
10555eef716561e904a54f3803f01cd4fd615a6809d9mbligh        free(unaligned_buffer);
10565eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return -1;
10575eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
10585eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10595eef716561e904a54f3803f01cd4fd615a6809d9mbligh/*
10605eef716561e904a54f3803f01cd4fd615a6809d9mbligh * runs through all the thread_info structs and calculates a combined
10615eef716561e904a54f3803f01cd4fd615a6809d9mbligh * throughput
10625eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
10635eef716561e904a54f3803f01cd4fd615a6809d9mblighvoid global_thread_throughput(struct thread_info *t, char *this_stage) {
10645eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
10655eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double runtime = time_since_now(&global_stage_start_time);
10665eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double total_mb = 0;
10675eef716561e904a54f3803f01cd4fd615a6809d9mbligh    double min_trans = 0;
10685eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10695eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = 0 ; i < num_threads ; i++) {
10705eef716561e904a54f3803f01cd4fd615a6809d9mbligh        total_mb += global_thread_info[i].stage_mb_trans;
10715eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (!min_trans || t->stage_mb_trans < min_trans)
10725eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    min_trans = t->stage_mb_trans;
10735eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
10745eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (total_mb) {
10755eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "%s throughput (%.2f MB/s) ", this_stage,
10765eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        total_mb / runtime);
10775eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "%.2f MB in %.2fs", total_mb, runtime);
10785eef716561e904a54f3803f01cd4fd615a6809d9mbligh        if (stonewall)
10795eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    fprintf(stderr, " min transfer %.2fMB", min_trans);
10805eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "\n");
10815eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
10825eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
10835eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10845eef716561e904a54f3803f01cd4fd615a6809d9mbligh
10855eef716561e904a54f3803f01cd4fd615a6809d9mbligh/* this is the meat of the state machine.  There is a list of
10865eef716561e904a54f3803f01cd4fd615a6809d9mbligh * active operations structs, and as each one finishes the required
10875eef716561e904a54f3803f01cd4fd615a6809d9mbligh * io it is moved to a list of finished operations.  Once they have
10885eef716561e904a54f3803f01cd4fd615a6809d9mbligh * all finished whatever stage they were in, they are given the chance
10895eef716561e904a54f3803f01cd4fd615a6809d9mbligh * to restart and pick a different stage (read/write/random read etc)
10905eef716561e904a54f3803f01cd4fd615a6809d9mbligh *
10915eef716561e904a54f3803f01cd4fd615a6809d9mbligh * various timings are printed in between the stages, along with
10925eef716561e904a54f3803f01cd4fd615a6809d9mbligh * thread synchronization if there are more than one threads.
10935eef716561e904a54f3803f01cd4fd615a6809d9mbligh */
10945eef716561e904a54f3803f01cd4fd615a6809d9mblighint worker(struct thread_info *t)
10955eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
10965eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_oper *oper;
10975eef716561e904a54f3803f01cd4fd615a6809d9mbligh    char *this_stage = NULL;
10985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct timeval stage_time;
10995eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int status = 0;
11005eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int iteration = 0;
11015eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int cnt;
11025eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11035eef716561e904a54f3803f01cd4fd615a6809d9mbligh    aio_setup(&t->io_ctx, 512);
11045eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11055eef716561e904a54f3803f01cd4fd615a6809d9mblighrestart:
11065eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (num_threads > 1) {
11075eef716561e904a54f3803f01cd4fd615a6809d9mbligh        pthread_mutex_lock(&stage_mutex);
11085eef716561e904a54f3803f01cd4fd615a6809d9mbligh	threads_starting++;
11095eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (threads_starting == num_threads) {
11105eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    threads_ending = 0;
11115eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    gettimeofday(&global_stage_start_time, NULL);
11125eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    pthread_cond_broadcast(&stage_cond);
11135eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
11145eef716561e904a54f3803f01cd4fd615a6809d9mbligh	while (threads_starting != num_threads)
11155eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    pthread_cond_wait(&stage_cond, &stage_mutex);
11165eef716561e904a54f3803f01cd4fd615a6809d9mbligh        pthread_mutex_unlock(&stage_mutex);
11175eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
11185eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->active_opers) {
11195eef716561e904a54f3803f01cd4fd615a6809d9mbligh        this_stage = stage_name(t->active_opers->rw);
11205eef716561e904a54f3803f01cd4fd615a6809d9mbligh	gettimeofday(&stage_time, NULL);
11215eef716561e904a54f3803f01cd4fd615a6809d9mbligh	t->stage_mb_trans = 0;
11225eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
11235eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11245eef716561e904a54f3803f01cd4fd615a6809d9mbligh    cnt = 0;
11255eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* first we send everything through aio */
11265eef716561e904a54f3803f01cd4fd615a6809d9mbligh    while(t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) {
11275eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (stonewall && threads_ending) {
11285eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper = t->active_opers;
11295eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper->stonewalled = 1;
11305eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_del(oper, &t->active_opers);
11315eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_add(oper, &t->finished_opers);
11325eef716561e904a54f3803f01cd4fd615a6809d9mbligh	} else {
11335eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    run_active_list(t, io_iter,  max_io_submit);
11345eef716561e904a54f3803f01cd4fd615a6809d9mbligh        }
11355eef716561e904a54f3803f01cd4fd615a6809d9mbligh	cnt++;
11365eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
11375eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (latency_stats)
11385eef716561e904a54f3803f01cd4fd615a6809d9mbligh        print_latency(t);
11395eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11405eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (completion_latency_stats)
11415eef716561e904a54f3803f01cd4fd615a6809d9mbligh	print_completion_latency(t);
11425eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11435eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* then we wait for all the operations to finish */
11445eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper = t->finished_opers;
11455eef716561e904a54f3803f01cd4fd615a6809d9mbligh    do {
11465eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (!oper)
11475eef716561e904a54f3803f01cd4fd615a6809d9mbligh		break;
11485eef716561e904a54f3803f01cd4fd615a6809d9mbligh	io_oper_wait(t, oper);
11495eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper = oper->next;
11505eef716561e904a54f3803f01cd4fd615a6809d9mbligh    } while(oper != t->finished_opers);
11515eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11525eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* then we do an fsync to get the timing for any future operations
11535eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * right, and check to see if any of these need to get restarted
11545eef716561e904a54f3803f01cd4fd615a6809d9mbligh     */
11555eef716561e904a54f3803f01cd4fd615a6809d9mbligh    oper = t->finished_opers;
11565eef716561e904a54f3803f01cd4fd615a6809d9mbligh    while(oper) {
11575eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (fsync_stages)
11585eef716561e904a54f3803f01cd4fd615a6809d9mbligh            fsync(oper->fd);
11595eef716561e904a54f3803f01cd4fd615a6809d9mbligh	t->stage_mb_trans += oper_mb_trans(oper);
11605eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (restart_oper(oper)) {
11615eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_del(oper, &t->finished_opers);
11625eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_add(oper, &t->active_opers);
11635eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper = t->finished_opers;
11645eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    continue;
11655eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
11665eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper = oper->next;
11675eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (oper == t->finished_opers)
11685eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
11695eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
11705eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11715eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->stage_mb_trans && t->num_files > 0) {
11725eef716561e904a54f3803f01cd4fd615a6809d9mbligh        double seconds = time_since_now(&stage_time);
11735eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "thread %d %s totals (%.2f MB/s) %.2f MB in %.2fs\n",
11745eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        t - global_thread_info, this_stage, t->stage_mb_trans/seconds,
11755eef716561e904a54f3803f01cd4fd615a6809d9mbligh		t->stage_mb_trans, seconds);
11765eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
11775eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11785eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (num_threads > 1) {
11795eef716561e904a54f3803f01cd4fd615a6809d9mbligh	pthread_mutex_lock(&stage_mutex);
11805eef716561e904a54f3803f01cd4fd615a6809d9mbligh	threads_ending++;
11815eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (threads_ending == num_threads) {
11825eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    threads_starting = 0;
11835eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    pthread_cond_broadcast(&stage_cond);
11845eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    global_thread_throughput(t, this_stage);
11855eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
11865eef716561e904a54f3803f01cd4fd615a6809d9mbligh	while(threads_ending != num_threads)
11875eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    pthread_cond_wait(&stage_cond, &stage_mutex);
11885eef716561e904a54f3803f01cd4fd615a6809d9mbligh	pthread_mutex_unlock(&stage_mutex);
11895eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
11905eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11915eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* someone got restarted, go back to the beginning */
11925eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) {
11935eef716561e904a54f3803f01cd4fd615a6809d9mbligh	iteration++;
11945eef716561e904a54f3803f01cd4fd615a6809d9mbligh        goto restart;
11955eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
11965eef716561e904a54f3803f01cd4fd615a6809d9mbligh
11975eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* finally, free all the ram */
11985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    while(t->finished_opers) {
11995eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper = t->finished_opers;
12005eef716561e904a54f3803f01cd4fd615a6809d9mbligh	oper_list_del(oper, &t->finished_opers);
12015eef716561e904a54f3803f01cd4fd615a6809d9mbligh	status = finish_oper(t, oper);
12025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
12035eef716561e904a54f3803f01cd4fd615a6809d9mbligh
12045eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (t->num_global_pending) {
12055eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "global num pending is %d\n", t->num_global_pending);
12065eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
12075eef716561e904a54f3803f01cd4fd615a6809d9mbligh    io_queue_release(t->io_ctx);
12085eef716561e904a54f3803f01cd4fd615a6809d9mbligh
12095eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return status;
12105eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
12115eef716561e904a54f3803f01cd4fd615a6809d9mbligh
12125eef716561e904a54f3803f01cd4fd615a6809d9mblightypedef void * (*start_routine)(void *);
12135eef716561e904a54f3803f01cd4fd615a6809d9mblighint run_workers(struct thread_info *t, int num_threads)
12145eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
12155eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int ret;
12165eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int thread_ret;
12175eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
12185eef716561e904a54f3803f01cd4fd615a6809d9mbligh
12195eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for(i = 0 ; i < num_threads ; i++) {
12205eef716561e904a54f3803f01cd4fd615a6809d9mbligh        ret = pthread_create(&t[i].tid, NULL, (start_routine)worker, t + i);
12215eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (ret) {
12225eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    perror("pthread_create");
12235eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    exit(1);
12245eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
12255eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
12265eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for(i = 0 ; i < num_threads ; i++) {
12275eef716561e904a54f3803f01cd4fd615a6809d9mbligh        ret = pthread_join(t[i].tid, (void *)&thread_ret);
12285eef716561e904a54f3803f01cd4fd615a6809d9mbligh        if (ret) {
12295eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    perror("pthread_join");
12305eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    exit(1);
12315eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
12325eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
12335eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return 0;
12345eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
12355eef716561e904a54f3803f01cd4fd615a6809d9mbligh
/*
 * Convert a size argument such as "400", "64k", "2M" or "1g" to bytes.
 *
 * size_arg - decimal number, optionally followed by one suffix letter:
 *            g/G (GiB), m/M (MiB), k/K (KiB) or b/B (bytes)
 * mult     - multiplier applied when no recognised suffix is present
 *
 * Returns number * multiplier, or 0 for a NULL or empty string.  The
 * argument string is left unmodified.
 */
off_t parse_size(char *size_arg, off_t mult) {
    size_t len;
    long long num;

    if (!size_arg)
        return 0;

    len = strlen(size_arg);
    if (len == 0)
        return 0;	/* nothing to parse, and no last character to read */

    switch(size_arg[len - 1]) {
    case 'g':
    case 'G':
        mult = 1024 * 1024 * 1024;
	break;
    case 'm':
    case 'M':
        mult = 1024 * 1024;
	break;
    case 'k':
    case 'K':
        mult = 1024;
	break;
    case 'b':
    case 'B':
        mult = 1;
	break;
    }

    /*
     * strtoll stops at the first non-digit, so the suffix does not have
     * to be cut off, and values beyond the range of int survive.
     */
    num = strtoll(size_arg, NULL, 10);
    return mult * num;
}
12665eef716561e904a54f3803f01cd4fd615a6809d9mbligh
12675eef716561e904a54f3803f01cd4fd615a6809d9mblighvoid print_usage(void) {
12685eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("usage: aio-stress [-s size] [-r size] [-a size] [-d num] [-b num]\n");
12695eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("                  [-i num] [-t num] [-c num] [-C size] [-nxhOS ]\n");
12705eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("                  file1 [file2 ...]\n");
12715eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-a size in KB at which to align buffers\n");
12725eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-b max number of iocbs to give io_submit at once\n");
12735eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-c number of io contexts per file\n");
12745eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-C offset between contexts, default 2MB\n");
12755eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-s size in MB of the test file(s), default 1024MB\n");
12765eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-r record size in KB used for each io, default 64KB\n");
12775eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-d number of pending aio requests for each file, default 64\n");
12785eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-i number of ios per file sent before switching\n\t   to the next file, default 8\n");
12795eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-I total number of ayncs IOs the program will run, default is run until Cntl-C\n");
12805eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-O Use O_DIRECT (not available in 2.4 kernels),\n");
12815eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-S Use O_SYNC for writes\n");
12825eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-o add an operation to the list: write=0, read=1,\n");
12835eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t   random write=2, random read=3.\n");
12845eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t   repeat -o to specify multiple ops: -o 0 -o 1 etc.\n");
12855eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-m shm use ipc shared memory for io buffers instead of malloc\n");
12865eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-m shmfs mmap a file in /dev/shm for io buffers\n");
12875eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-n no fsyncs between write stage and read stage\n");
12885eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-l print io_submit latencies after each stage\n");
12895eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-L print io completion latencies after each stage\n");
12905eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-t number of threads to run\n");
12915eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-u unlink files after completion\n");
12925eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-v verification of bytes written\n");
12935eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-x turn off thread stonewalling\n");
12945eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t-h this message\n");
12955eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\n\t   the size options (-a -s and -r) allow modifiers -s 400{k,m,g}\n");
12965eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("\t   translate to 400KB, 400MB and 400GB\n");
12975eef716561e904a54f3803f01cd4fd615a6809d9mbligh    printf("version %s\n", PROG_VERSION);
12985eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
12995eef716561e904a54f3803f01cd4fd615a6809d9mbligh
13005eef716561e904a54f3803f01cd4fd615a6809d9mblighint main(int ac, char **av)
13015eef716561e904a54f3803f01cd4fd615a6809d9mbligh{
13025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int rwfd;
13035eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int i;
13045eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int j;
13055eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int c;
13065eef716561e904a54f3803f01cd4fd615a6809d9mbligh
13075eef716561e904a54f3803f01cd4fd615a6809d9mbligh    off_t file_size = 1 * 1024 * 1024 * 1024;
13085eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int first_stage = WRITE;
13095eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct io_oper *oper;
13105eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int status = 0;
13115eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int num_files = 0;
13125eef716561e904a54f3803f01cd4fd615a6809d9mbligh    int open_fds = 0;
13135eef716561e904a54f3803f01cd4fd615a6809d9mbligh    struct thread_info *t;
13145eef716561e904a54f3803f01cd4fd615a6809d9mbligh
13155eef716561e904a54f3803f01cd4fd615a6809d9mbligh    page_size_mask = getpagesize() - 1;
13165eef716561e904a54f3803f01cd4fd615a6809d9mbligh
13175eef716561e904a54f3803f01cd4fd615a6809d9mbligh    while(1) {
13185eef716561e904a54f3803f01cd4fd615a6809d9mbligh	c = getopt(ac, av, "a:b:c:C:m:s:r:d:i:I:o:t:lLnhOSxvu");
13195eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if  (c < 0)
13205eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13215eef716561e904a54f3803f01cd4fd615a6809d9mbligh
13225eef716561e904a54f3803f01cd4fd615a6809d9mbligh        switch(c) {
13235eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'a':
13245eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    page_size_mask = parse_size(optarg, 1024);
13255eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    page_size_mask--;
13265eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13275eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'c':
13285eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    num_contexts = atoi(optarg);
13295eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13305eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'C':
13315eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    context_offset = parse_size(optarg, 1024 * 1024);
13325eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'b':
13335eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    max_io_submit = atoi(optarg);
13345eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13355eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 's':
13365eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    file_size = parse_size(optarg, 1024 * 1024);
13375eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13385eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'd':
13395eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    depth = atoi(optarg);
13405eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13415eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'r':
13425eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    rec_len = parse_size(optarg, 1024);
13435eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13445eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'i':
13455eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    io_iter = atoi(optarg);
13465eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13475eef716561e904a54f3803f01cd4fd615a6809d9mbligh        case 'I':
13485eef716561e904a54f3803f01cd4fd615a6809d9mbligh          iterations = atoi(optarg);
13495eef716561e904a54f3803f01cd4fd615a6809d9mbligh        break;
13505eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'n':
13515eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    fsync_stages = 0;
13525eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13535eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'l':
13545eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    latency_stats = 1;
13555eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13565eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'L':
13575eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    completion_latency_stats = 1;
13585eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13595eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'm':
13605eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    if (!strcmp(optarg, "shm")) {
13615eef716561e904a54f3803f01cd4fd615a6809d9mbligh		fprintf(stderr, "using ipc shm\n");
13625eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        use_shm = USE_SHM;
13635eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    } else if (!strcmp(optarg, "shmfs")) {
13645eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        fprintf(stderr, "using /dev/shm for buffers\n");
13655eef716561e904a54f3803f01cd4fd615a6809d9mbligh		use_shm = USE_SHMFS;
13665eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    }
13675eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13685eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'o':
13695eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    i = atoi(optarg);
13705eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    stages |= 1 << i;
13715eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    fprintf(stderr, "adding stage %s\n", stage_name(i));
13725eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13735eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'O':
13745eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    o_direct = O_DIRECT;
13755eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13765eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'S':
13775eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    o_sync = O_SYNC;
13785eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13795eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 't':
13805eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    num_threads = atoi(optarg);
13815eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13825eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'x':
13835eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    stonewall = 0;
13845eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13855eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'u':
13865eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    unlink_files = 1;
13875eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13885eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'v':
13895eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    verify = 1;
13905eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    break;
13915eef716561e904a54f3803f01cd4fd615a6809d9mbligh	case 'h':
13925eef716561e904a54f3803f01cd4fd615a6809d9mbligh	default:
13935eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    print_usage();
13945eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    exit(1);
13955eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
13965eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
13975eef716561e904a54f3803f01cd4fd615a6809d9mbligh
13985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /*
13995eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * make sure we don't try to submit more ios than we have allocated
14005eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * memory for
14015eef716561e904a54f3803f01cd4fd615a6809d9mbligh     */
14025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (depth < io_iter) {
14035eef716561e904a54f3803f01cd4fd615a6809d9mbligh	io_iter = depth;
14045eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "dropping io_iter to %d\n", io_iter);
14055eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14065eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14075eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (optind >= ac) {
14085eef716561e904a54f3803f01cd4fd615a6809d9mbligh	print_usage();
14095eef716561e904a54f3803f01cd4fd615a6809d9mbligh	exit(1);
14105eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14115eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14125eef716561e904a54f3803f01cd4fd615a6809d9mbligh    num_files = ac - optind;
14135eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14145eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (num_threads > (num_files * num_contexts)) {
14155eef716561e904a54f3803f01cd4fd615a6809d9mbligh        num_threads = num_files * num_contexts;
14165eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "dropping thread count to the number of contexts %d\n",
14175eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        num_threads);
14185eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14195eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14205eef716561e904a54f3803f01cd4fd615a6809d9mbligh    t = malloc(num_threads * sizeof(*t));
14215eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!t) {
14225eef716561e904a54f3803f01cd4fd615a6809d9mbligh        perror("malloc");
14235eef716561e904a54f3803f01cd4fd615a6809d9mbligh	exit(1);
14245eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14255eef716561e904a54f3803f01cd4fd615a6809d9mbligh    global_thread_info = t;
14265eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14275eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* by default, allow a huge number of iocbs to be sent towards
14285eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * io_submit
14295eef716561e904a54f3803f01cd4fd615a6809d9mbligh     */
14305eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!max_io_submit)
14315eef716561e904a54f3803f01cd4fd615a6809d9mbligh        max_io_submit = num_files * io_iter * num_contexts;
14325eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14335eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /*
14345eef716561e904a54f3803f01cd4fd615a6809d9mbligh     * make sure we don't try to submit more ios than max_io_submit allows
14355eef716561e904a54f3803f01cd4fd615a6809d9mbligh     */
14365eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (max_io_submit < io_iter) {
14375eef716561e904a54f3803f01cd4fd615a6809d9mbligh        io_iter = max_io_submit;
14385eef716561e904a54f3803f01cd4fd615a6809d9mbligh	fprintf(stderr, "dropping io_iter to %d\n", io_iter);
14395eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14405eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14415eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (!stages) {
14425eef716561e904a54f3803f01cd4fd615a6809d9mbligh        stages = (1 << WRITE) | (1 << READ) | (1 << RREAD) | (1 << RWRITE);
14435eef716561e904a54f3803f01cd4fd615a6809d9mbligh    } else {
14445eef716561e904a54f3803f01cd4fd615a6809d9mbligh        for (i = 0 ; i < LAST_STAGE; i++) {
14455eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    if (stages & (1 << i)) {
14465eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        first_stage = i;
14475eef716561e904a54f3803f01cd4fd615a6809d9mbligh		fprintf(stderr, "starting with %s\n", stage_name(i));
14485eef716561e904a54f3803f01cd4fd615a6809d9mbligh		break;
14495eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    }
14505eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
14515eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14525eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14535eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (file_size < num_contexts * context_offset) {
14545eef716561e904a54f3803f01cd4fd615a6809d9mbligh        fprintf(stderr, "file size %Lu too small for %d contexts\n",
14555eef716561e904a54f3803f01cd4fd615a6809d9mbligh	        file_size, num_contexts);
14565eef716561e904a54f3803f01cd4fd615a6809d9mbligh	exit(1);
14575eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14585eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14595eef716561e904a54f3803f01cd4fd615a6809d9mbligh    fprintf(stderr, "file size %LuMB, record size %luKB, depth %d, ios per iteration %d\n", file_size / (1024 * 1024), rec_len / 1024, depth, io_iter);
14605eef716561e904a54f3803f01cd4fd615a6809d9mbligh    fprintf(stderr, "max io_submit %d, buffer alignment set to %luKB\n",
14615eef716561e904a54f3803f01cd4fd615a6809d9mbligh            max_io_submit, (page_size_mask + 1)/1024);
14625eef716561e904a54f3803f01cd4fd615a6809d9mbligh    fprintf(stderr, "threads %d files %d contexts %d context offset %LuMB verification %s\n",
14635eef716561e904a54f3803f01cd4fd615a6809d9mbligh            num_threads, num_files, num_contexts,
14645eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    context_offset / (1024 * 1024), verify ? "on" : "off");
14655eef716561e904a54f3803f01cd4fd615a6809d9mbligh    /* open all the files and do any required setup for them */
14665eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = optind ; i < ac ; i++) {
14675eef716561e904a54f3803f01cd4fd615a6809d9mbligh	int thread_index;
14685eef716561e904a54f3803f01cd4fd615a6809d9mbligh	for (j = 0 ; j < num_contexts ; j++) {
14695eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    thread_index = open_fds % num_threads;
14705eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    open_fds++;
14715eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14725eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    rwfd = open(av[i], O_CREAT | O_RDWR | o_direct | o_sync, 0600);
14735eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    assert(rwfd != -1);
14745eef716561e904a54f3803f01cd4fd615a6809d9mbligh
14755eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper = create_oper(rwfd, first_stage, j * context_offset,
14765eef716561e904a54f3803f01cd4fd615a6809d9mbligh	                       file_size - j * context_offset, rec_len,
14775eef716561e904a54f3803f01cd4fd615a6809d9mbligh			       depth, io_iter, av[i]);
14785eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    if (!oper) {
14795eef716561e904a54f3803f01cd4fd615a6809d9mbligh		fprintf(stderr, "error in create_oper\n");
14805eef716561e904a54f3803f01cd4fd615a6809d9mbligh		exit(-1);
14815eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    }
14825eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    oper_list_add(oper, &t[thread_index].active_opers);
14835eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    t[thread_index].num_files++;
14845eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
14855eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14865eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (setup_shared_mem(num_threads, num_files * num_contexts,
14875eef716561e904a54f3803f01cd4fd615a6809d9mbligh                         depth, rec_len, max_io_submit))
14885eef716561e904a54f3803f01cd4fd615a6809d9mbligh    {
14895eef716561e904a54f3803f01cd4fd615a6809d9mbligh        exit(1);
14905eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14915eef716561e904a54f3803f01cd4fd615a6809d9mbligh    for (i = 0 ; i < num_threads ; i++) {
14925eef716561e904a54f3803f01cd4fd615a6809d9mbligh	if (setup_ious(&t[i], t[i].num_files, depth, rec_len, max_io_submit))
14935eef716561e904a54f3803f01cd4fd615a6809d9mbligh		exit(1);
14945eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
14955eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (num_threads > 1){
14965eef716561e904a54f3803f01cd4fd615a6809d9mbligh        printf("Running multi thread version num_threads:%d\n", num_threads);
14975eef716561e904a54f3803f01cd4fd615a6809d9mbligh        run_workers(t, num_threads);
14985eef716561e904a54f3803f01cd4fd615a6809d9mbligh    } else {
14995eef716561e904a54f3803f01cd4fd615a6809d9mbligh        printf("Running single thread version \n");
15005eef716561e904a54f3803f01cd4fd615a6809d9mbligh	status = worker(t);
15015eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
15025eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (unlink_files) {
15035eef716561e904a54f3803f01cd4fd615a6809d9mbligh	for (i = optind ; i < ac ; i++) {
15045eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    printf("Cleaning up file %s \n", av[i]);
15055eef716561e904a54f3803f01cd4fd615a6809d9mbligh	    unlink(av[i]);
15065eef716561e904a54f3803f01cd4fd615a6809d9mbligh	}
15075eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
15085eef716561e904a54f3803f01cd4fd615a6809d9mbligh
15095eef716561e904a54f3803f01cd4fd615a6809d9mbligh    if (status) {
15105eef716561e904a54f3803f01cd4fd615a6809d9mbligh	exit(1);
15115eef716561e904a54f3803f01cd4fd615a6809d9mbligh    }
15125eef716561e904a54f3803f01cd4fd615a6809d9mbligh    return status;
15135eef716561e904a54f3803f01cd4fd615a6809d9mbligh}
15145eef716561e904a54f3803f01cd4fd615a6809d9mbligh
1515