1// Copyright 2006 Google Inc. All Rights Reserved.
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6
7//      http://www.apache.org/licenses/LICENSE-2.0
8
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// sat.h : sat stress test object interface and data structures
16
17#ifndef STRESSAPPTEST_SAT_H_
18#define STRESSAPPTEST_SAT_H_
19
20#include <signal.h>
21
22#include <map>
23#include <string>
24#include <vector>
25
26// This file must work with autoconf on its public version,
27// so these includes are correct.
28#include "finelock_queue.h"
29#include "queue.h"
30#include "sattypes.h"
31#include "worker.h"
32#include "os.h"
33
34// SAT stress test class.
35class Sat {
36 public:
37  // Enum for page queue implementation switch.
38  enum PageQueueType { SAT_ONELOCK, SAT_FINELOCK };
39
40  Sat();
41  virtual ~Sat();
42
43  // Read configuration from arguments. Called first.
44  bool ParseArgs(int argc, char **argv);
45  virtual bool CheckGoogleSpecificArgs(int argc, char **argv, int *i);
46  // Initialize data structures, subclasses, and resources,
47  // based on command line args.
48  // Called after ParseArgs().
49  bool Initialize();
50
51  // Execute the test. Initialize() and ParseArgs() must be called first.
52  // This must be called from a single-threaded program.
53  bool Run();
54
55  // Pretty print result summary.
56  // Called after Run().
57  // Return value is success or failure of the SAT run, *not* of this function!
58  bool PrintResults();
59
60  // Pretty print version info.
61  bool PrintVersion();
62
63  // Pretty print help.
64  virtual void PrintHelp();
65
66  // Clean up allocations and resources.
67  // Called last.
68  bool Cleanup();
69
70  // Abort Run().  Only for use by Run()-installed signal handlers.
71  void Break() { user_break_ = true; }
72
73  // Fetch and return empty and full pages into the empty and full pools.
74  bool GetValid(struct page_entry *pe);
75  bool PutValid(struct page_entry *pe);
76  bool GetEmpty(struct page_entry *pe);
77  bool PutEmpty(struct page_entry *pe);
78
79  bool GetValid(struct page_entry *pe, int32 tag);
80  bool GetEmpty(struct page_entry *pe, int32 tag);
81
82  // Accessor functions.
83  int verbosity() const { return verbosity_; }
84  int logfile() const { return logfile_; }
85  int page_length() const { return page_length_; }
86  int disk_pages() const { return disk_pages_; }
87  int strict() const { return strict_; }
88  int tag_mode() const { return tag_mode_; }
89  int status() const { return statuscount_; }
90  void bad_status() { statuscount_++; }
91  int errors() const { return errorcount_; }
92  int warm() const { return warm_; }
93  bool stop_on_error() const { return stop_on_error_; }
94  int32 region_mask() const { return region_mask_; }
95  // Semi-accessor to find the "nth" region to avoid replicated bit searching..
96  int32 region_find(int32 num) const {
97    for (int i = 0; i < 32; i++) {
98      if ((1 << i) & region_mask_) {
99        if (num == 0)
100          return i;
101        num--;
102      }
103    }
104    return 0;
105  }
106
107  // Causes false errors for unittesting.
108  // Setting to "true" causes errors to be injected.
109  void set_error_injection(bool errors) { error_injection_ = errors; }
110  bool error_injection() const { return error_injection_; }
111
112 protected:
113  // Opens log file for writing. Returns 0 on failure.
114  bool InitializeLogfile();
115  // Checks for supported environment. Returns 0 on failure.
116  bool CheckEnvironment();
117  // Allocates size_ bytes of test memory.
118  bool AllocateMemory();
119  // Initializes datapattern reference structures.
120  bool InitializePatterns();
121  // Initializes test memory with datapatterns.
122  bool InitializePages();
123
124  // Start up worker threads.
125  virtual void InitializeThreads();
126  // Spawn worker threads.
127  void SpawnThreads();
128  // Reap worker threads.
129  void JoinThreads();
130  // Run bandwidth and error analysis.
131  virtual void RunAnalysis();
132  // Delete worker threads.
133  void DeleteThreads();
134
135  // Return the number of cpus in the system.
136  int CpuCount();
137
138  // Collect error counts from threads.
139  int64 GetTotalErrorCount();
140
141  // Command line arguments.
142  string cmdline_;
143
144  // Memory and test configuration.
145  int runtime_seconds_;               // Seconds to run.
146  int page_length_;                   // Length of each memory block.
147  int64 pages_;                       // Number of memory blocks.
148  int64 size_;                        // Size of memory tested, in bytes.
149  int64 size_mb_;                     // Size of memory tested, in MB.
150  int64 min_hugepages_mbytes_;        // Minimum hugepages size.
151  int64 freepages_;                   // How many invalid pages we need.
152  int disk_pages_;                    // Number of pages per temp file.
153  uint64 paddr_base_;                 // Physical address base.
154
155  // Control flags.
156  volatile sig_atomic_t user_break_;  // User has signalled early exit.  Used as
157                                      // a boolean.
158  int verbosity_;                     // How much to print.
159  int strict_;                        // Check results per transaction.
160  int warm_;                          // FPU warms CPU while coying.
161  int address_mode_;                  // 32 or 64 bit binary.
162  bool stop_on_error_;                // Exit immendiately on any error.
163  bool findfiles_;                    // Autodetect tempfile locations.
164
165  bool error_injection_;              // Simulate errors, for unittests.
166  bool crazy_error_injection_;        // Simulate lots of errors.
167  uint64 max_errorcount_;             // Number of errors before forced exit.
168  int run_on_anything_;               // Ignore unknown machine ereor.
169  int use_logfile_;                   // Log to a file.
170  char logfilename_[255];             // Name of file to log to.
171  int logfile_;                       // File handle to log to.
172
173  // Disk thread options.
174  int read_block_size_;               // Size of block to read from disk.
175  int write_block_size_;              // Size of block to write to disk.
176  int64 segment_size_;                // Size of segment to split disk into.
177  int cache_size_;                    // Size of disk cache.
178  int blocks_per_segment_;            // Number of blocks to test per segment.
179  int read_threshold_;                // Maximum time (in us) a read should take
180                                      // before warning of a slow read.
181  int write_threshold_;               // Maximum time (in us) a write should
182                                      // take before warning of a slow write.
183  int non_destructive_;               // Whether to use non-destructive mode for
184                                      // the disk test.
185
186  // Generic Options.
187  int monitor_mode_;                  // Switch for monitor-only mode SAT.
188                                      // This switch trumps most of the other
189                                      // argument, as SAT will only run error
190                                      // polling threads.
191  int tag_mode_;                      // Do tagging of memory and strict
192                                      // checking for misplaced cachelines.
193
194  bool do_page_map_;                  // Should we print a list of used pages?
195  unsigned char *page_bitmap_;        // Store bitmap of physical pages seen.
196  uint64 page_bitmap_size_;           // Length of physical memory represented.
197
198  // Cpu Cache Coherency Options.
199  bool cc_test_;                      // Flag to decide whether to start the
200                                      // cache coherency threads.
201  int cc_cacheline_count_;            // Number of cache line size structures.
202  int cc_inc_count_;                  // Number of times to increment the shared
203                                      // cache lines structure members.
204
205  // Thread control.
206  int file_threads_;                  // Threads of file IO.
207  int net_threads_;                   // Threads of network IO.
208  int listen_threads_;                // Threads for network IO to connect.
209  int memory_threads_;                // Threads of memcpy.
210  int invert_threads_;                // Threads of invert.
211  int fill_threads_;                  // Threads of memset.
212  int check_threads_;                 // Threads of strcmp.
213  int cpu_stress_threads_;            // Threads of CPU stress workload.
214  int disk_threads_;                  // Threads of disk test.
215  int random_threads_;                // Number of random disk threads.
216  int total_threads_;                 // Total threads used.
217  bool error_poll_;                   // Poll for system errors.
218
219  // Resources.
220  cc_cacheline_data *cc_cacheline_data_;  // The cache line sized datastructure
221                                          // used by the ccache threads
222                                          // (in worker.h).
223  vector<string> filename_;           // Filenames for file IO.
224  vector<string> ipaddrs_;            // Addresses for network IO.
225  vector<string> diskfilename_;       // Filename for disk IO device.
226  // Block table for IO device.
227  vector<DiskBlockTable*> blocktables_;
228
229  int32 region_mask_;                 // Bitmask of available NUMA regions.
230  int32 region_count_;                // Count of available NUMA regions.
231  int32 region_[32];                  // Pagecount per region.
232  int region_mode_;                   // What to do with NUMA hints?
233  static const int kLocalNuma = 1;    // Target local memory.
234  static const int kRemoteNuma = 2;   // Target remote memory.
235
236  // Results.
237  int64 errorcount_;                  // Total hardware incidents seen.
238  int statuscount_;                   // Total test errors seen.
239
240  // Thread type constants and types
241  enum ThreadType {
242    kMemoryType = 0,
243    kFileIOType = 1,
244    kNetIOType = 2,
245    kNetSlaveType = 3,
246    kCheckType = 4,
247    kInvertType = 5,
248    kDiskType = 6,
249    kRandomDiskType = 7,
250    kCPUType = 8,
251    kErrorType = 9,
252    kCCType = 10
253  };
254
255  // Helper functions.
256  virtual void AcquireWorkerLock();
257  virtual void ReleaseWorkerLock();
258  pthread_mutex_t worker_lock_;  // Lock access to the worker thread structure.
259  typedef vector<WorkerThread*> WorkerVector;
260  typedef map<int, WorkerVector*> WorkerMap;
261  // Contains all worker threads.
262  WorkerMap workers_map_;
263  // Delay between power spikes.
264  time_t pause_delay_;
265  // The duration of each pause (for power spikes).
266  time_t pause_duration_;
267  // For the workers we pause and resume to create power spikes.
268  WorkerStatus power_spike_status_;
269  // For the workers we never pause.
270  WorkerStatus continuous_status_;
271
272  class OsLayer *os_;                   // Os abstraction: put hacks here.
273  class PatternList *patternlist_;      // Access to global data patterns.
274
275  // RunAnalysis methods
276  void AnalysisAllStats();              // Summary of all runs.
277  void MemoryStats();
278  void FileStats();
279  void NetStats();
280  void CheckStats();
281  void InvertStats();
282  void DiskStats();
283
284  void QueueStats();
285
286  // Physical page use reporting.
287  void AddrMapInit();
288  void AddrMapUpdate(struct page_entry *pe);
289  void AddrMapPrint();
290
291  // additional memory data from google-specific tests.
292  virtual void GoogleMemoryStats(float *memcopy_data,
293                                 float *memcopy_bandwidth);
294
295  virtual void GoogleOsOptions(std::map<std::string, std::string> *options);
296
297  // Page queues, only one of (valid_+empty_) or (finelock_q_) will be used
298  // at a time. A commandline switch controls which queue implementation will
299  // be used.
300  class PageEntryQueue *valid_;        // Page queue structure, valid pages.
301  class PageEntryQueue *empty_;        // Page queue structure, free pages.
302  class FineLockPEQueue *finelock_q_;  // Page queue with fine-grain locks
303  Sat::PageQueueType pe_q_implementation_;   // Queue implementation switch
304
305  DISALLOW_COPY_AND_ASSIGN(Sat);
306};
307
// Factory function declaration: returns a pointer to a Sat object.
// The definition is not in this header, and ownership of the returned
// pointer is not stated here -- TODO confirm against the implementation.
Sat *SatFactory();
309
310#endif  // STRESSAPPTEST_SAT_H_
311