// Copyright 2008 Google Inc. All Rights Reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//      http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 15b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// error_diag.h: Ambiguous error diagnosis class 16b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 17b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#ifndef STRESSAPPTEST_ERROR_DIAG_H_ 18b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#define STRESSAPPTEST_ERROR_DIAG_H_ 19b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 20b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#include <pthread.h> 21b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#include <list> 22b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#include <map> 23b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#include <set> 24b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#include <string> 25b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 26b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// This file must work with autoconf on its public version, 27b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// so these includes are correct. 28b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#include "sattypes.h" 29b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#include "os.h" 30b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 31b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass ErrorInstance; 32b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 33b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// This describes the components of the system. 34b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass DeviceTree { 35b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson public: 36b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson explicit DeviceTree(string name); 37b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson ~DeviceTree(); 38b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 39b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Atomically find arbitrary device in subtree. 
40b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson DeviceTree *FindInSubTree(string name); 41b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Find or add named device. 42b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson DeviceTree *FindOrAddDevice(string name); 43b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Atomically add sub device. 44b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson void InsertSubDevice(string name); 45b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Returns parent device. 46b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson DeviceTree *GetParent() { return parent_; } 47b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Pretty prints device tree. 48b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson void PrettyPrint(string spacer = " "); 49b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Atomically add error instance to device. 50b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson void AddErrorInstance(ErrorInstance *error_instance); 51b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Returns true of device is known to be bad. 52b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson bool KnownBad(); 53b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Returns number of direct sub devices. 54b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson int NumDirectSubDevices() { return subdevices_.size(); } 55b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 56b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson private: 57b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Unlocked version of FindInSubTree. 58b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson DeviceTree *UnlockedFindInSubTree(string name); 59b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 60b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson std::map<string, DeviceTree*> subdevices_; // Map of sub-devices. 
61b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson std::list<ErrorInstance*> errors_; // Log of errors. 62b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson DeviceTree *parent_; // Pointer to parent device. 63b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson string name_; // Device name. 64b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson pthread_mutex_t device_tree_mutex_; // Mutex protecting device tree. 65b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 66b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 67b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 68b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// enum type for collected errors. 69b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonenum SATErrorType { 70b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SAT_ERROR_NONE = 0, 71b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SAT_ERROR_ECC, 72b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SAT_ERROR_MISCOMPARE, 73b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SAT_ERROR_SECTOR_TAG, 74b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 75b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 76b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// enum type for error severity. 77b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonenum SATErrorSeverity { 78b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SAT_ERROR_CORRECTABLE = 0, 79b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SAT_ERROR_FATAL, 80b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 81b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 82b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// This describes an error and it's likely causes. 
83b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass ErrorInstance { 84b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson public: 85b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson ErrorInstance(): type_(SAT_ERROR_NONE), severity_(SAT_ERROR_CORRECTABLE) {} 86b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 87b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SATErrorType type_; // Type of error: ECC, miscompare, sector. 88b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson SATErrorSeverity severity_; // Correctable, or fatal. 89b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson std::set<DeviceTree*> causes_; // Devices that can cause this type of error. 90b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 91b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 92b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// This describes ECC errors. 93b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass ECCErrorInstance: public ErrorInstance { 94b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson public: 95b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson ECCErrorInstance() { type_ = SAT_ERROR_ECC; } 96b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 97b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson uint64 addr_; // Address where error occured. 98b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 99b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 100b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// This describes miscompare errors. 
101b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass MiscompareErrorInstance: public ErrorInstance { 102b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson public: 103b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson MiscompareErrorInstance() { type_ = SAT_ERROR_MISCOMPARE; } 104b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 105b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson uint64 addr_; // Address where miscompare occured. 106b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 107b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 108b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// This describes HDD miscompare errors. 109b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass HDDMiscompareErrorInstance: public MiscompareErrorInstance { 110b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson public: 111b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson uint64 addr2_; // addr_ and addr2_ are src and dst memory addr. 112b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson int offset_; // offset. 113b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson int block_; // error block. 114b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 115b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 116b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// This describes HDD miscompare errors. 117b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass HDDSectorTagErrorInstance: public ErrorInstance { 118b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson public: 119b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson HDDSectorTagErrorInstance() { type_ = SAT_ERROR_SECTOR_TAG; } 120b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 121b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson uint64 addr_; 122b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson uint64 addr2_; // addr_ and addr2_ are src and dst memory addr. 
123b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson int sector_; // error sector. 124b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson int block_; // error block. 125b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 126b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 127b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson// Generic error storage and sorting class. 128b0114cb9f332db144f65291211ae65f7f0e814e6Scott Andersonclass ErrorDiag { 129b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson public: 130b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson ErrorDiag(); 131b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson virtual ~ErrorDiag(); 132b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 133b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Add info about a CECC. 134b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson virtual int AddCeccError(string dimm_string); 135b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 136b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Add info about a UECC. 137b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson virtual int AddUeccError(string dimm_string); 138b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 139b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Add info about a miscompare. 140b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson virtual int AddMiscompareError(string dimm_string, uint64 addr, int count); 141b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 142b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Add info about a miscompare from a drive. 
143b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson virtual int AddHDDMiscompareError(string devicename, int block, int offset, 144b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson void *src_addr, void *dst_addr); 145b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 146b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Add info about a sector tag miscompare from a drive. 147b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson virtual int AddHDDSectorTagError(string devicename, int block, int offset, 148b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson int sector, void *src_addr, void *dst_addr); 149b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 150b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Set platform specific handle and initialize device tree. 151b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson bool set_os(OsLayer *os); 152b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 153b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson protected: 154b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Create and initialize system device tree. 155b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson virtual bool InitializeDeviceTree(); 156b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 157b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson // Utility Function to translate a virtual address to DIMM number. 158b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson string AddressToDimmString(OsLayer *os, void *addr, int offset); 159b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 160b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson DeviceTree *system_tree_root_; // System device tree. 161b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson OsLayer *os_; // Platform handle. 
162b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 163b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson private: 164b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson DISALLOW_COPY_AND_ASSIGN(ErrorDiag); 165b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson}; 166b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson 167b0114cb9f332db144f65291211ae65f7f0e814e6Scott Anderson#endif // STRESSAPPTEST_ERROR_DIAG_H_ 168