#!/usr/bin/python
"""
Usage: ./cron_scripts/log_distiller.py job_id path_to_logfile
    If the job_id is a suite it will find all subjobs.
You need to change the location of the log it will parse.
The job_id needs to be in the afe database.
"""
import abc
import datetime
import os
import re
import pprint
import subprocess
import sys
import time

import common
from autotest_lib.server import frontend


# Default scheduler log to parse when no log path is given on the command line.
LOGFILE = './logs/scheduler.log.2014-04-17-16.51.47'
# The constant was originally defined under this misspelled name; keep it as an
# alias so that both spellings resolve to the same default.
LOGFIE = LOGFILE
# logfile name format: scheduler.log.2014-02-14-18.10.56
time_format = '%Y-%m-%d-%H.%M.%S'
logfile_regex = r'scheduler.log.([0-9,.,-]+)'
logdir = os.path.join('/usr/local/autotest', 'logs')
class StateMachineViolation(Exception):
    """Raised when a log line shows a state transition the parser rejects."""


class LogLineException(Exception):
    """Raised when more than one line processor claims the same log line."""


def should_process_log(time_str, time_format, cutoff_days=7):
    """Returns True if the log timestamp is older than the cutoff.

    @param time_str: A string representing the time.
        eg: 2014-02-14-18.10.56
    @param time_format: A string representing the format of the time string.
        ref: http://docs.python.org/2/library/datetime.html#strftime-strptime-behavior
    @param cutoff_days: Int representing the cutoff in days.

    @return: True if time_str lies more than cutoff_days days before the
        current local time, False otherwise.
    """
    log_time = datetime.datetime.strptime(time_str, time_format)
    # Round-trip "now" through time_format so that both timestamps are
    # truncated to the same precision before they are compared.
    now = datetime.datetime.strptime(time.strftime(time_format), time_format)
    cutoff = now - datetime.timedelta(days=cutoff_days)
    return log_time < cutoff


def apply_regex(regex, line):
    """Simple regex applicator.

    @param regex: Regex to apply. It is anchored at the start of the line
        (re.match semantics).
    @param line: The line to apply regex on.

    @return: A tuple with the matching groups if there was a match,
        None otherwise.
    """
    log_match = re.match(regex, line)
    if log_match:
        return log_match.groups()
    return None


class StateMachineParser(object):
    """Abstract class that enforces state transition ordering.

    Classes inheriting from StateMachineParser need to define an
    expected_transitions dictionary. The SMP will pop 'to' states
    from the dictionary as they occur, so you cannot have the same state
    transition twice unless you specify 2 of them.
    """
    __metaclass__ = abc.ABCMeta


    @abc.abstractmethod
    def __init__(self):
        self.visited_states = []
        self.expected_transitions = {}


    def advance_state(self, from_state, to_state):
        """Checks that a transition is valid and records it.

        A valid transition is consumed: to_state is removed from
        expected_transitions[from_state] and appended to visited_states.

        @param from_state: A string representing the state being left.
        @param to_state: The state being entered, represented as a string.

        @return: None if the transition was expected. A message string if
            from_state and to_state are the same non-empty state. A
            (from_state, to_state) tuple if the transition was not expected.
            Nothing is raised here; callers decide what a non-None result
            means.
        """
        # TODO: Updating to the same state is a waste of bw.
        if from_state and from_state == to_state:
            return ('Updating to the same state is a waste of BW: %s->%s' %
                    (from_state, to_state))

        if (from_state in self.expected_transitions and
                to_state in self.expected_transitions[from_state]):
            self.expected_transitions[from_state].remove(to_state)
            self.visited_states.append(to_state)
            return None
        return (from_state, to_state)


class SingleJobHostSMP(StateMachineParser):
    """State machine for the host status changes of a single job."""

    def __init__(self):
        self.visited_states = []
        self.expected_transitions = {
                'Ready': ['Resetting', 'Verifying', 'Pending', 'Provisioning'],
                'Resetting': ['Ready', 'Provisioning'],
                'Pending': ['Running'],
                'Provisioning': ['Repairing'],
                'Running': ['Ready']
        }


    def check_transitions(self, hostline):
        """Advance the state machine for a host 'status' update.

        Lines without a recorded field update, or that update a field other
        than 'status', are ignored. The result of advance_state is not
        inspected, so unexpected host transitions are not reported.

        @param hostline: A line object whose line_info may contain the
            'state', 'field' and 'value' keys.
        """
        info = hostline.line_info
        # A claimed host line does not necessarily carry a state transition,
        # in which case none of the transition keys are present.
        if info.get('field') == 'status':
            self.advance_state(info['state'], info['value'])


class SingleJobHqeSMP(StateMachineParser):
    """State machine for the HQE status changes of a single job."""

    def __init__(self):
        self.visited_states = []
        self.expected_transitions = {
                'Queued': ['Starting', 'Resetting', 'Aborted'],
                'Resetting': ['Pending', 'Provisioning'],
                'Provisioning': ['Pending', 'Queued', 'Repairing'],
                'Pending': ['Starting'],
                'Starting': ['Running'],
                'Running': ['Gathering', 'Parsing'],
                'Gathering': ['Parsing'],
                'Parsing': ['Completed', 'Aborted']
        }


    def check_transitions(self, hqeline):
        """Advance the state machine for an HQE status change.

        @param hqeline: A line object whose line_info may contain the
            'from_state' and 'to_state' keys.

        @raises StateMachineViolation: If an unexpected transition out of
            'Queued' is seen after 'Running' has already been visited.
        """
        info = hqeline.line_info
        # A claimed HQE line does not necessarily carry a state transition;
        # there is nothing to validate in that case.
        if 'from_state' not in info or 'to_state' not in info:
            return

        invalid_states = self.advance_state(
                info['from_state'], info['to_state'])
        if not invalid_states:
            return

        # Deal with repair.
        # Note that a same-state message string from advance_state never
        # satisfies this check, since its first character is not 'Queued'.
        if (invalid_states[0] == 'Queued' and
                'Running' in self.visited_states):
            raise StateMachineViolation('Unrecognized state transition '
                    '%s->%s, expected transitions are %s' %
                    (invalid_states[0], invalid_states[1],
                     self.expected_transitions))


class LogLine(object):
    """Line objects.

    All classes inheriting from LogLine represent a line of some sort.
    A line is responsible for parsing itself, and invoking an SMP to
    validate state transitions. A line can be part of several state machines.
    """
    line_format = '%s'


    def __init__(self, state_machine_parsers):
        """
        @param state_machine_parsers: A list of smp objects to use to validate
            state changes on these types of lines.
        """
        self.smps = state_machine_parsers

        # Because, this is easier to flush.
        self.line_info = {}


    def parse_line(self, line):
        """Apply a line regex and save any information the parsed line contains.

        @param line: A string representing a line.
        """
        # Start from a clean slate: flush() only runs for lines a processor
        # claims, so fields from an earlier unclaimed line would otherwise
        # leak into this one.
        self.line_info = {}

        # Regex for all the things.
        line_rgx = '(.*)'
        parsed_line = apply_regex(line_rgx, line)
        if parsed_line:
            self.line_info['line'] = parsed_line[0]


    def flush(self):
        """Call any state machine parsers, persist line info if needed.

        line_info is cleared afterwards; it is left untouched if a parser
        raises.
        """
        for smp in self.smps:
            smp.check_transitions(self)
        # TODO: persist this?
        self.line_info = {}


    def format_line(self):
        """Render line_info through line_format.

        @return: The formatted line, or the raw 'line' entry when line_info
            lacks a key that line_format needs.
        """
        try:
            return self.line_format % self.line_info
        except KeyError:
            return self.line_info['line']


class TimeLine(LogLine):
    """Filters timestamps for scheduler logs.
    """

    def parse_line(self, line):
        """Split a leading timestamp off the line.

        On a match, 'time' holds the timestamp prefix and 'line' the rest.

        @param line: A string representing a line.
        """
        super(TimeLine, self).parse_line(line)

        # Regex for isolating the date and time from scheduler logs, eg:
        # 02/16 16:04:36.573 INFO |scheduler_:0574|...
        line_rgx = '([0-9,/,:,., ]+)(.*)'
        parsed_line = apply_regex(line_rgx, self.line_info['line'])
        if parsed_line:
            self.line_info['time'] = parsed_line[0]
            self.line_info['line'] = parsed_line[1]


class HostLine(TimeLine):
    """Manages hosts line parsing.
    """
    line_format = (' \t\t %(time)s %(host)s, currently in %(state)s, '
                   'updated %(field)s->%(value)s')


    def record_state_transition(self, line):
        """Apply the state_transition_rgx to a line and record state changes.

        On a match, 'state', 'field' and 'value' are stored in line_info,
        with the quotes stripped from the value.

        @param line: The line we're expecting to contain a state transition.
        """
        state_transition_rgx = ".* ([a-zA-Z]+) updating {'([a-zA-Z]+)': ('[a-zA-Z]+'|[0-9])}.*"
        match = apply_regex(state_transition_rgx, line)
        if match:
            self.line_info['state'] = match[0]
            self.line_info['field'] = match[1]
            self.line_info['value'] = match[2].replace("'", "")


    def parse_line(self, line):
        """Parse a host line.

        @param line: A string representing a line.

        @return: The formatted line if this is a host line, None otherwise.
        """
        super(HostLine, self).parse_line(line)

        # Regex for getting host status. Eg:
        # 172.22.4 in Running updating {'status': 'Running'}
        line_rgx = '.*Host (([0-9,.,a-z,-]+).*)'
        parsed_line = apply_regex(line_rgx, self.line_info['line'])
        if parsed_line:
            self.line_info['line'] = parsed_line[0]
            self.line_info['host'] = parsed_line[1]
            self.record_state_transition(self.line_info['line'])
            return self.format_line()
        return None


class HQELine(TimeLine):
    """Manages HQE line parsing.
    """
    line_format = ('%(time)s %(hqe)s, currently in %(from_state)s, '
                   'updated to %(to_state)s. Flags: %(flags)s')


    def record_state_transition(self, line):
        """Apply the state_transition_rgx to a line and record state changes.

        On a match, 'from_state', 'flags' and 'to_state' are stored in
        line_info; 'flags' is None when the line has no bracketed flags.

        @param line: The line we're expecting to contain a state transition.
        """
        # Regex for getting hqe status. Eg:
        # status:Running [active] -> Gathering
        # Raw string: the pattern contains backslash escapes meant for re.
        state_transition_rgx = r".*status:([a-zA-Z]+)( \[[a-z\,]+\])? -> ([a-zA-Z]+)"
        match = apply_regex(state_transition_rgx, line)
        if match:
            self.line_info['from_state'] = match[0]
            self.line_info['flags'] = match[1]
            self.line_info['to_state'] = match[2]


    def parse_line(self, line):
        """Parse an HQE line.

        @param line: A string representing a line.

        @return: The formatted line if this is an HQE line, None otherwise.
        """
        super(HQELine, self).parse_line(line)
        line_rgx = r'.*\| HQE: (([0-9]+).*)'
        parsed_line = apply_regex(line_rgx, self.line_info['line'])
        if parsed_line:
            self.line_info['line'] = parsed_line[0]
            self.line_info['hqe'] = parsed_line[1]
            self.record_state_transition(self.line_info['line'])
            return self.format_line()
        return None


class LogCrawler(object):
    """Crawl logs.

    Log crawlers are meant to apply some basic preprocessing to a log, and crawl
    the output validating state changes. They manage line and state machine
    creation. The initial filtering applied to the log needs to grab all lines
    that match an action, such as the running of a job.
    """

    def __init__(self, log_name):
        """
        @param log_name: Path of the log file to crawl.
        """
        self.log = log_name
        self.filter_command = 'cat %s' % log_name


    def preprocess_log(self):
        """Apply some basic filtering to the log.

        @return: The stdout of filter_command.
        """
        # NOTE(review): filter_command is a shell pipeline assembled by string
        # interpolation and run with shell=True, so the log path and anything
        # subclasses append to it must come from a trusted source.
        proc = subprocess.Popen(self.filter_command,
                                shell=True, stdout=subprocess.PIPE)
        out, _ = proc.communicate()
        return out


class SchedulerLogCrawler(LogCrawler):
    """A log crawler for the scheduler logs.

    This crawler is only capable of processing information about a single job.
    """

    def __init__(self, log_name, **kwargs):
        """
        @param log_name: Path of the scheduler log file to crawl.
        @param kwargs: Must contain 'job_id', the id of the job whose lines
            should be extracted from the log.
        """
        super(SchedulerLogCrawler, self).__init__(log_name)
        self.job_id = kwargs['job_id']
        self.line_processors = [HostLine([SingleJobHostSMP()]),
                                HQELine([SingleJobHqeSMP()])]
        self.filter_command = ('%s | grep "for job: %s"' %
                               (self.filter_command, self.job_id))


    def parse_log(self):
        """Parse each line of the preprocessed log output.

        Pass each line through each possible line_processor. The one that matches
        will populate itself, call flush, this will walk the state machine of that
        line to the next step.

        @return: A list with one entry per line: the formatted line if a
            processor claimed it, the raw line otherwise.

        @raises LogLineException: If more than one processor claims a line.
        @raises StateMachineViolation: If a state machine rejects a
            transition. The exception carries the lines collected so far,
            followed by the violation message.
        """
        out = self.preprocess_log()
        response = []
        for job_line in out.split('\n'):
            parsed_line = None
            for processor in self.line_processors:
                line = processor.parse_line(job_line)
                if line and parsed_line:
                    raise LogLineException('Multiple Parsers claiming the line %s: '
                            'previous parsing: %s, current parsing: %s ' %
                            (job_line, parsed_line, line))
                elif line:
                    parsed_line = line
                    try:
                        processor.flush()
                    except StateMachineViolation as e:
                        response.append(str(e))
                        raise StateMachineViolation(response)
            response.append(parsed_line if parsed_line else job_line)
        return response


def process_logs():
    """Command line entry point: print the distilled log of a job or suite.

    Reads the job id from sys.argv[1] and, optionally, the log path from
    sys.argv[2]. If the job has no parent job, the jobs that have it as
    their parent are processed instead of the job itself.
    """
    if len(sys.argv) < 2:
        print('Usage: ./cron_scripts/log_distiller.py job_id path_to_logfile '
              'You need to change the location of the log it will parse. '
              'The job_id needs to be in the afe database.')
        sys.exit(1)

    job_id = int(sys.argv[1])
    rpc = frontend.AFE()
    # Presumably returns a list of job dictionaries -- confirm against the
    # AFE rpc interface.
    suite_jobs = rpc.run('get_jobs', id=job_id)
    if not suite_jobs:
        print('No job with id %s was found in the afe database.' % job_id)
        sys.exit(1)
    if not suite_jobs[0]['parent_job']:
        suite_jobs = rpc.run('get_jobs', parent_job=job_id)

    try:
        logfile = sys.argv[2]
    except IndexError:
        # No log path was given; fall back to the module-level default, which
        # is defined under the spelling LOGFIE.
        logfile = LOGFIE

    for job in suite_jobs:
        log_crawler = SchedulerLogCrawler(logfile, job_id=job['id'])
        for line in log_crawler.parse_log():
            print(line)


if __name__ == '__main__':
    process_logs()