#!/usr/bin/python
"""
Usage: ./cron_scripts/log_distiller.py job_id path_to_logfile
    If the job_id is a suite it will find all subjobs.
If path_to_logfile is omitted, the default log location hard-coded in this
script is used, so you need to change that location to the log you want parsed.
The job_id needs to be in the afe database.
"""
822243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport abc
922243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport datetime
1022243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport os
1122243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport re
1222243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport pprint
1322243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport subprocess
1422243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport sys
1522243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport time
1622243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
1722243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bimport common
1822243eb016c251c72ac7f9002e75450d88e063d2Prashanth Bfrom autotest_lib.server import frontend
1922243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
2022243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
# Default scheduler log to distill when no log path is passed on the command
# line.
LOGFILE = './logs/scheduler.log.2014-04-17-16.51.47'
# Backward-compatible alias: the constant used to be defined under this
# misspelled name while the rest of the script read LOGFILE.
LOGFIE = LOGFILE
# logfile name format: scheduler.log.2014-02-14-18.10.56
time_format = '%Y-%m-%d-%H.%M.%S'
# Captures the timestamp suffix of a scheduler log file name.
logfile_regex = r'scheduler.log.([0-9,.,-]+)'
logdir = os.path.join('/usr/local/autotest', 'logs')
2622243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class StateMachineViolation(Exception):
    """Raised when a log shows a state transition the state machine rejects."""
2922243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
3022243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class LogLineException(Exception):
    """Raised when more than one line parser claims the same log line."""
3322243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
3422243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
def should_process_log(time_str, time_format, cutoff_days=7):
    """Tells whether a timestamp is older than the cutoff.

    @param time_str: A string representing the time.
        eg: 2014-02-14-18.10.56
    @param time_format: A string representing the format of the time string.
        ref: http://docs.python.org/2/library/datetime.html#strftime-strptime-behavior
    @param cutoff_days: Int representing the cutoff in days.

    @return: True if time_str lies more than cutoff_days before the current
        local time, False otherwise.
    """
    parsed_time = datetime.datetime.strptime(time_str, time_format)
    # Round-trip the current time through time_format so that both timestamps
    # are compared at the precision the format can express.
    current_time = datetime.datetime.strptime(
            time.strftime(time_format), time_format)
    oldest_allowed = current_time - datetime.timedelta(days=cutoff_days)
    return parsed_time < oldest_allowed
5022243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
5122243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
def apply_regex(regex, line):
    """Match a regex against the start of a line.

    @param regex: Regex to apply.
    @param line: The line to apply regex on.

    @return: A tuple with the matching groups if the regex matched at the
        beginning of the line, None otherwise.
    """
    found = re.match(regex, line)
    return found.groups() if found else None
6322243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
6422243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class StateMachineParser(object):
    """Abstract class that enforces state transition ordering.

    Classes inheriting from StateMachineParser need to define an
    expected_transitions dictionary that maps each 'from' state to the list
    of 'to' states it may move to. The SMP removes a 'to' state from that list
    when the transition occurs, so the same transition cannot happen twice
    unless it is listed twice.
    """
    __metaclass__ = abc.ABCMeta


    @abc.abstractmethod
    def __init__(self):
        # States entered so far, in the order they were reached.
        self.visited_states = []
        # Maps a 'from' state to the 'to' states still allowed from it.
        self.expected_transitions = {}


    def advance_state(self, from_state, to_state):
        """Checks that a transition is valid and records it if so.

        Problems are reported through the return value; this method does not
        raise for an unexpected transition.

        @param from_state: A string representing the state being left.
        @param to_state: The state being entered, represented as a string.

        @return: None if the transition was expected; it is then consumed
            from expected_transitions and to_state is appended to
            visited_states. A message string if from_state is non-empty and
            equal to to_state. A (from_state, to_state) tuple if the
            transition is not currently expected.
        """
        # TODO: Updating to the same state is a waste of bw.
        if from_state and from_state == to_state:
            return ('Updating to the same state is a waste of BW: %s->%s' %
                    (from_state, to_state))

        if (from_state in self.expected_transitions and
            to_state in self.expected_transitions[from_state]):
            # Consume the transition so it cannot be taken a second time.
            self.expected_transitions[from_state].remove(to_state)
            self.visited_states.append(to_state)
            return None

        return (from_state, to_state)
10322243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
10422243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class SingleJobHostSMP(StateMachineParser):
    """State machine parser for the host status updates of a single job."""

    def __init__(self):
        self.visited_states = []
        # Host status transitions expected over the course of one job.
        self.expected_transitions = {
                'Ready': ['Resetting', 'Verifying', 'Pending', 'Provisioning'],
                'Resetting': ['Ready', 'Provisioning'],
                'Pending': ['Running'],
                'Provisioning': ['Repairing'],
                'Running': ['Ready']
        }


    def check_transitions(self, hostline):
        """Advance the host state machine if the line updates the status.

        @param hostline: A line object whose line_info dict may carry the
            'state', 'field' and 'value' keys of a host update.
        """
        # A host line that was claimed but carried no parsable update has no
        # 'field' key; there is nothing to check, so do not raise KeyError.
        if hostline.line_info.get('field') != 'status':
            return

        # NOTE(review): the result of advance_state is discarded, so an
        # unexpected host transition is never reported - confirm this is
        # intended.
        self.advance_state(hostline.line_info['state'],
                hostline.line_info['value'])
12122243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
12222243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class SingleJobHqeSMP(StateMachineParser):
    """State machine parser for the HQE status changes of a single job."""

    def __init__(self):
        self.visited_states = []
        # HQE status transitions expected over the course of one job.
        self.expected_transitions = {
                'Queued': ['Starting', 'Resetting', 'Aborted'],
                'Resetting': ['Pending', 'Provisioning'],
                'Provisioning': ['Pending', 'Queued', 'Repairing'],
                'Pending': ['Starting'],
                'Starting': ['Running'],
                'Running': ['Gathering', 'Parsing'],
                'Gathering': ['Parsing'],
                'Parsing': ['Completed', 'Aborted']
        }


    def check_transitions(self, hqeline):
        """Advance the HQE state machine with the transition on the line.

        @param hqeline: A line object whose line_info dict may carry the
            'from_state' and 'to_state' keys of an HQE status change.

        @raises StateMachineViolation: If the HQE leaves 'Queued' through a
            transition that is not expected after it has already been
            'Running'.
        """
        info = hqeline.line_info
        # An HQE line that was claimed but held no status transition has
        # neither key; there is nothing to validate, so do not raise KeyError.
        if 'from_state' not in info or 'to_state' not in info:
            return

        invalid_states = self.advance_state(
                info['from_state'], info['to_state'])
        if not invalid_states:
            return

        # Deal with repair.
        # invalid_states is either a (from, to) tuple or, for a same-state
        # update, a message string; indexing [0] works on both and the string
        # case never equals 'Queued'.
        # NOTE(review): every other unexpected transition is tolerated here,
        # presumably because repairs re-queue the entry - confirm.
        if (invalid_states[0] == 'Queued' and
            'Running' in self.visited_states):
            raise StateMachineViolation('Unrecognized state transition '
                    '%s->%s, expected transitions are %s' %
                    (invalid_states[0], invalid_states[1],
                     self.expected_transitions))
15122243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
15222243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class LogLine(object):
    """Base class for parsed log lines.

    Every class deriving from LogLine models one kind of line. A line parses
    itself and hands itself to its state machine parsers (SMPs) so they can
    validate state transitions. A line can belong to several state machines.
    """
    line_format = '%s'


    def __init__(self, state_machine_parsers):
        """
        @param state_machine_parsers: A list of smp objects to use to validate
            state changes on these types of lines.
        """
        # Everything parsed from the current line is kept in a single dict so
        # that flush() can discard it with one assignment.
        self.line_info = {}
        self.smps = state_machine_parsers


    def parse_line(self, line):
        """Run the line regex and remember what the parsed line contains.

        @param line: A string representing a line.
        """
        # Regex for all the things.
        groups = apply_regex('(.*)', line)
        if not groups:
            return
        self.line_info['line'] = groups[0]


    def flush(self):
        """Run every state machine parser on this line, then reset line_info.
        """
        for parser in self.smps:
            parser.check_transitions(self)
        # TODO: persist this?
        self.line_info = {}


    def format_line(self):
        """Render line_info through line_format.

        @return: The formatted line, or the stored 'line' text when
            line_format needs a key that line_info does not have.
        """
        try:
            formatted = self.line_format % self.line_info
        except KeyError:
            formatted = self.line_info['line']
        return formatted
20022243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
20122243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class TimeLine(LogLine):
    """Splits the leading timestamp off scheduler log lines.
    """

    def parse_line(self, line):
        """Store the timestamp under 'time' and the remainder under 'line'.

        @param line: A string representing a line.
        """
        super(TimeLine, self).parse_line(line)

        # Regex for isolating the date and time from scheduler logs, eg:
        # 02/16 16:04:36.573 INFO |scheduler_:0574|...
        timestamp_rgx = '([0-9,/,:,., ]+)(.*)'
        pieces = apply_regex(timestamp_rgx, self.line_info['line'])
        if not pieces:
            return
        self.line_info['time'], self.line_info['line'] = pieces
21622243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
21722243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class HostLine(TimeLine):
    """Parses lines that report a host update.
    """
    line_format = (' \t\t %(time)s %(host)s, currently in %(state)s, '
                'updated %(field)s->%(value)s')


    def record_state_transition(self, line):
        """Pull the host update out of a line and store it in line_info.

        On a match the current state, the updated field and its new value
        (with single quotes removed) are stored under 'state', 'field' and
        'value'.

        @param line: The line we're expecting to contain a state transition.
        """
        state_transition_rgx = ".* ([a-zA-Z]+) updating {'([a-zA-Z]+)': ('[a-zA-Z]+'|[0-9])}.*"
        groups = apply_regex(state_transition_rgx, line)
        if not groups:
            return
        state, field, value = groups
        self.line_info['state'] = state
        self.line_info['field'] = field
        self.line_info['value'] = value.replace("'", "")


    def parse_line(self, line):
        """Claim the line if it mentions a host.

        @param line: A string representing a line.

        @return: The formatted line if the host regex matched, None otherwise.
        """
        super(HostLine, self).parse_line(line)

        # Regex for getting host status. Eg:
        # 172.22.4 in Running updating {'status': 'Running'}
        host_rgx = '.*Host (([0-9,.,a-z,-]+).*)'
        groups = apply_regex(host_rgx, self.line_info['line'])
        if not groups:
            return None
        remainder, host = groups
        self.line_info['line'] = remainder
        self.line_info['host'] = host
        self.record_state_transition(remainder)
        return self.format_line()
25022243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
25122243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class HQELine(TimeLine):
    """Manages HQE line parsing.
    """
    line_format = ('%(time)s %(hqe)s, currently in %(from_state)s, '
            'updated to %(to_state)s. Flags: %(flags)s')


    def record_state_transition(self, line):
        """Apply the state_transition_rgx to a line and record state changes.

        On a match, 'from_state', 'flags' and 'to_state' are stored in
        line_info. 'flags' is the optional bracketed group including its
        leading space, or None when the line has no flags.

        @param line: The line we're expecting to contain a state transition.
        """
        # Regex for getting hqe status. Eg:
        # status:Running [active] -> Gathering
        # Raw string: the pattern contains backslash escapes that are regex
        # syntax, not Python string escapes.
        state_transition_rgx = (
                r".*status:([a-zA-Z]+)( \[[a-z\,]+\])? -> ([a-zA-Z]+)")
        match = apply_regex(state_transition_rgx, line)
        if match:
            self.line_info['from_state'] = match[0]
            self.line_info['flags'] = match[1]
            self.line_info['to_state'] = match[2]


    def parse_line(self, line):
        """Claim the line if it describes an HQE.

        @param line: A string representing a line.

        @return: The formatted line if the HQE regex matched, None otherwise.
        """
        super(HQELine, self).parse_line(line)
        line_rgx = r'.*\| HQE: (([0-9]+).*)'
        parsed_line = apply_regex(line_rgx, self.line_info['line'])
        if parsed_line:
            self.line_info['line'] = parsed_line[0]
            self.line_info['hqe'] = parsed_line[1]
            self.record_state_transition(self.line_info['line'])
            return self.format_line()
28322243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
28422243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class LogCrawler(object):
    """Crawl logs.

    A log crawler applies some basic preprocessing to a log and then crawls
    the output, validating state changes. It manages line and state machine
    creation. The initial filter applied to the log needs to grab all lines
    that match an action, such as the running of a job.
    """

    def __init__(self, log_name):
        """
        @param log_name: Path of the log; it is interpolated into the shell
            command that produces the crawler's input.
        """
        self.log = log_name
        self.filter_command = 'cat %s' % log_name


    def preprocess_log(self):
        """Run filter_command through the shell and return its stdout.
        """
        pipeline = subprocess.Popen(
                self.filter_command, shell=True, stdout=subprocess.PIPE)
        # Only stdout is piped, so the stderr slot of the result is unused.
        return pipeline.communicate()[0]
30622243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
30722243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
class SchedulerLogCrawler(LogCrawler):
    """A log crawler for the scheduler logs.

    This crawler is only capable of processing information about a single job.
    """

    def __init__(self, log_name, **kwargs):
        """
        @param log_name: Path of the scheduler log to crawl.
        @param kwargs: Must contain 'job_id', the id of the job to follow.
        """
        super(SchedulerLogCrawler, self).__init__(log_name)
        self.job_id = kwargs['job_id']
        host_processor = HostLine([SingleJobHostSMP()])
        hqe_processor = HQELine([SingleJobHqeSMP()])
        self.line_processors = [host_processor, hqe_processor]
        # Narrow the base command's output down to lines about this job.
        base_command = self.filter_command
        self.filter_command = ('%s | grep "for job: %s"' %
                (base_command, self.job_id))


    def parse_log(self):
        """Parse each line of the preprocessed log output.

        Each line is offered to every line processor. The processor that
        claims it populates itself and is flushed, which walks that line's
        state machine to the next step.

        @return: A list with one entry per line: the formatted line if a
            processor claimed it, the raw line otherwise.

        @raises LogLineException: If more than one processor claims a line.
        @raises StateMachineViolation: If a flush detects a violation; the
            exception carries the lines distilled so far plus the message.
        """
        distilled = []
        for raw_line in self.preprocess_log().split('\n'):
            claimed = None
            for line_processor in self.line_processors:
                candidate = line_processor.parse_line(raw_line)
                if not candidate:
                    continue
                if claimed:
                    raise LogLineException('Multiple Parsers claiming the line %s: '
                            'previous parsing: %s, current parsing: %s ' %
                            (raw_line, claimed, candidate))
                claimed = candidate
                try:
                    line_processor.flush()
                except StateMachineViolation as e:
                    distilled.append(str(e))
                    raise StateMachineViolation(distilled)
            distilled.append(claimed or raw_line)
        return distilled
34922243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
35022243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
def process_logs():
    """Distill the scheduler log for the job given on the command line.

    sys.argv[1] is the job id and sys.argv[2], when present, is the path of
    the log to parse; otherwise LOGFILE is used. If the job has no parent job
    its child jobs are looked up and distilled instead; a job with neither a
    parent nor children is distilled itself. Every distilled line is printed.

    Exits with status 1 if no job id was given or the job is not known to the
    afe.
    """
    if len(sys.argv) < 2:
        print('Usage: ./cron_scripts/log_distiller.py job_id path_to_logfile\n'
              'You need to change the location of the log it will parse.\n'
              'The job_id needs to be in the afe database.')
        sys.exit(1)

    job_id = int(sys.argv[1])
    # The log path is optional; test for it explicitly instead of catching
    # whatever exception a missing argv entry happens to raise.
    logfile = sys.argv[2] if len(sys.argv) > 2 else LOGFILE

    rpc = frontend.AFE()
    suite_jobs = rpc.run('get_jobs', id=job_id)
    if not suite_jobs:
        print('Job %s was not found in the afe database.' % job_id)
        sys.exit(1)

    if not suite_jobs[0]['parent_job']:
        # No parent: this may be a suite, so look for its subjobs. Keep the
        # job itself when it has none, so a standalone job is still distilled.
        child_jobs = rpc.run('get_jobs', parent_job=job_id)
        if child_jobs:
            suite_jobs = child_jobs

    for job in suite_jobs:
        log_crawler = SchedulerLogCrawler(logfile, job_id=job['id'])
        for line in log_crawler.parse_log():
            # Parenthesized single-argument form prints identically on
            # Python 2 and Python 3.
            print(line)
37322243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
37422243eb016c251c72ac7f9002e75450d88e063d2Prashanth B
# Script entry point: distill logs only when run directly, not on import.
if __name__ == "__main__":
    process_logs()
377