1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import re
7import time
8import xmlrpclib
9
10from autotest_lib.client.common_lib import error
11from autotest_lib.server.cros.faft.firmware_test import FirmwareTest
12
class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    The test compares the CPU temperature reported by servo and by ectool,
    turns the fan off and stresses the DUT to see the CPU temperature rise,
    and finally checks that automatic fan control selects a fan step that
    matches the thermal thresholds read from the EC.
    """
    version = 1

    # Delay (seconds, passed to time.sleep) for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay (seconds) for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay (seconds) for stressing device with fan off to check temperature
    # increase
    STRESS_DELAY_NO_FAN = 12

    # Margin (degree C) for comparing servo based and ectool based CPU
    # temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase (degree C) of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # PID of DUT stressing processes. This is only the class-level default;
    # _stress_dut() rebinds the attribute on the instance with a fresh list.
    _stress_pid = list()
42
43    def enable_auto_fan_control(self):
44        """Enable EC automatic fan speed control"""
45        # We use set_nocheck because servo reports current target
46        # RPM instead 'auto', and therefore servo.set always fails.
47        self.servo.set_nocheck('fan_target_rpm', 'auto')
48
49
50    def max_fan(self):
51        """Maximize fan speed"""
52        # We use set_nocheck because servo reports current target
53        # RPM instead 'max', and therefore servo.set always fails.
54        self.servo.set_nocheck('fan_target_rpm', 'max')
55
56
57    def turn_off_fan(self):
58        """Turn off fan"""
59        self.servo.set('fan_target_rpm', 'off')
60
61
62    def _get_setting_for_type(self, type_id):
63        """
64        Retrieve thermal setting for a given type of sensor
65
66        Args:
67          type_id: The ID of sensor type.
68
69        Returns:
70          A list containing thresholds in the following order:
71            Warning
72            CPU off
73            All power off
74            Fan speed thresholds
75        """
76        setting = list()
77        current_id = 0
78        while True:
79            try:
80                lines = self.faft_client.system.run_shell_command_get_output(
81                        'ectool thermalget %d %d' % (type_id, current_id))
82            except xmlrpclib.Fault:
83                break
84            pattern = re.compile('Threshold \d* [a-z ]* \d* is (\d*) K.')
85            for line in lines:
86                matched = pattern.match(line)
87                if matched is not None:
88                    # Convert degree K to degree C
89                    setting.append(int(matched.group(1)) - 273)
90            current_id = current_id + 1
91
92        if len(setting) == 0:
93            return None
94        return setting
95
96
97    def get_fan_steps(self):
98        """Retrieve fan step config from EC"""
99        num_steps = len(self._thermal_setting[0]) - 3
100        self._fan_steps = list()
101        expected_pat = (["Lowest speed: ([0-9-]+) RPM"] +
102                        ["\d+ K:\s+([0-9-]+) RPM"] * num_steps)
103        match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
104        for m in match:
105            self._fan_steps.append(int(m[1]))
106
107        # Get the actual value of each fan step
108        for i in xrange(num_steps + 1):
109            if self._fan_steps[i] == 0:
110                continue
111            self.servo.set_nocheck('fan_target_rpm', "%d" % self._fan_steps[i])
112            self._fan_steps[i] = int(self.servo.get('fan_target_rpm'))
113
114        logging.info("Actual fan steps: %s", self._fan_steps)
115
116
117    def get_thermal_setting(self):
118        """Retrieve thermal engine setting from EC"""
119        self._thermal_setting = list()
120        type_id = 0
121        while True:
122            setting = self._get_setting_for_type(type_id)
123            if setting is None:
124                break
125            self._thermal_setting.append(setting)
126            type_id = type_id + 1
127        logging.info("Number of tempearture sensor types: %d", type_id)
128
129        # Get the number of temperature sensors
130        self._num_temp_sensor = 0
131        while True:
132            try:
133                self.faft_client.system.run_shell_command('ectool temps %d' %
134                                                   self._num_temp_sensor)
135                self._num_temp_sensor = self._num_temp_sensor + 1
136            except xmlrpclib.Fault:
137                break
138        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)
139
140
141    def initialize(self, host, cmdline_args):
142        super(firmware_ECThermal, self).initialize(host, cmdline_args)
143        self.ec.send_command("chan 0")
144        try:
145            self.faft_client.system.run_shell_command('stop temp_metrics')
146        except xmlrpclib.Fault:
147            self._has_temp_metrics = False
148        else:
149            logging.info('Stopped temp_metrics')
150            self._has_temp_metrics = True
151        if self.check_ec_capability(['thermal']):
152            self.get_thermal_setting()
153            self.get_fan_steps()
154            self.enable_auto_fan_control()
155
156
157    def cleanup(self):
158        if self.check_ec_capability(['thermal']):
159            self.enable_auto_fan_control()
160        if self._has_temp_metrics:
161            logging.info('Starting temp_metrics')
162            self.faft_client.system.run_shell_command('start temp_metrics')
163        self.ec.send_command("chan 0xffffffff")
164        super(firmware_ECThermal, self).cleanup()
165
166
167    def _find_cpu_sensor_id(self):
168        """
169        This function find CPU temperature sensor using ectool.
170
171        Returns:
172          Integer ID of CPU temperature sensor.
173
174        Raises:
175          error.TestFail: Raised if we fail to find PECI temparture through
176            ectool.
177        """
178        for temp_id in range(self._num_temp_sensor):
179            lines = self.faft_client.system.run_shell_command_get_output(
180                    'ectool tempsinfo %d' % temp_id)
181            for line in lines:
182                matched = re.match('Sensor name: (.*)', line)
183                if matched is not None and matched.group(1) == 'PECI':
184                    return temp_id
185        raise error.TestFail('Cannot find CPU temperature sensor ID.')
186
187
188    def _get_temp_reading(self, sensor_id):
189        """
190        Get temperature reading on a sensor through ectool
191
192        Args:
193          sensor_id: Temperature sensor ID.
194
195        Returns:
196          Temperature reading in degree C.
197
198        Raises:
199          xmlrpclib.Fault: Raised when we fail to read temperature.
200          error.TestError: Raised if ectool doesn't behave as we expected.
201        """
202        assert sensor_id < self._num_temp_sensor
203        pattern = re.compile('Reading temperature...(\d*)')
204        lines = self.faft_client.system.run_shell_command_get_output(
205                'ectool temps %d' % sensor_id)
206        for line in lines:
207            matched = pattern.match(line)
208            if matched is not None:
209                return int(matched.group(1)) - 273
210        # Should never reach here
211        raise error.TestError("Unexpected error occurred")
212
213
214    def check_temp_report(self):
215        """
216        Checker of temperature reporting.
217
218        This function reads CPU temperature from servo and ectool. If
219        the two readings mismatches by more than TEMP_MISMATCH_MARGIN,'
220        test fails.
221
222        Raises:
223          error.TestFail: Raised when temperature reading mismatches by
224            more than TEMP_MISMATCH_MARGIN.
225        """
226        cpu_temp_id = self._find_cpu_sensor_id()
227        logging.info("CPU temperature sensor ID is %d", cpu_temp_id)
228        ectool_cpu_temp = self._get_temp_reading(cpu_temp_id)
229        servo_cpu_temp = int(self.servo.get('cpu_temp'))
230        logging.info("CPU temperature from servo: %d C", servo_cpu_temp)
231        logging.info("CPU temperature from ectool: %d C", ectool_cpu_temp)
232        if abs(ectool_cpu_temp - servo_cpu_temp) > self.TEMP_MISMATCH_MARGIN:
233            raise error.TestFail(
234                    'CPU temperature readings from servo and ectool differ')
235
236
237    def _stress_dut(self, threads=4):
238        """
239        Stress DUT system.
240
241        By reading from /dev/urandom and writing to /dev/null, we can stress
242        DUT and cause CPU temperature to go up. We stress the system forever,
243        until _stop_stressing is called to kill the stress threads. This
244        function is non-blocking.
245
246        Args:
247          threads: Number of threads (processes) when stressing forever.
248
249        Returns:
250          A list of stress process IDs is returned.
251        """
252        logging.info("Stressing DUT with %d threads...", threads)
253        self.faft_client.system.run_shell_command('pkill dd')
254        stress_cmd = 'dd if=/dev/urandom of=/dev/null bs=1M &'
255        # Grep for [d]d instead of dd to prevent getting the PID of grep
256        # itself.
257        pid_cmd = "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'"
258        self._stress_pid = list()
259        for _ in xrange(threads):
260            self.faft_client.system.run_shell_command(stress_cmd)
261        lines = self.faft_client.system.run_shell_command_get_output(
262                    pid_cmd)
263        for line in lines:
264            logging.info("PID is %s", line)
265            self._stress_pid.append(int(line.strip()))
266        return self._stress_pid
267
268
269    def _stop_stressing(self):
270        """Stop stressing DUT system"""
271        stop_cmd = 'kill -9 %d'
272        for pid in self._stress_pid:
273            self.faft_client.system.run_shell_command(stop_cmd % pid)
274
275
276    def check_fan_off(self):
277        """
278        Checker of fan turned off.
279
280        The function first delay FAN_DELAY seconds to ensure fan stops.
281        Then it reads fan speed and return False if fan speed is non-zero.
282        Then it stresses the system a bit and check if the temperature
283        goes up by more than TEMP_STRESS_INCREASE.
284
285        Raises:
286          error.TestFail: Raised when temperature doesn't increase by more than
287            TEMP_STRESS_INCREASE.
288        """
289        time.sleep(self.FAN_DELAY)
290        fan_speed = self.servo.get('fan_actual_rpm')
291        if int(fan_speed) != 0:
292            raise error.TestFail("Fan is not turned off.")
293        logging.info("EC reports fan turned off.")
294        cpu_temp_before = int(self.servo.get('cpu_temp'))
295        logging.info("CPU temperature before stressing is %d C",
296                     cpu_temp_before)
297        self._stress_dut()
298        time.sleep(self.STRESS_DELAY_NO_FAN)
299        cpu_temp_after = int(self.servo.get('cpu_temp'))
300        self._stop_stressing()
301        logging.info("CPU temperature after stressing is %d C",
302                     cpu_temp_after)
303        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
304            raise error.TestFail(
305                    "CPU temperature did not go up by more than %d degrees" %
306                    self.TEMP_STRESS_INCREASE)
307
308
309    def _get_temp_sensor_type(self, sensor_id):
310        """
311        Get type of a given temperature sensor
312
313        Args:
314          sensor_id: Temperature sensor ID.
315
316        Returns:
317          Type ID of the temperature sensor.
318
319        Raises:
320          error.TestError: Raised when ectool doesn't behave as we expected.
321        """
322        assert sensor_id < self._num_temp_sensor
323        pattern = re.compile('Sensor type: (\d*)')
324        lines = self.faft_client.system.run_shell_command_get_output(
325                'ectool tempsinfo %d' % sensor_id)
326        for line in lines:
327            matched = pattern.match(line)
328            if matched is not None:
329                return int(matched.group(1))
330        # Should never reach here
331        raise error.TestError("Unexpected error occurred")
332
333
334    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
335        """
336        Check if the given fan_speed is reasonable from the view of certain
337        temperature sensor. There could be three types of outcome:
338          1. Fan speed is higher than expected. This may be due to other
339             sensor sensing higher temperature and setting fan to higher
340             speed.
341          2. Fan speed is as expected.
342          3. Fan speed is lower than expected. In this case, EC is not
343             working as expected and an error should be raised.
344
345        Args:
346          fan_speed: The current fan speed in RPM.
347          sensor_id: The ID of temperature sensor.
348
349        Returns:
350          0x00: Fan speed is higher than expected.
351          0x01: Fan speed is as expected.
352          0x10: Fan speed is lower than expected.
353
354        Raises:
355          error.TestError: Raised when getting unexpected fan speed.
356        """
357        sensor_type = self._get_temp_sensor_type(sensor_id)
358        if sensor_type == self.SENSOR_TYPE_IGNORED:
359            # This sensor should be ignored
360            return 0x00
361
362        if self._thermal_setting[sensor_type][-1] == -273:
363            # The fan stepping for this type of sensor is disabled
364            return 0x00
365
366        try:
367            idx = self._fan_steps.index(fan_speed)
368        except:
369            raise error.TestError("Unexpected fan speed: %d" % fan_speed)
370
371        if idx == 0:
372            lower_bound = -self.INT_MAX
373            upper_bound = self._thermal_setting[sensor_type][3]
374        elif idx == len(self._fan_steps) - 1:
375            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
376            upper_bound = self.INT_MAX
377        else:
378            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
379            upper_bound = self._thermal_setting[sensor_type][idx + 3]
380
381        temp_reading = self._get_temp_reading(sensor_id)
382        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
383        logging.info("  Expecting %d - %d C", lower_bound, upper_bound)
384        if temp_reading > upper_bound:
385            return 0x00
386        elif temp_reading < lower_bound:
387            return 0x10
388        else:
389            return 0x01
390
391
392    def check_auto_fan(self):
393        """
394        Checker of thermal engine automatic fan speed control.
395
396        Stress DUT system for a longer period to make temperature more stable
397        and check if fan speed is controlled as expected.
398
399        Raises:
400          error.TestFail: Raised when fan speed is not as expected.
401        """
402        self._stress_dut()
403        time.sleep(self.STRESS_DELAY)
404        fan_rpm = int(self.servo.get('fan_target_rpm'))
405        logging.info('Fan speed is %d RPM', fan_rpm)
406        try:
407            result = reduce(lambda x, y: x | y,
408                            [self._check_fan_speed_per_sensor(fan_rpm, x)
409                             for x in range(self._num_temp_sensor)])
410        finally:
411            self._stop_stressing()
412        if result == 0x00:
413            raise error.TestFail("Fan speed higher than expected")
414        if result == 0x10:
415            raise error.TestFail("Fan speed lower than expected")
416
417
418    def run_once(self):
419        if not self.check_ec_capability(['thermal']):
420            raise error.TestNAError("Nothing needs to be tested on this device")
421        logging.info("Checking host temperature report.")
422        self.check_temp_report()
423
424        self.turn_off_fan()
425        logging.info("Verifying fan is turned off.")
426        self.check_fan_off()
427
428        self.enable_auto_fan_control()
429        logging.info("Verifying automatic fan control functionality.")
430        self.check_auto_fan()
431