# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2016, ARM Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from bart.sched.SchedMultiAssert import SchedMultiAssert
from env import TestEnv
import json
import time
import trappy
import unittest
import os
from wlgen import Periodic, RTA

# Read the config file and update the globals
CONF_FILE = os.path.join(
    os.path.dirname(
        os.path.abspath(__file__)),
    "capacity_capping.config")

with open(CONF_FILE, "r") as fh:
    CONF_VARS = json.load(fh)
    globals().update(CONF_VARS)
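# The config file is expected to provide TEST_CONF, BIG_DCYCLE,
# WORKLOAD_DURATION_S, WORKLOAD_PERIOD_MS, MIGRATION_WINDOW and
# EXPECTED_BUSY_TIME_PCT, which are used as module-level globals below.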

class CapacityCappingTest(unittest.TestCase):
    """
    Goal
    ====

    Verify that dynamic CPU capacity capping works in the system.

    Detailed Description
    ====================

    The maximum frequency of a core can be restricted to a lower value
    than its absolute maximum frequency.  This may happen because of
    thermal management or as a request from userspace via sysfs.
    Dynamic CPU capacity capping provides PELT and the scheduler's CPU
    capacity management with a maximum frequency scaling corrector,
    which describes the effect of running a CPU with a current
    maximum frequency lower than its absolute maximum frequency.

    The test creates as many busy threads as there are big CPUs.
    These busy threads have high load and should run on the CPUs with
    the highest capacity.  The test has three phases of equal length.
    In the first phase, the system runs unconstrained.  In the second
    phase, the maximum frequency of the big CPUs is limited to the
    lowest frequency that the big frequency domain can run at.
    Finally, in the third phase, the maximum frequency of the big CPUs
    is restored to its absolute maximum, i.e. the system is
    unconstrained again.

    This test assumes that the lowest OPPs of the big CPUs have less
    capacity than the highest OPP of the little CPUs.  If that is not
    the case, this test will fail.  Arguably, capacity capping is not
    needed in such a system.

    Expected Behaviour
    ==================

    The threads have high load, so they should always run on the CPUs
    with the highest capacity in the system.  In the first phase the
    system is unconstrained, so they should run on the big CPUs.  In
    the second phase, the big cluster's maximum frequency is limited
    and the little CPUs have higher capacity.  Therefore, in the
    second phase of the test, the threads should migrate to the little
    CPUs.  In the third phase the maximum frequency of the big CPUs is
    restored, so they again become the CPUs with the highest capacity
    in the system.  The busy threads must run on the big CPUs in the
    third phase.

    """

    @classmethod
    def setUpClass(cls):
        cls.params = {}
        cls.env = TestEnv(test_conf=TEST_CONF)
        cls.trace_file = os.path.join(cls.env.res_dir, "cap_cap.dat")
        cls.populate_params()

        cls.run_workload()

        trace = trappy.FTrace(cls.trace_file)
        cls.sa = SchedMultiAssert(trace, cls.env.topology,
                                  execnames=cls.params.keys())
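        # Use the earliest rt-app task start time found in the trace as the
        # reference point for the three phase windows checked by the tests.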
        times = cls.sa.getStartTime()
        cls.wload_start_time = min(t["starttime"] for t in times.itervalues())

    @classmethod
    def populate_params(cls):
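        # Create one busy Periodic rt-app task per big CPU; the duty cycle,
        # duration and period all come from the config file globals.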
        for idx in range(len(cls.env.target.bl.bigs)):
            task_name = "busy_thread{}".format(idx)
            cls.params[task_name] = Periodic(
                duty_cycle_pct=BIG_DCYCLE,
                duration_s=WORKLOAD_DURATION_S,
                period_ms=WORKLOAD_PERIOD_MS,
            ).get()

    @classmethod
    def run_workload(cls):
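        # Cap the big cluster by writing the first big CPU's scaling_max_freq.
        # This assumes the big CPUs share a single cpufreq policy (one
        # frequency domain), so capping one CPU caps the whole cluster.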
        big_cpu = cls.env.target.bl.bigs[0]
        big_cpufreq = "/sys/devices/system/cpu/cpu{}/cpufreq".format(big_cpu)
        max_freq_path = os.path.join(big_cpufreq, "scaling_max_freq")
        available_freqs_path = os.path.join(big_cpufreq,
                                            "scaling_available_frequencies")

        available_freqs_str = cls.env.target.read_value(available_freqs_path)
        available_freqs = available_freqs_str.split()
        min_frequency = available_freqs[0]
        max_frequency = available_freqs[-1]
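        # scaling_available_frequencies is assumed to be sorted in ascending
        # order, so the first entry is the lowest OPP and the last the highest.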

        wload = RTA(cls.env.target, "busy_threads",
                    calibration=cls.env.calibration())
        wload.conf(kind="profile", params=cls.params)
        phase_duration = WORKLOAD_DURATION_S / 3.

        cls.env.ftrace.start()

        wload.run(out_dir=cls.env.res_dir, background=True)
        time.sleep(phase_duration)

        # Writing values on the target can take a non-negligible amount of time.
        # To prevent this from shifting the transitions between
        # constrained/unconstrained phases, measure this write latency and
        # reduce our sleep time by that amount.
        def write_and_sleep(max_freq):
            time_before = time.time()
            cls.env.target.write_value(max_freq_path, max_freq)
            write_latency = time.time() - time_before
            if write_latency > phase_duration:
                raise ValueError(
                    "Latency of Target.write_value greater than phase duration! "
                    "Increase WORKLOAD_DURATION_S or speed up target connection")
            time.sleep(phase_duration - write_latency)

        write_and_sleep(min_frequency)
        write_and_sleep(max_frequency)

        cls.env.ftrace.stop()
        cls.env.ftrace.get_trace(cls.trace_file)

    def check_residencies(self, cpus, cpus_name, window, phase_description):
        """Helper function to check the residencies of all busy threads on a
        given set of cpus for a period of time."""

        residency_dict = self.sa.getResidency("cluster", cpus, window=window,
                                              percent=True)
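        # With percent=True, getResidency reports each task's residency as a
        # percentage of the window spent on the given set of cpus.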

        for pid, task_res in residency_dict.iteritems():
            msg = "Pid {} ran on {} cpus only {:.2f}% of the time when the system was {} (expected more than {:.2f}%)" \
                .format(pid, cpus_name, task_res["residency"],
                        phase_description, EXPECTED_BUSY_TIME_PCT)

            self.assertGreater(task_res["residency"], EXPECTED_BUSY_TIME_PCT,
                               msg)

    def test_tasks_starts_on_big(self):
        """All busy threads run on the big cpus at the beginning"""

        phase_duration = WORKLOAD_DURATION_S / 3.
        unconstrained_window = (self.wload_start_time,
                                self.wload_start_time + phase_duration)
        self.check_residencies(self.env.target.bl.bigs, "big",
                               unconstrained_window, "unconstrained")

    def test_task_migrates_to_little_when_constrained(self):
        """Busy threads migrate to little in the thermally constrained phase"""

        phase_duration = WORKLOAD_DURATION_S / 3.
        mig_start = self.wload_start_time + phase_duration
        mig_end = mig_start + MIGRATION_WINDOW
        num_tasks = len(self.params)

        msg = "One or more of the busy threads didn't migrate to a little cpu between {} and {}" \
              .format(mig_start, mig_end)
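        # rank=num_tasks asks assertSwitch to check that every busy thread
        # switched from the big to the little cluster within the window.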
        self.assertTrue(self.sa.assertSwitch("cluster", self.env.target.bl.bigs,
                                             self.env.target.bl.littles,
                                             window=(mig_start, mig_end),
                                             rank=num_tasks),
                        msg=msg)

        # The tasks must have migrated by the end of the
        # migration_window and they should not move until the end of
        # the phase.
        constrained_window = (mig_end,
                              self.wload_start_time + (2 * phase_duration))
        self.check_residencies(self.env.target.bl.littles, "little",
                               constrained_window, "thermally constrained")

    def test_task_returns_to_big_when_back_to_unconstrained(self):
        """Busy threads return to big when the system goes back to unconstrained

        In the last phase, when the frequency capping is released, busy threads
        return to the big cpus"""

        phase_duration = WORKLOAD_DURATION_S / 3.
        mig_start = self.wload_start_time + 2 * phase_duration
        mig_end = mig_start + MIGRATION_WINDOW
        num_tasks = len(self.params)

        msg = "One or more of the busy threads didn't return to a big cpu between {} and {}" \
              .format(mig_start, mig_end)
        self.assertTrue(self.sa.assertSwitch("cluster",
                                             self.env.target.bl.littles,
                                             self.env.target.bl.bigs,
                                             window=(mig_start, mig_end),
                                             rank=num_tasks),
                        msg=msg)

        # The tasks must have migrated by the end of the
        # migration_window and they should continue to run on bigs
        # until the end of the run.
        last_phase = (mig_end, self.wload_start_time + WORKLOAD_DURATION_S)
        self.check_residencies(self.env.target.bl.bigs, "big",
                               last_phase, "unconstrained")
227