1/*
2** Copyright 2007, The Android Open Source Project
3**
4** Licensed under the Apache License, Version 2.0 (the "License");
5** you may not use this file except in compliance with the License.
6** You may obtain a copy of the License at
7**
8**     http://www.apache.org/licenses/LICENSE-2.0
9**
10** Unless required by applicable law or agreed to in writing, software
11** distributed under the License is distributed on an "AS IS" BASIS,
12** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13** See the License for the specific language governing permissions and
14** limitations under the License.
15*/
16
17#define LOG_TAG "SchedPolicy"
18
19#include <errno.h>
20#include <fcntl.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <unistd.h>
25
26#include <log/log.h>
27#include <cutils/sched_policy.h>
28
29#define UNUSED __attribute__((__unused__))
30
31/* Re-map SP_DEFAULT to the system default policy, and leave other values unchanged.
32 * Call this any place a SchedPolicy is used as an input parameter.
33 * Returns the possibly re-mapped policy.
34 */
35static inline SchedPolicy _policy(SchedPolicy p)
36{
37   return p == SP_DEFAULT ? SP_SYSTEM_DEFAULT : p;
38}
39
40#if defined(__ANDROID__)
41
42#include <pthread.h>
43#include <sched.h>
44#include <sys/prctl.h>
45
// Set to non-zero to compile in the per-call SLOGD tracing in set_sched_policy().
#define POLICY_DEBUG 0

// Timer slack values in nanoseconds passed to set_timerslack_ns() by
// set_sched_policy(): TIMER_SLACK_BG (40 ms) is applied when the thread moves
// to SP_BACKGROUND, TIMER_SLACK_FG (50 us) for every other policy.
#define TIMER_SLACK_BG 40000000
#define TIMER_SLACK_FG 50000

// Guards the one-time execution of __initialize() via pthread_once().
static pthread_once_t the_once = PTHREAD_ONCE_INIT;

// -1 until __initialize() runs; afterwards non-zero iff
// /proc/<pid>/timerslack_ns was found to be writable.
static int __sys_supports_timerslack = -1;

// File descriptors open to /dev/cpuset/../tasks, setup by initialize, or -1 on error
static int system_bg_cpuset_fd = -1;
static int bg_cpuset_fd = -1;
static int fg_cpuset_fd = -1;
static int ta_cpuset_fd = -1; // special cpuset for top app

// File descriptors open to /dev/stune/../tasks, setup by initialize, or -1 on error
static int bg_schedboost_fd = -1;
static int fg_schedboost_fd = -1;
static int ta_schedboost_fd = -1;
static int rt_schedboost_fd = -1;
67
68/* Add tid to the scheduling group defined by the policy */
69static int add_tid_to_cgroup(int tid, int fd)
70{
71    if (fd < 0) {
72        SLOGE("add_tid_to_cgroup failed; fd=%d\n", fd);
73        errno = EINVAL;
74        return -1;
75    }
76
77    // specialized itoa -- works for tid > 0
78    char text[22];
79    char *end = text + sizeof(text) - 1;
80    char *ptr = end;
81    *ptr = '\0';
82    while (tid > 0) {
83        *--ptr = '0' + (tid % 10);
84        tid = tid / 10;
85    }
86
87    if (write(fd, ptr, end - ptr) < 0) {
88        /*
89         * If the thread is in the process of exiting,
90         * don't flag an error
91         */
92        if (errno == ESRCH)
93                return 0;
94        SLOGW("add_tid_to_cgroup failed to write '%s' (%s); fd=%d\n",
95              ptr, strerror(errno), fd);
96        errno = EINVAL;
97        return -1;
98    }
99
100    return 0;
101}
102
103/*
104    If CONFIG_CPUSETS for Linux kernel is set, "tasks" can be found under
105    /dev/cpuset mounted in init.rc; otherwise, that file does not exist
106    even though the directory, /dev/cpuset, is still created (by init.rc).
107
108    A couple of other candidates (under cpuset mount directory):
109        notify_on_release
110        release_agent
111
112    Yet another way to decide if cpuset is enabled is to parse
    /proc/self/status and search for lines beginning with "Mems_allowed".
114
    If CONFIG_PROC_PID_CPUSET is set, the existence of "/proc/self/cpuset" can
116    be used to decide if CONFIG_CPUSETS is set, so we don't have a dependency
117    on where init.rc mounts cpuset. That's why we'd better require this
118    configuration be set if CONFIG_CPUSETS is set.
119
120    In older releases, this was controlled by build-time configuration.
121 */
bool cpusets_enabled() {
    // Probed once, on the first call; later calls return the cached answer.
    static const bool has_cpuset_tasks = !access("/dev/cpuset/tasks", F_OK);
    return has_cpuset_tasks;
}
127
128/*
129    Similar to CONFIG_CPUSETS above, but with a different configuration
130    CONFIG_CGROUP_SCHEDTUNE that's in Android common Linux kernel and Linaro
131    Stable Kernel (LSK), but not in mainline Linux as of v4.9.
132
133    In older releases, this was controlled by build-time configuration.
134 */
bool schedboost_enabled() {
    // Probed once, on the first call; later calls return the cached answer.
    static const bool has_stune_tasks = !access("/dev/stune/tasks", F_OK);
    return has_stune_tasks;
}
140
141static void __initialize() {
142    const char* filename;
143
144    if (cpusets_enabled()) {
145        if (!access("/dev/cpuset/tasks", W_OK)) {
146
147            filename = "/dev/cpuset/foreground/tasks";
148            fg_cpuset_fd = open(filename, O_WRONLY | O_CLOEXEC);
149            filename = "/dev/cpuset/background/tasks";
150            bg_cpuset_fd = open(filename, O_WRONLY | O_CLOEXEC);
151            filename = "/dev/cpuset/system-background/tasks";
152            system_bg_cpuset_fd = open(filename, O_WRONLY | O_CLOEXEC);
153            filename = "/dev/cpuset/top-app/tasks";
154            ta_cpuset_fd = open(filename, O_WRONLY | O_CLOEXEC);
155
156            if (schedboost_enabled()) {
157                filename = "/dev/stune/top-app/tasks";
158                ta_schedboost_fd = open(filename, O_WRONLY | O_CLOEXEC);
159                filename = "/dev/stune/foreground/tasks";
160                fg_schedboost_fd = open(filename, O_WRONLY | O_CLOEXEC);
161                filename = "/dev/stune/background/tasks";
162                bg_schedboost_fd = open(filename, O_WRONLY | O_CLOEXEC);
163                filename = "/dev/stune/rt/tasks";
164                rt_schedboost_fd = open(filename, O_WRONLY | O_CLOEXEC);
165            }
166        }
167    }
168
169    char buf[64];
170    snprintf(buf, sizeof(buf), "/proc/%d/timerslack_ns", getpid());
171    __sys_supports_timerslack = !access(buf, W_OK);
172}
173
/*
 * Looks up the cgroup path of a thread under the requested cgroup subsystem.
 *
 * The data from /proc/<tid>/cgroup looks (something) like:
 *  2:cpu:/bg_non_interactive
 *  1:cpuacct:/
 *
 * The first line whose second field equals "subsys" is used. The part after
 * the leading "/" is copied into "buf"; it is an empty string for the default
 * cgroup. If the string does not fit in "bufLen" bytes it is truncated; the
 * result is always NUL-terminated.
 *
 *   tid     thread whose /proc/<tid>/cgroup file is read
 *   subsys  subsystem name compared against the second field of each line
 *   buf     destination buffer
 *   bufLen  size of "buf" in bytes; must be at least 1
 *
 * Returns 0 on success. Returns -1 if "buf" is unusable (errno = EINVAL), the
 * file cannot be opened, a line is malformed, or no line names "subsys".
 */
static int getCGroupSubsys(int tid, const char* subsys, char* buf, size_t bufLen)
{
#if defined(__ANDROID__)
    char pathBuf[32];
    char lineBuf[256];
    FILE *fp;

    /* Without room for the terminator "bufLen - 1" below would wrap around. */
    if (buf == NULL || bufLen == 0) {
        errno = EINVAL;
        return -1;
    }

    snprintf(pathBuf, sizeof(pathBuf), "/proc/%d/cgroup", tid);
    if (!(fp = fopen(pathBuf, "re"))) {
        return -1;
    }

    /* fgets() already reserves one byte for the terminating NUL. */
    while (fgets(lineBuf, sizeof(lineBuf), fp)) {
        char *next = lineBuf;
        char *found_subsys;
        char *grp;
        size_t len;

        /* Junk the first field */
        if (!strsep(&next, ":")) {
            goto out_bad_data;
        }

        if (!(found_subsys = strsep(&next, ":"))) {
            goto out_bad_data;
        }

        if (strcmp(found_subsys, subsys)) {
            /* Not the subsys we're looking for */
            continue;
        }

        if (!(grp = strsep(&next, ":"))) {
            goto out_bad_data;
        }

        /* Drop the leading '/', but never step past the end of an empty field */
        if (*grp == '/') {
            grp++;
        }

        /*
         * Drop the trailing '\n' only when it is really there: a line that was
         * cut short by the buffer, or a final line without a newline, must not
         * lose its last character.
         */
        len = strlen(grp);
        if (len > 0 && grp[len - 1] == '\n') {
            grp[--len] = '\0';
        }

        /* Truncate to what fits, always leaving room for the terminator */
        if (len >= bufLen) {
            len = bufLen - 1;
        }
        memcpy(buf, grp, len);
        buf[len] = '\0';
        fclose(fp);
        return 0;
    }

    SLOGE("Failed to find subsys %s", subsys);
    fclose(fp);
    return -1;
 out_bad_data:
    SLOGE("Bad cgroup data {%s}", lineBuf);
    fclose(fp);
    return -1;
#else
    errno = ENOSYS;
    return -1;
#endif
}
245
246int get_sched_policy(int tid, SchedPolicy *policy)
247{
248    if (tid == 0) {
249        tid = gettid();
250    }
251    pthread_once(&the_once, __initialize);
252
253    char grpBuf[32];
254
255    grpBuf[0] = '\0';
256    if (schedboost_enabled()) {
257        if (getCGroupSubsys(tid, "schedtune", grpBuf, sizeof(grpBuf)) < 0) return -1;
258    }
259    if ((grpBuf[0] == '\0') && cpusets_enabled()) {
260        if (getCGroupSubsys(tid, "cpuset", grpBuf, sizeof(grpBuf)) < 0) return -1;
261    }
262    if (grpBuf[0] == '\0') {
263        *policy = SP_FOREGROUND;
264    } else if (!strcmp(grpBuf, "foreground")) {
265        *policy = SP_FOREGROUND;
266    } else if (!strcmp(grpBuf, "system-background")) {
267        *policy = SP_SYSTEM;
268    } else if (!strcmp(grpBuf, "background")) {
269        *policy = SP_BACKGROUND;
270    } else if (!strcmp(grpBuf, "top-app")) {
271        *policy = SP_TOP_APP;
272    } else {
273        errno = ERANGE;
274        return -1;
275    }
276    return 0;
277}
278
279int set_cpuset_policy(int tid, SchedPolicy policy)
280{
281    // in the absence of cpusets, use the old sched policy
282    if (!cpusets_enabled()) {
283        return set_sched_policy(tid, policy);
284    }
285
286    if (tid == 0) {
287        tid = gettid();
288    }
289    policy = _policy(policy);
290    pthread_once(&the_once, __initialize);
291
292    int fd = -1;
293    int boost_fd = -1;
294    switch (policy) {
295    case SP_BACKGROUND:
296        fd = bg_cpuset_fd;
297        boost_fd = bg_schedboost_fd;
298        break;
299    case SP_FOREGROUND:
300    case SP_AUDIO_APP:
301    case SP_AUDIO_SYS:
302        fd = fg_cpuset_fd;
303        boost_fd = fg_schedboost_fd;
304        break;
305    case SP_TOP_APP :
306        fd = ta_cpuset_fd;
307        boost_fd = ta_schedboost_fd;
308        break;
309    case SP_SYSTEM:
310        fd = system_bg_cpuset_fd;
311        break;
312    default:
313        boost_fd = fd = -1;
314        break;
315    }
316
317    if (add_tid_to_cgroup(tid, fd) != 0) {
318        if (errno != ESRCH && errno != ENOENT)
319            return -errno;
320    }
321
322    if (schedboost_enabled()) {
323        if (boost_fd > 0 && add_tid_to_cgroup(tid, boost_fd) != 0) {
324            if (errno != ESRCH && errno != ENOENT)
325                return -errno;
326        }
327    }
328
329    return 0;
330}
331
332static void set_timerslack_ns(int tid, unsigned long slack) {
333    // v4.6+ kernels support the /proc/<tid>/timerslack_ns interface.
334    // TODO: once we've backported this, log if the open(2) fails.
335    if (__sys_supports_timerslack) {
336        char buf[64];
337        snprintf(buf, sizeof(buf), "/proc/%d/timerslack_ns", tid);
338        int fd = open(buf, O_WRONLY | O_CLOEXEC);
339        if (fd != -1) {
340            int len = snprintf(buf, sizeof(buf), "%lu", slack);
341            if (write(fd, buf, len) != len) {
342                SLOGE("set_timerslack_ns write failed: %s\n", strerror(errno));
343            }
344            close(fd);
345            return;
346        }
347    }
348
349    // TODO: Remove when /proc/<tid>/timerslack_ns interface is backported.
350    if ((tid == 0) || (tid == gettid())) {
351        if (prctl(PR_SET_TIMERSLACK, slack) == -1) {
352            SLOGE("set_timerslack_ns prctl failed: %s\n", strerror(errno));
353        }
354    }
355}
356
357int set_sched_policy(int tid, SchedPolicy policy)
358{
359    if (tid == 0) {
360        tid = gettid();
361    }
362    policy = _policy(policy);
363    pthread_once(&the_once, __initialize);
364
365#if POLICY_DEBUG
366    char statfile[64];
367    char statline[1024];
368    char thread_name[255];
369
370    snprintf(statfile, sizeof(statfile), "/proc/%d/stat", tid);
371    memset(thread_name, 0, sizeof(thread_name));
372
373    int fd = open(statfile, O_RDONLY | O_CLOEXEC);
374    if (fd >= 0) {
375        int rc = read(fd, statline, 1023);
376        close(fd);
377        statline[rc] = 0;
378        char *p = statline;
379        char *q;
380
381        for (p = statline; *p != '('; p++);
382        p++;
383        for (q = p; *q != ')'; q++);
384
385        strncpy(thread_name, p, (q-p));
386    }
387    switch (policy) {
388    case SP_BACKGROUND:
389        SLOGD("vvv tid %d (%s)", tid, thread_name);
390        break;
391    case SP_FOREGROUND:
392    case SP_AUDIO_APP:
393    case SP_AUDIO_SYS:
394    case SP_TOP_APP:
395        SLOGD("^^^ tid %d (%s)", tid, thread_name);
396        break;
397    case SP_SYSTEM:
398        SLOGD("/// tid %d (%s)", tid, thread_name);
399        break;
400    case SP_RT_APP:
401	SLOGD("RT  tid %d (%s)", tid, thread_name);
402	break;
403    default:
404        SLOGD("??? tid %d (%s)", tid, thread_name);
405        break;
406    }
407#endif
408
409    if (schedboost_enabled()) {
410        int boost_fd = -1;
411        switch (policy) {
412        case SP_BACKGROUND:
413            boost_fd = bg_schedboost_fd;
414            break;
415        case SP_FOREGROUND:
416        case SP_AUDIO_APP:
417        case SP_AUDIO_SYS:
418            boost_fd = fg_schedboost_fd;
419            break;
420        case SP_TOP_APP:
421            boost_fd = ta_schedboost_fd;
422            break;
423        case SP_RT_APP:
424	    boost_fd = rt_schedboost_fd;
425	    break;
426        default:
427            boost_fd = -1;
428            break;
429        }
430
431        if (boost_fd > 0 && add_tid_to_cgroup(tid, boost_fd) != 0) {
432            if (errno != ESRCH && errno != ENOENT)
433                return -errno;
434        }
435
436    }
437
438    set_timerslack_ns(tid, policy == SP_BACKGROUND ? TIMER_SLACK_BG : TIMER_SLACK_FG);
439
440    return 0;
441}
442
443#else
444
445/* Stubs for non-Android targets. */
446
447int set_sched_policy(int tid UNUSED, SchedPolicy policy UNUSED)
448{
449    return 0;
450}
451
452int get_sched_policy(int tid UNUSED, SchedPolicy *policy)
453{
454    *policy = SP_SYSTEM_DEFAULT;
455    return 0;
456}
457
458#endif
459
460const char *get_sched_policy_name(SchedPolicy policy)
461{
462    policy = _policy(policy);
463    static const char * const strings[SP_CNT] = {
464       [SP_BACKGROUND] = "bg",
465       [SP_FOREGROUND] = "fg",
466       [SP_SYSTEM]     = "  ",
467       [SP_AUDIO_APP]  = "aa",
468       [SP_AUDIO_SYS]  = "as",
469       [SP_TOP_APP]    = "ta",
470       [SP_RT_APP]    = "rt",
471    };
472    if ((policy < SP_CNT) && (strings[policy] != NULL))
473        return strings[policy];
474    else
475        return "error";
476}
477