lmkd.c revision 1a0d9be53ec41ce335c5b586b6be2194d01eb23d
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/*
 * Log tag for this daemon.  Defined ahead of the <log/log.h> include;
 * presumably that header picks it up for the ALOG* macros -- confirm there.
 */
#define LOG_TAG "lowmemorykiller"
18
19#include <arpa/inet.h>
20#include <errno.h>
21#include <signal.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <time.h>
26#include <sys/cdefs.h>
27#include <sys/epoll.h>
28#include <sys/eventfd.h>
29#include <sys/mman.h>
30#include <sys/socket.h>
31#include <sys/types.h>
32#include <unistd.h>
33
34#include <cutils/sockets.h>
35#include <log/log.h>
36#include <processgroup/processgroup.h>
37
/* Fallback definition for toolchains that do not already provide __unused. */
#ifndef __unused
#define __unused __attribute__((__unused__))
#endif

/* Directory prefix of the memory cgroup control files opened by init_mp(). */
#define MEMCG_SYSFS_PATH "/dev/memcg/"
/* Pressure level string registered through cgroup.event_control. */
#define MEMPRESSURE_WATCH_LEVEL "medium"
/* Per-zone page counters parsed by zoneinfo_parse(). */
#define ZONEINFO_PATH "/proc/zoneinfo"
/* Size of the line buffers used when reading /proc files. */
#define LINE_MAX 128

/* Parameter files of the in-kernel low memory killer module. */
#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
#define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj"

/* Element count of a true array (not valid for pointers). */
#define ARRAY_SIZE(x)   (sizeof(x) / sizeof((x)[0]))
51
/*
 * Command codes carried in the first word of a control socket packet
 * (see ctrl_command_handler()).
 */
enum lmk_cmd {
    LMK_TARGET = 0,     /* followed by (minfree, adj) pairs */
    LMK_PROCPRIO = 1,   /* followed by pid, uid, oomadj */
    LMK_PROCREMOVE = 2, /* followed by pid */
};
57
/* Maximum number of (minfree, adj) pairs accepted by LMK_TARGET. */
#define MAX_TARGETS 6

/*
 * Largest control packet in bytes: the LMK_TARGET command word followed by
 * MAX_TARGETS pairs of minfree and minkillprio values.
 */
#define CTRL_PACKET_MAX (sizeof(int) * (1 + 2 * MAX_TARGETS))
64
/*
 * Non-zero while the in-kernel low memory killer is driven instead of
 * memory pressure events; init() recomputes it from the minfree file's
 * writability.
 */
static int use_inkernel_interface = 1;

/* eventfd signalled on a memory pressure event; assigned in init_mp() */
static int mpevfd;

/* control socket: listening descriptor */
static int ctrl_lfd;
/* control socket: accepted data connection, -1 while none is open */
static int ctrl_dfd = -1;
/* set when the data connection was replaced during the current loop pass */
static int ctrl_dfd_reopened;

/* 1 memory pressure level, 1 ctrl listen socket, 1 ctrl data socket */
#define MAX_EPOLL_EVENTS 3
/* epoll instance every descriptor above is registered with */
static int epollfd;
/* number of descriptors currently registered; sizes the epoll_wait buffer */
static int maxevents;
80
/* legacy oom_adj value accepted by cmd_procprio() below the regular range */
#define OOM_DISABLE (-17)
/* regular oom_adj range, both ends inclusive */
#define OOM_ADJUST_MIN (-16)
#define OOM_ADJUST_MAX 15

/* kernel OOM score values */
#define OOM_SCORE_ADJ_MIN       (-1000)
#define OOM_SCORE_ADJ_MAX       1000
89
90static int lowmem_adj[MAX_TARGETS];
91static int lowmem_minfree[MAX_TARGETS];
92static int lowmem_targets_size;
93
/*
 * Page counters accumulated over every line of /proc/zoneinfo by
 * zoneinfo_parse().
 */
struct sysmeminfo {
    int nr_free_pages;      /* sum of "nr_free_pages" values */
    int nr_file_pages;      /* sum of "nr_file_pages" values */
    int nr_shmem;           /* sum of "nr_shmem" values */
    int totalreserve_pages; /* "high" values plus the largest entry of each
                               "protection:" line */
};
100
/*
 * Link node of a circular doubly linked list.  An empty list is a head
 * node whose next and prev both point at itself (see init()).
 */
struct adjslot_list {
    struct adjslot_list *next; /* towards the most recently inserted node */
    struct adjslot_list *prev; /* towards the tail */
};
105
106struct proc {
107    struct adjslot_list asl;
108    int pid;
109    uid_t uid;
110    int oomadj;
111    struct proc *pidhash_next;
112};
113
114#define PIDHASH_SZ 1024
115static struct proc *pidhash[PIDHASH_SZ];
116#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
117
118#define ADJTOSLOT(adj) (adj + -OOM_ADJUST_MIN)
119static struct adjslot_list procadjslot_list[ADJTOSLOT(OOM_ADJUST_MAX) + 1];
120
/*
 * After a kill, wait 1-2 seconds for the death report of the killed
 * process before considering killing more processes.
 */
#define KILL_TIMEOUT 2
/*
 * Time of the last kill we initiated; mp_event() returns early while it is
 * less than KILL_TIMEOUT seconds old.  cmd_procremove() resets it to 0.
 */
static time_t kill_lasttime;

/* Page size in kB (page size / 1024), computed in init(). */
static long page_k;
131
132static int lowmem_oom_adj_to_oom_score_adj(int oom_adj)
133{
134    if (oom_adj == OOM_ADJUST_MAX)
135        return OOM_SCORE_ADJ_MAX;
136    else
137        return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
138}
139
140static struct proc *pid_lookup(int pid) {
141    struct proc *procp;
142
143    for (procp = pidhash[pid_hashfn(pid)]; procp && procp->pid != pid;
144         procp = procp->pidhash_next)
145            ;
146
147    return procp;
148}
149
150static void adjslot_insert(struct adjslot_list *head, struct adjslot_list *new)
151{
152    struct adjslot_list *next = head->next;
153    new->prev = head;
154    new->next = next;
155    next->prev = new;
156    head->next = new;
157}
158
159static void adjslot_remove(struct adjslot_list *old)
160{
161    struct adjslot_list *prev = old->prev;
162    struct adjslot_list *next = old->next;
163    next->prev = prev;
164    prev->next = next;
165}
166
167static struct adjslot_list *adjslot_tail(struct adjslot_list *head) {
168    struct adjslot_list *asl = head->prev;
169
170    return asl == head ? NULL : asl;
171}
172
173static void proc_slot(struct proc *procp) {
174    int adjslot = ADJTOSLOT(procp->oomadj);
175
176    adjslot_insert(&procadjslot_list[adjslot], &procp->asl);
177}
178
179static void proc_unslot(struct proc *procp) {
180    adjslot_remove(&procp->asl);
181}
182
183static void proc_insert(struct proc *procp) {
184    int hval = pid_hashfn(procp->pid);
185
186    procp->pidhash_next = pidhash[hval];
187    pidhash[hval] = procp;
188    proc_slot(procp);
189}
190
191static int pid_remove(int pid) {
192    int hval = pid_hashfn(pid);
193    struct proc *procp;
194    struct proc *prevp;
195
196    for (procp = pidhash[hval], prevp = NULL; procp && procp->pid != pid;
197         procp = procp->pidhash_next)
198            prevp = procp;
199
200    if (!procp)
201        return -1;
202
203    if (!prevp)
204        pidhash[hval] = procp->pidhash_next;
205    else
206        prevp->pidhash_next = procp->pidhash_next;
207
208    proc_unslot(procp);
209    free(procp);
210    return 0;
211}
212
/*
 * Write the string s (without its terminating NUL) to the existing file
 * at path with a single write() call.
 *
 * Open failures, write errors and short writes are logged; nothing is
 * reported back to the caller.
 */
static void writefilestring(char *path, char *s) {
    int fd = open(path, O_WRONLY);
    int want;
    int wrote;

    if (fd < 0) {
        ALOGE("Error opening %s; errno=%d", path, errno);
        return;
    }

    want = strlen(s);
    wrote = write(fd, s, want);
    if (wrote < 0) {
        ALOGE("Error writing %s; errno=%d", path, errno);
    } else if (wrote < want) {
        ALOGE("Short write on %s; length=%d", path, wrote);
    }

    close(fd);
}
232
233static void cmd_procprio(int pid, int uid, int oomadj) {
234    struct proc *procp;
235    char path[80];
236    char val[20];
237
238    if (oomadj < OOM_DISABLE || oomadj > OOM_ADJUST_MAX) {
239        ALOGE("Invalid PROCPRIO oomadj argument %d", oomadj);
240        return;
241    }
242
243    snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", pid);
244    snprintf(val, sizeof(val), "%d", lowmem_oom_adj_to_oom_score_adj(oomadj));
245    writefilestring(path, val);
246
247    if (use_inkernel_interface)
248        return;
249
250    procp = pid_lookup(pid);
251    if (!procp) {
252            procp = malloc(sizeof(struct proc));
253            if (!procp) {
254                // Oh, the irony.  May need to rebuild our state.
255                return;
256            }
257
258            procp->pid = pid;
259            procp->uid = uid;
260            procp->oomadj = oomadj;
261            proc_insert(procp);
262    } else {
263        proc_unslot(procp);
264        procp->oomadj = oomadj;
265        proc_slot(procp);
266    }
267}
268
269static void cmd_procremove(int pid) {
270    if (use_inkernel_interface)
271        return;
272
273    pid_remove(pid);
274    kill_lasttime = 0;
275}
276
277static void cmd_target(int ntargets, int *params) {
278    int i;
279
280    if (ntargets > (int)ARRAY_SIZE(lowmem_adj))
281        return;
282
283    for (i = 0; i < ntargets; i++) {
284        lowmem_minfree[i] = ntohl(*params++);
285        lowmem_adj[i] = ntohl(*params++);
286    }
287
288    lowmem_targets_size = ntargets;
289
290    if (use_inkernel_interface) {
291        char minfreestr[128];
292        char killpriostr[128];
293
294        minfreestr[0] = '\0';
295        killpriostr[0] = '\0';
296
297        for (i = 0; i < lowmem_targets_size; i++) {
298            char val[40];
299
300            if (i) {
301                strlcat(minfreestr, ",", sizeof(minfreestr));
302                strlcat(killpriostr, ",", sizeof(killpriostr));
303            }
304
305            snprintf(val, sizeof(val), "%d", lowmem_minfree[i]);
306            strlcat(minfreestr, val, sizeof(minfreestr));
307            snprintf(val, sizeof(val), "%d", lowmem_adj[i]);
308            strlcat(killpriostr, val, sizeof(killpriostr));
309        }
310
311        writefilestring(INKERNEL_MINFREE_PATH, minfreestr);
312        writefilestring(INKERNEL_ADJ_PATH, killpriostr);
313    }
314}
315
316static void ctrl_data_close(void) {
317    ALOGI("Closing Activity Manager data connection");
318    close(ctrl_dfd);
319    ctrl_dfd = -1;
320    maxevents--;
321}
322
323static int ctrl_data_read(char *buf, size_t bufsz) {
324    int ret = 0;
325
326    ret = read(ctrl_dfd, buf, bufsz);
327
328    if (ret == -1) {
329        ALOGE("control data socket read failed; errno=%d", errno);
330    } else if (ret == 0) {
331        ALOGE("Got EOF on control data socket");
332        ret = -1;
333    }
334
335    return ret;
336}
337
338static void ctrl_command_handler(void) {
339    int ibuf[CTRL_PACKET_MAX / sizeof(int)];
340    int len;
341    int cmd = -1;
342    int nargs;
343    int targets;
344
345    len = ctrl_data_read((char *)ibuf, CTRL_PACKET_MAX);
346    if (len <= 0)
347        return;
348
349    nargs = len / sizeof(int) - 1;
350    if (nargs < 0)
351        goto wronglen;
352
353    cmd = ntohl(ibuf[0]);
354
355    switch(cmd) {
356    case LMK_TARGET:
357        targets = nargs / 2;
358        if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
359            goto wronglen;
360        cmd_target(targets, &ibuf[1]);
361        break;
362    case LMK_PROCPRIO:
363        if (nargs != 3)
364            goto wronglen;
365        cmd_procprio(ntohl(ibuf[1]), ntohl(ibuf[2]), ntohl(ibuf[3]));
366        break;
367    case LMK_PROCREMOVE:
368        if (nargs != 1)
369            goto wronglen;
370        cmd_procremove(ntohl(ibuf[1]));
371        break;
372    default:
373        ALOGE("Received unknown command code %d", cmd);
374        return;
375    }
376
377    return;
378
379wronglen:
380    ALOGE("Wrong control socket read length cmd=%d len=%d", cmd, len);
381}
382
383static void ctrl_data_handler(uint32_t events) {
384    if (events & EPOLLHUP) {
385        ALOGI("ActivityManager disconnected");
386        if (!ctrl_dfd_reopened)
387            ctrl_data_close();
388    } else if (events & EPOLLIN) {
389        ctrl_command_handler();
390    }
391}
392
393static void ctrl_connect_handler(uint32_t events __unused) {
394    struct sockaddr addr;
395    socklen_t alen;
396    struct epoll_event epev;
397
398    if (ctrl_dfd >= 0) {
399        ctrl_data_close();
400        ctrl_dfd_reopened = 1;
401    }
402
403    alen = sizeof(addr);
404    ctrl_dfd = accept(ctrl_lfd, &addr, &alen);
405
406    if (ctrl_dfd < 0) {
407        ALOGE("lmkd control socket accept failed; errno=%d", errno);
408        return;
409    }
410
411    ALOGI("ActivityManager connected");
412    maxevents++;
413    epev.events = EPOLLIN;
414    epev.data.ptr = (void *)ctrl_data_handler;
415    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_dfd, &epev) == -1) {
416        ALOGE("epoll_ctl for data connection socket failed; errno=%d", errno);
417        ctrl_data_close();
418        return;
419    }
420}
421
/*
 * Parse the value list of a zoneinfo "protection:" line, e.g. "(0, 12, 34)".
 *
 * cp is the first value token (it must start with '(').  The remaining
 * tokens are fetched with strtok(NULL, " "), so the caller must have
 * started tokenising the line with strtok() beforehand.
 *
 * Returns the largest value in the list (never less than 0), or 0 when the
 * text is malformed.
 */
static int zoneinfo_parse_protection(char *cp) {
    int highest = 0;

    if (*cp != '(')
        return 0;
    cp++;

    while (cp != NULL) {
        char *end;
        int value = strtol(cp, &end, 0);

        /* Every number must be followed directly by ',' or ')'. */
        if (*end != ',' && *end != ')')
            return 0;

        if (value > highest)
            highest = value;

        cp = strtok(NULL, " ");
    }

    return highest;
}
439
440static void zoneinfo_parse_line(char *line, struct sysmeminfo *mip) {
441    char *cp = line;
442    char *ap;
443
444    cp = strtok(line, " ");
445    if (!cp)
446        return;
447
448    ap = strtok(NULL, " ");
449    if (!ap)
450        return;
451
452    if (!strcmp(cp, "nr_free_pages"))
453        mip->nr_free_pages += strtol(ap, NULL, 0);
454    else if (!strcmp(cp, "nr_file_pages"))
455        mip->nr_file_pages += strtol(ap, NULL, 0);
456    else if (!strcmp(cp, "nr_shmem"))
457        mip->nr_shmem += strtol(ap, NULL, 0);
458    else if (!strcmp(cp, "high"))
459        mip->totalreserve_pages += strtol(ap, NULL, 0);
460    else if (!strcmp(cp, "protection:"))
461        mip->totalreserve_pages += zoneinfo_parse_protection(ap);
462}
463
464static int zoneinfo_parse(struct sysmeminfo *mip) {
465    FILE *f;
466    char line[LINE_MAX];
467
468    memset(mip, 0, sizeof(struct sysmeminfo));
469    f = fopen(ZONEINFO_PATH, "r");
470    if (!f) {
471        ALOGE("%s open: errno=%d", ZONEINFO_PATH, errno);
472        return -1;
473    }
474
475    while (fgets(line, LINE_MAX, f))
476            zoneinfo_parse_line(line, mip);
477
478    fclose(f);
479    return 0;
480}
481
482static int proc_get_size(int pid) {
483    char path[PATH_MAX];
484    char line[LINE_MAX];
485    FILE *f;
486    int rss = 0;
487    int total;
488
489    snprintf(path, PATH_MAX, "/proc/%d/statm", pid);
490    f = fopen(path, "r");
491    if (!f)
492        return -1;
493    if (!fgets(line, LINE_MAX, f)) {
494        fclose(f);
495        return -1;
496    }
497
498    sscanf(line, "%d %d ", &total, &rss);
499    fclose(f);
500    return rss;
501}
502
503static char *proc_get_name(int pid) {
504    char path[PATH_MAX];
505    static char line[LINE_MAX];
506    FILE *f;
507    char *cp;
508
509    snprintf(path, PATH_MAX, "/proc/%d/cmdline", pid);
510    f = fopen(path, "r");
511    if (!f)
512        return NULL;
513    if (!fgets(line, LINE_MAX, f)) {
514        fclose(f);
515        return NULL;
516    }
517
518    cp = strchr(line, ' ');
519    if (cp)
520        *cp = '\0';
521
522    return line;
523}
524
525static struct proc *proc_adj_lru(int oomadj) {
526    return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
527}
528
529static void mp_event(uint32_t events __unused) {
530    int i;
531    int ret;
532    unsigned long long evcount;
533    struct sysmeminfo mi;
534    int other_free;
535    int other_file;
536    int minfree = 0;
537    int min_score_adj = OOM_ADJUST_MAX + 1;
538
539    ret = read(mpevfd, &evcount, sizeof(evcount));
540    if (ret < 0)
541        ALOGE("Error reading memory pressure event fd; errno=%d",
542              errno);
543
544    if (time(NULL) - kill_lasttime < KILL_TIMEOUT)
545        return;
546
547    if (zoneinfo_parse(&mi) < 0)
548        return;
549
550    other_free = mi.nr_free_pages - mi.totalreserve_pages;
551    other_file = mi.nr_file_pages - mi.nr_shmem;
552
553    for (i = 0; i < lowmem_targets_size; i++) {
554        minfree = lowmem_minfree[i];
555        if (other_free < minfree && other_file < minfree) {
556            min_score_adj = lowmem_adj[i];
557            break;
558        }
559    }
560
561    if (min_score_adj == OOM_ADJUST_MAX + 1)
562        return;
563
564    for (i = OOM_ADJUST_MAX; i >= min_score_adj; i--) {
565        struct proc *procp;
566
567    retry:
568        procp = proc_adj_lru(i);
569
570        if (procp) {
571            int pid = procp->pid;
572            uid_t uid = procp->uid;
573            char *taskname;
574            int tasksize;
575            int r;
576
577            taskname = proc_get_name(pid);
578            if (!taskname) {
579                pid_remove(pid);
580                goto retry;
581            }
582
583            tasksize = proc_get_size(pid);
584            if (tasksize < 0) {
585                pid_remove(pid);
586                goto retry;
587            }
588
589            ALOGI("Killing '%s' (%d), uid %d, adj %d\n"
590                  "   to free %ldkB because cache %ldkB is below limit %ldkB for oom_adj %d\n"
591                  "   Free memory is %ldkB %s reserved",
592                  taskname, pid, uid, procp->oomadj, tasksize * page_k,
593                  other_file * page_k, minfree * page_k, min_score_adj,
594                  other_free * page_k, other_free >= 0 ? "above" : "below");
595            r = kill(pid, SIGKILL);
596            killProcessGroup(uid, pid, SIGKILL);
597            pid_remove(pid);
598
599            if (r) {
600                ALOGE("kill(%d): errno=%d", procp->pid, errno);
601                goto retry;
602            } else {
603                time(&kill_lasttime);
604                break;
605            }
606        }
607    }
608}
609
610static int init_mp(char *levelstr, void *event_handler)
611{
612    int mpfd;
613    int evfd;
614    int evctlfd;
615    char buf[256];
616    struct epoll_event epev;
617    int ret;
618
619    mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY);
620    if (mpfd < 0) {
621        ALOGI("No kernel memory.pressure_level support (errno=%d)", errno);
622        goto err_open_mpfd;
623    }
624
625    evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY);
626    if (evctlfd < 0) {
627        ALOGI("No kernel memory cgroup event control (errno=%d)", errno);
628        goto err_open_evctlfd;
629    }
630
631    evfd = eventfd(0, EFD_NONBLOCK);
632    if (evfd < 0) {
633        ALOGE("eventfd failed for level %s; errno=%d", levelstr, errno);
634        goto err_eventfd;
635    }
636
637    ret = snprintf(buf, sizeof(buf), "%d %d %s", evfd, mpfd, levelstr);
638    if (ret >= (ssize_t)sizeof(buf)) {
639        ALOGE("cgroup.event_control line overflow for level %s", levelstr);
640        goto err;
641    }
642
643    ret = write(evctlfd, buf, strlen(buf) + 1);
644    if (ret == -1) {
645        ALOGE("cgroup.event_control write failed for level %s; errno=%d",
646              levelstr, errno);
647        goto err;
648    }
649
650    epev.events = EPOLLIN;
651    epev.data.ptr = event_handler;
652    ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, evfd, &epev);
653    if (ret == -1) {
654        ALOGE("epoll_ctl for level %s failed; errno=%d", levelstr, errno);
655        goto err;
656    }
657    maxevents++;
658    mpevfd = evfd;
659    return 0;
660
661err:
662    close(evfd);
663err_eventfd:
664    close(evctlfd);
665err_open_evctlfd:
666    close(mpfd);
667err_open_mpfd:
668    return -1;
669}
670
671static int init(void) {
672    struct epoll_event epev;
673    int i;
674    int ret;
675
676    page_k = sysconf(_SC_PAGESIZE);
677    if (page_k == -1)
678        page_k = PAGE_SIZE;
679    page_k /= 1024;
680
681    epollfd = epoll_create(MAX_EPOLL_EVENTS);
682    if (epollfd == -1) {
683        ALOGE("epoll_create failed (errno=%d)", errno);
684        return -1;
685    }
686
687    ctrl_lfd = android_get_control_socket("lmkd");
688    if (ctrl_lfd < 0) {
689        ALOGE("get lmkd control socket failed");
690        return -1;
691    }
692
693    ret = listen(ctrl_lfd, 1);
694    if (ret < 0) {
695        ALOGE("lmkd control socket listen failed (errno=%d)", errno);
696        return -1;
697    }
698
699    epev.events = EPOLLIN;
700    epev.data.ptr = (void *)ctrl_connect_handler;
701    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_lfd, &epev) == -1) {
702        ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno);
703        return -1;
704    }
705    maxevents++;
706
707    use_inkernel_interface = !access(INKERNEL_MINFREE_PATH, W_OK);
708
709    if (use_inkernel_interface) {
710        ALOGI("Using in-kernel low memory killer interface");
711    } else {
712        ret = init_mp(MEMPRESSURE_WATCH_LEVEL, (void *)&mp_event);
713        if (ret)
714            ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer");
715    }
716
717    for (i = 0; i <= ADJTOSLOT(OOM_ADJUST_MAX); i++) {
718        procadjslot_list[i].next = &procadjslot_list[i];
719        procadjslot_list[i].prev = &procadjslot_list[i];
720    }
721
722    return 0;
723}
724
725static void mainloop(void) {
726    while (1) {
727        struct epoll_event events[maxevents];
728        int nevents;
729        int i;
730
731        ctrl_dfd_reopened = 0;
732        nevents = epoll_wait(epollfd, events, maxevents, -1);
733
734        if (nevents == -1) {
735            if (errno == EINTR)
736                continue;
737            ALOGE("epoll_wait failed (errno=%d)", errno);
738            continue;
739        }
740
741        for (i = 0; i < nevents; ++i) {
742            if (events[i].events & EPOLLERR)
743                ALOGD("EPOLLERR on event #%d", i);
744            if (events[i].data.ptr)
745                (*(void (*)(uint32_t))events[i].data.ptr)(events[i].events);
746        }
747    }
748}
749
750int main(int argc __unused, char **argv __unused) {
751    struct sched_param param = {
752            .sched_priority = 1,
753    };
754
755    mlockall(MCL_FUTURE);
756    sched_setscheduler(0, SCHED_FIFO, &param);
757    if (!init())
758        mainloop();
759
760    ALOGI("exiting");
761    return 0;
762}
763