res_cache.c revision 9db1f3efa8499fa6a97ba5f4b2f12d431b7d25fe
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *  * Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *  * Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in
12 *    the documentation and/or other materials provided with the
13 *    distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include "resolv_cache.h"
30#include <resolv.h>
31#include <stdarg.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <string.h>
35#include <time.h>
36#include "pthread.h"
37
38#include <errno.h>
39#include <arpa/nameser.h>
40#include <sys/system_properties.h>
41#include <net/if.h>
42#include <netdb.h>
43#include <linux/if.h>
44
45#include <arpa/inet.h>
46#include "resolv_private.h"
47#include "resolv_netid.h"
48#include "res_private.h"
49
50#include "private/libc_logging.h"
51
52/* This code implements a small and *simple* DNS resolver cache.
53 *
54 * It is only used to cache DNS answers for a time defined by the smallest TTL
55 * among the answer records in order to reduce DNS traffic. It is not supposed
56 * to be a full DNS cache, since we plan to implement that in the future in a
57 * dedicated process running on the system.
58 *
59 * Note that its design is kept simple very intentionally, i.e.:
60 *
61 *  - it takes raw DNS query packet data as input, and returns raw DNS
62 *    answer packet data as output
63 *
64 *    (this means that two similar queries that encode the DNS name
65 *     differently will be treated distinctly).
66 *
67 *    the smallest TTL value among the answer records are used as the time
68 *    to keep an answer in the cache.
69 *
70 *    this is bad, but we absolutely want to avoid parsing the answer packets
71 *    (and should be solved by the later full DNS cache process).
72 *
73 *  - the implementation is just a (query-data) => (answer-data) hash table
74 *    with a trivial least-recently-used expiration policy.
75 *
76 * Doing this keeps the code simple and avoids to deal with a lot of things
77 * that a full DNS cache is expected to do.
78 *
79 * The API is also very simple:
80 *
81 *   - the client calls _resolv_cache_get() to obtain a handle to the cache.
82 *     this will initialize the cache on first usage. the result can be NULL
83 *     if the cache is disabled.
84 *
85 *   - the client calls _resolv_cache_lookup() before performing a query
86 *
87 *     if the function returns RESOLV_CACHE_FOUND, a copy of the answer data
88 *     has been copied into the client-provided answer buffer.
89 *
90 *     if the function returns RESOLV_CACHE_NOTFOUND, the client should perform
91 *     a request normally, *then* call _resolv_cache_add() to add the received
92 *     answer to the cache.
93 *
94 *     if the function returns RESOLV_CACHE_UNSUPPORTED, the client should
95 *     perform a request normally, and *not* call _resolv_cache_add()
96 *
97 *     note that RESOLV_CACHE_UNSUPPORTED is also returned if the answer buffer
98 *     is too short to accomodate the cached result.
99 */
100
101/* the name of an environment variable that will be checked the first time
102 * this code is called if its value is "0", then the resolver cache is
103 * disabled.
104 */
105#define  CONFIG_ENV  "BIONIC_DNSCACHE"
106
107/* default number of entries kept in the cache. This value has been
108 * determined by browsing through various sites and counting the number
109 * of corresponding requests. Keep in mind that our framework is currently
110 * performing two requests per name lookup (one for IPv4, the other for IPv6)
111 *
112 *    www.google.com      4
113 *    www.ysearch.com     6
114 *    www.amazon.com      8
115 *    www.nytimes.com     22
116 *    www.espn.com        28
117 *    www.msn.com         28
118 *    www.lemonde.fr      35
119 *
120 * (determined in 2009-2-17 from Paris, France, results may vary depending
121 *  on location)
122 *
123 * most high-level websites use lots of media/ad servers with different names
124 * but these are generally reused when browsing through the site.
125 *
126 * As such, a value of 64 should be relatively comfortable at the moment.
127 *
128 * ******************************************
129 * * NOTE - this has changed.
130 * * 1) we've added IPv6 support so each dns query results in 2 responses
131 * * 2) we've made this a system-wide cache, so the cost is less (it's not
132 * *    duplicated in each process) and the need is greater (more processes
133 * *    making different requests).
134 * * Upping by 2x for IPv6
135 * * Upping by another 5x for the centralized nature
136 * *****************************************
137 */
138#define  CONFIG_MAX_ENTRIES    64 * 2 * 5
139/* name of the system property that can be used to set the cache size */
140
141/****************************************************************************/
142/****************************************************************************/
143/*****                                                                  *****/
144/*****                                                                  *****/
145/*****                                                                  *****/
146/****************************************************************************/
147/****************************************************************************/
148
149/* set to 1 to debug cache operations */
150#define  DEBUG       0
151
152/* set to 1 to debug query data */
153#define  DEBUG_DATA  0
154
155#if DEBUG
156#define __DEBUG__
157#else
158#define __DEBUG__ __attribute__((unused))
159#endif
160
161#undef XLOG
162
163#define XLOG(...) ({ \
164    if (DEBUG) { \
165        __libc_format_log(ANDROID_LOG_DEBUG,"libc",__VA_ARGS__); \
166    } else { \
167        ((void)0); \
168    } \
169})
170
171/** BOUNDED BUFFER FORMATTING
172 **/
173
174/* technical note:
175 *
176 *   the following debugging routines are used to append data to a bounded
177 *   buffer they take two parameters that are:
178 *
179 *   - p : a pointer to the current cursor position in the buffer
180 *         this value is initially set to the buffer's address.
181 *
182 *   - end : the address of the buffer's limit, i.e. of the first byte
183 *           after the buffer. this address should never be touched.
184 *
185 *           IMPORTANT: it is assumed that end > buffer_address, i.e.
186 *                      that the buffer is at least one byte.
187 *
188 *   the _bprint_() functions return the new value of 'p' after the data
189 *   has been appended, and also ensure the following:
190 *
191 *   - the returned value will never be strictly greater than 'end'
192 *
193 *   - a return value equal to 'end' means that truncation occured
194 *     (in which case, end[-1] will be set to 0)
195 *
196 *   - after returning from a _bprint_() function, the content of the buffer
197 *     is always 0-terminated, even in the event of truncation.
198 *
199 *  these conventions allow you to call _bprint_ functions multiple times and
200 *  only check for truncation at the end of the sequence, as in:
201 *
202 *     char  buff[1000], *p = buff, *end = p + sizeof(buff);
203 *
204 *     p = _bprint_c(p, end, '"');
205 *     p = _bprint_s(p, end, my_string);
206 *     p = _bprint_c(p, end, '"');
207 *
208 *     if (p >= end) {
209 *        // buffer was too small
210 *     }
211 *
212 *     printf( "%s", buff );
213 */
214
215/* add a char to a bounded buffer */
216char*
217_bprint_c( char*  p, char*  end, int  c )
218{
219    if (p < end) {
220        if (p+1 == end)
221            *p++ = 0;
222        else {
223            *p++ = (char) c;
224            *p   = 0;
225        }
226    }
227    return p;
228}
229
230/* add a sequence of bytes to a bounded buffer */
231char*
232_bprint_b( char*  p, char*  end, const char*  buf, int  len )
233{
234    int  avail = end - p;
235
236    if (avail <= 0 || len <= 0)
237        return p;
238
239    if (avail > len)
240        avail = len;
241
242    memcpy( p, buf, avail );
243    p += avail;
244
245    if (p < end)
246        p[0] = 0;
247    else
248        end[-1] = 0;
249
250    return p;
251}
252
253/* add a string to a bounded buffer */
254char*
255_bprint_s( char*  p, char*  end, const char*  str )
256{
257    return _bprint_b(p, end, str, strlen(str));
258}
259
260/* add a formatted string to a bounded buffer */
261char* _bprint( char*  p, char*  end, const char*  format, ... ) __DEBUG__;
262char* _bprint( char*  p, char*  end, const char*  format, ... )
263{
264    int      avail, n;
265    va_list  args;
266
267    avail = end - p;
268
269    if (avail <= 0)
270        return p;
271
272    va_start(args, format);
273    n = vsnprintf( p, avail, format, args);
274    va_end(args);
275
276    /* certain C libraries return -1 in case of truncation */
277    if (n < 0 || n > avail)
278        n = avail;
279
280    p += n;
281    /* certain C libraries do not zero-terminate in case of truncation */
282    if (p == end)
283        p[-1] = 0;
284
285    return p;
286}
287
288/* add a hex value to a bounded buffer, up to 8 digits */
289char*
290_bprint_hex( char*  p, char*  end, unsigned  value, int  numDigits )
291{
292    char   text[sizeof(unsigned)*2];
293    int    nn = 0;
294
295    while (numDigits-- > 0) {
296        text[nn++] = "0123456789abcdef"[(value >> (numDigits*4)) & 15];
297    }
298    return _bprint_b(p, end, text, nn);
299}
300
301/* add the hexadecimal dump of some memory area to a bounded buffer */
302char*
303_bprint_hexdump( char*  p, char*  end, const uint8_t*  data, int  datalen )
304{
305    int   lineSize = 16;
306
307    while (datalen > 0) {
308        int  avail = datalen;
309        int  nn;
310
311        if (avail > lineSize)
312            avail = lineSize;
313
314        for (nn = 0; nn < avail; nn++) {
315            if (nn > 0)
316                p = _bprint_c(p, end, ' ');
317            p = _bprint_hex(p, end, data[nn], 2);
318        }
319        for ( ; nn < lineSize; nn++ ) {
320            p = _bprint_s(p, end, "   ");
321        }
322        p = _bprint_s(p, end, "  ");
323
324        for (nn = 0; nn < avail; nn++) {
325            int  c = data[nn];
326
327            if (c < 32 || c > 127)
328                c = '.';
329
330            p = _bprint_c(p, end, c);
331        }
332        p = _bprint_c(p, end, '\n');
333
334        data    += avail;
335        datalen -= avail;
336    }
337    return p;
338}
339
340/* dump the content of a query of packet to the log */
341void XLOG_BYTES( const void*  base, int  len ) __DEBUG__;
342void XLOG_BYTES( const void*  base, int  len )
343{
344    if (DEBUG_DATA) {
345        char  buff[1024];
346        char*  p = buff, *end = p + sizeof(buff);
347
348        p = _bprint_hexdump(p, end, base, len);
349        XLOG("%s",buff);
350    }
351} __DEBUG__
352
353static time_t
354_time_now( void )
355{
356    struct timeval  tv;
357
358    gettimeofday( &tv, NULL );
359    return tv.tv_sec;
360}
361
362/* reminder: the general format of a DNS packet is the following:
363 *
364 *    HEADER  (12 bytes)
365 *    QUESTION  (variable)
366 *    ANSWER (variable)
367 *    AUTHORITY (variable)
368 *    ADDITIONNAL (variable)
369 *
370 * the HEADER is made of:
371 *
372 *   ID     : 16 : 16-bit unique query identification field
373 *
374 *   QR     :  1 : set to 0 for queries, and 1 for responses
375 *   Opcode :  4 : set to 0 for queries
376 *   AA     :  1 : set to 0 for queries
377 *   TC     :  1 : truncation flag, will be set to 0 in queries
378 *   RD     :  1 : recursion desired
379 *
380 *   RA     :  1 : recursion available (0 in queries)
381 *   Z      :  3 : three reserved zero bits
382 *   RCODE  :  4 : response code (always 0=NOERROR in queries)
383 *
384 *   QDCount: 16 : question count
385 *   ANCount: 16 : Answer count (0 in queries)
386 *   NSCount: 16: Authority Record count (0 in queries)
387 *   ARCount: 16: Additionnal Record count (0 in queries)
388 *
389 * the QUESTION is made of QDCount Question Record (QRs)
390 * the ANSWER is made of ANCount RRs
391 * the AUTHORITY is made of NSCount RRs
392 * the ADDITIONNAL is made of ARCount RRs
393 *
394 * Each Question Record (QR) is made of:
395 *
396 *   QNAME   : variable : Query DNS NAME
397 *   TYPE    : 16       : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255)
398 *   CLASS   : 16       : class of query (IN=1)
399 *
400 * Each Resource Record (RR) is made of:
401 *
402 *   NAME    : variable : DNS NAME
403 *   TYPE    : 16       : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255)
404 *   CLASS   : 16       : class of query (IN=1)
405 *   TTL     : 32       : seconds to cache this RR (0=none)
406 *   RDLENGTH: 16       : size of RDDATA in bytes
407 *   RDDATA  : variable : RR data (depends on TYPE)
408 *
409 * Each QNAME contains a domain name encoded as a sequence of 'labels'
410 * terminated by a zero. Each label has the following format:
411 *
412 *    LEN  : 8     : lenght of label (MUST be < 64)
413 *    NAME : 8*LEN : label length (must exclude dots)
414 *
415 * A value of 0 in the encoding is interpreted as the 'root' domain and
416 * terminates the encoding. So 'www.android.com' will be encoded as:
417 *
418 *   <3>www<7>android<3>com<0>
419 *
420 * Where <n> represents the byte with value 'n'
421 *
422 * Each NAME reflects the QNAME of the question, but has a slightly more
423 * complex encoding in order to provide message compression. This is achieved
424 * by using a 2-byte pointer, with format:
425 *
426 *    TYPE   : 2  : 0b11 to indicate a pointer, 0b01 and 0b10 are reserved
427 *    OFFSET : 14 : offset to another part of the DNS packet
428 *
429 * The offset is relative to the start of the DNS packet and must point
430 * A pointer terminates the encoding.
431 *
432 * The NAME can be encoded in one of the following formats:
433 *
434 *   - a sequence of simple labels terminated by 0 (like QNAMEs)
435 *   - a single pointer
436 *   - a sequence of simple labels terminated by a pointer
437 *
438 * A pointer shall always point to either a pointer of a sequence of
439 * labels (which can themselves be terminated by either a 0 or a pointer)
440 *
441 * The expanded length of a given domain name should not exceed 255 bytes.
442 *
443 * NOTE: we don't parse the answer packets, so don't need to deal with NAME
444 *       records, only QNAMEs.
445 */
446
447#define  DNS_HEADER_SIZE  12
448
449#define  DNS_TYPE_A   "\00\01"   /* big-endian decimal 1 */
450#define  DNS_TYPE_PTR "\00\014"  /* big-endian decimal 12 */
451#define  DNS_TYPE_MX  "\00\017"  /* big-endian decimal 15 */
452#define  DNS_TYPE_AAAA "\00\034" /* big-endian decimal 28 */
453#define  DNS_TYPE_ALL "\00\0377" /* big-endian decimal 255 */
454
455#define  DNS_CLASS_IN "\00\01"   /* big-endian decimal 1 */
456
457typedef struct {
458    const uint8_t*  base;
459    const uint8_t*  end;
460    const uint8_t*  cursor;
461} DnsPacket;
462
463static void
464_dnsPacket_init( DnsPacket*  packet, const uint8_t*  buff, int  bufflen )
465{
466    packet->base   = buff;
467    packet->end    = buff + bufflen;
468    packet->cursor = buff;
469}
470
471static void
472_dnsPacket_rewind( DnsPacket*  packet )
473{
474    packet->cursor = packet->base;
475}
476
477static void
478_dnsPacket_skip( DnsPacket*  packet, int  count )
479{
480    const uint8_t*  p = packet->cursor + count;
481
482    if (p > packet->end)
483        p = packet->end;
484
485    packet->cursor = p;
486}
487
488static int
489_dnsPacket_readInt16( DnsPacket*  packet )
490{
491    const uint8_t*  p = packet->cursor;
492
493    if (p+2 > packet->end)
494        return -1;
495
496    packet->cursor = p+2;
497    return (p[0]<< 8) | p[1];
498}
499
500/** QUERY CHECKING
501 **/
502
503/* check bytes in a dns packet. returns 1 on success, 0 on failure.
504 * the cursor is only advanced in the case of success
505 */
506static int
507_dnsPacket_checkBytes( DnsPacket*  packet, int  numBytes, const void*  bytes )
508{
509    const uint8_t*  p = packet->cursor;
510
511    if (p + numBytes > packet->end)
512        return 0;
513
514    if (memcmp(p, bytes, numBytes) != 0)
515        return 0;
516
517    packet->cursor = p + numBytes;
518    return 1;
519}
520
521/* parse and skip a given QNAME stored in a query packet,
522 * from the current cursor position. returns 1 on success,
523 * or 0 for malformed data.
524 */
525static int
526_dnsPacket_checkQName( DnsPacket*  packet )
527{
528    const uint8_t*  p   = packet->cursor;
529    const uint8_t*  end = packet->end;
530
531    for (;;) {
532        int  c;
533
534        if (p >= end)
535            break;
536
537        c = *p++;
538
539        if (c == 0) {
540            packet->cursor = p;
541            return 1;
542        }
543
544        /* we don't expect label compression in QNAMEs */
545        if (c >= 64)
546            break;
547
548        p += c;
549        /* we rely on the bound check at the start
550         * of the loop here */
551    }
552    /* malformed data */
553    XLOG("malformed QNAME");
554    return 0;
555}
556
557/* parse and skip a given QR stored in a packet.
558 * returns 1 on success, and 0 on failure
559 */
560static int
561_dnsPacket_checkQR( DnsPacket*  packet )
562{
563    if (!_dnsPacket_checkQName(packet))
564        return 0;
565
566    /* TYPE must be one of the things we support */
567    if (!_dnsPacket_checkBytes(packet, 2, DNS_TYPE_A) &&
568        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_PTR) &&
569        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_MX) &&
570        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_AAAA) &&
571        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_ALL))
572    {
573        XLOG("unsupported TYPE");
574        return 0;
575    }
576    /* CLASS must be IN */
577    if (!_dnsPacket_checkBytes(packet, 2, DNS_CLASS_IN)) {
578        XLOG("unsupported CLASS");
579        return 0;
580    }
581
582    return 1;
583}
584
585/* check the header of a DNS Query packet, return 1 if it is one
586 * type of query we can cache, or 0 otherwise
587 */
588static int
589_dnsPacket_checkQuery( DnsPacket*  packet )
590{
591    const uint8_t*  p = packet->base;
592    int             qdCount, anCount, dnCount, arCount;
593
594    if (p + DNS_HEADER_SIZE > packet->end) {
595        XLOG("query packet too small");
596        return 0;
597    }
598
599    /* QR must be set to 0, opcode must be 0 and AA must be 0 */
600    /* RA, Z, and RCODE must be 0 */
601    if ((p[2] & 0xFC) != 0 || p[3] != 0) {
602        XLOG("query packet flags unsupported");
603        return 0;
604    }
605
606    /* Note that we ignore the TC and RD bits here for the
607     * following reasons:
608     *
609     * - there is no point for a query packet sent to a server
610     *   to have the TC bit set, but the implementation might
611     *   set the bit in the query buffer for its own needs
612     *   between a _resolv_cache_lookup and a
613     *   _resolv_cache_add. We should not freak out if this
614     *   is the case.
615     *
616     * - we consider that the result from a RD=0 or a RD=1
617     *   query might be different, hence that the RD bit
618     *   should be used to differentiate cached result.
619     *
620     *   this implies that RD is checked when hashing or
621     *   comparing query packets, but not TC
622     */
623
624    /* ANCOUNT, DNCOUNT and ARCOUNT must be 0 */
625    qdCount = (p[4] << 8) | p[5];
626    anCount = (p[6] << 8) | p[7];
627    dnCount = (p[8] << 8) | p[9];
628    arCount = (p[10]<< 8) | p[11];
629
630    if (anCount != 0 || dnCount != 0 || arCount != 0) {
631        XLOG("query packet contains non-query records");
632        return 0;
633    }
634
635    if (qdCount == 0) {
636        XLOG("query packet doesn't contain query record");
637        return 0;
638    }
639
640    /* Check QDCOUNT QRs */
641    packet->cursor = p + DNS_HEADER_SIZE;
642
643    for (;qdCount > 0; qdCount--)
644        if (!_dnsPacket_checkQR(packet))
645            return 0;
646
647    return 1;
648}
649
650/** QUERY DEBUGGING
651 **/
652#if DEBUG
653static char*
654_dnsPacket_bprintQName(DnsPacket*  packet, char*  bp, char*  bend)
655{
656    const uint8_t*  p   = packet->cursor;
657    const uint8_t*  end = packet->end;
658    int             first = 1;
659
660    for (;;) {
661        int  c;
662
663        if (p >= end)
664            break;
665
666        c = *p++;
667
668        if (c == 0) {
669            packet->cursor = p;
670            return bp;
671        }
672
673        /* we don't expect label compression in QNAMEs */
674        if (c >= 64)
675            break;
676
677        if (first)
678            first = 0;
679        else
680            bp = _bprint_c(bp, bend, '.');
681
682        bp = _bprint_b(bp, bend, (const char*)p, c);
683
684        p += c;
685        /* we rely on the bound check at the start
686         * of the loop here */
687    }
688    /* malformed data */
689    bp = _bprint_s(bp, bend, "<MALFORMED>");
690    return bp;
691}
692
693static char*
694_dnsPacket_bprintQR(DnsPacket*  packet, char*  p, char*  end)
695{
696#define  QQ(x)   { DNS_TYPE_##x, #x }
697    static const struct {
698        const char*  typeBytes;
699        const char*  typeString;
700    } qTypes[] =
701    {
702        QQ(A), QQ(PTR), QQ(MX), QQ(AAAA), QQ(ALL),
703        { NULL, NULL }
704    };
705    int          nn;
706    const char*  typeString = NULL;
707
708    /* dump QNAME */
709    p = _dnsPacket_bprintQName(packet, p, end);
710
711    /* dump TYPE */
712    p = _bprint_s(p, end, " (");
713
714    for (nn = 0; qTypes[nn].typeBytes != NULL; nn++) {
715        if (_dnsPacket_checkBytes(packet, 2, qTypes[nn].typeBytes)) {
716            typeString = qTypes[nn].typeString;
717            break;
718        }
719    }
720
721    if (typeString != NULL)
722        p = _bprint_s(p, end, typeString);
723    else {
724        int  typeCode = _dnsPacket_readInt16(packet);
725        p = _bprint(p, end, "UNKNOWN-%d", typeCode);
726    }
727
728    p = _bprint_c(p, end, ')');
729
730    /* skip CLASS */
731    _dnsPacket_skip(packet, 2);
732    return p;
733}
734
735/* this function assumes the packet has already been checked */
736static char*
737_dnsPacket_bprintQuery( DnsPacket*  packet, char*  p, char*  end )
738{
739    int   qdCount;
740
741    if (packet->base[2] & 0x1) {
742        p = _bprint_s(p, end, "RECURSIVE ");
743    }
744
745    _dnsPacket_skip(packet, 4);
746    qdCount = _dnsPacket_readInt16(packet);
747    _dnsPacket_skip(packet, 6);
748
749    for ( ; qdCount > 0; qdCount-- ) {
750        p = _dnsPacket_bprintQR(packet, p, end);
751    }
752    return p;
753}
754#endif
755
756
757/** QUERY HASHING SUPPORT
758 **
759 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKET HAS ALREADY
760 ** BEEN SUCCESFULLY CHECKED.
761 **/
762
763/* use 32-bit FNV hash function */
764#define  FNV_MULT   16777619U
765#define  FNV_BASIS  2166136261U
766
767static unsigned
768_dnsPacket_hashBytes( DnsPacket*  packet, int  numBytes, unsigned  hash )
769{
770    const uint8_t*  p   = packet->cursor;
771    const uint8_t*  end = packet->end;
772
773    while (numBytes > 0 && p < end) {
774        hash = hash*FNV_MULT ^ *p++;
775    }
776    packet->cursor = p;
777    return hash;
778}
779
780
781static unsigned
782_dnsPacket_hashQName( DnsPacket*  packet, unsigned  hash )
783{
784    const uint8_t*  p   = packet->cursor;
785    const uint8_t*  end = packet->end;
786
787    for (;;) {
788        int  c;
789
790        if (p >= end) {  /* should not happen */
791            XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__);
792            break;
793        }
794
795        c = *p++;
796
797        if (c == 0)
798            break;
799
800        if (c >= 64) {
801            XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__);
802            break;
803        }
804        if (p + c >= end) {
805            XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n",
806                    __FUNCTION__);
807            break;
808        }
809        while (c > 0) {
810            hash = hash*FNV_MULT ^ *p++;
811            c   -= 1;
812        }
813    }
814    packet->cursor = p;
815    return hash;
816}
817
818static unsigned
819_dnsPacket_hashQR( DnsPacket*  packet, unsigned  hash )
820{
821    hash = _dnsPacket_hashQName(packet, hash);
822    hash = _dnsPacket_hashBytes(packet, 4, hash); /* TYPE and CLASS */
823    return hash;
824}
825
826static unsigned
827_dnsPacket_hashQuery( DnsPacket*  packet )
828{
829    unsigned  hash = FNV_BASIS;
830    int       count;
831    _dnsPacket_rewind(packet);
832
833    /* we ignore the TC bit for reasons explained in
834     * _dnsPacket_checkQuery().
835     *
836     * however we hash the RD bit to differentiate
837     * between answers for recursive and non-recursive
838     * queries.
839     */
840    hash = hash*FNV_MULT ^ (packet->base[2] & 1);
841
842    /* assume: other flags are 0 */
843    _dnsPacket_skip(packet, 4);
844
845    /* read QDCOUNT */
846    count = _dnsPacket_readInt16(packet);
847
848    /* assume: ANcount, NScount, ARcount are 0 */
849    _dnsPacket_skip(packet, 6);
850
851    /* hash QDCOUNT QRs */
852    for ( ; count > 0; count-- )
853        hash = _dnsPacket_hashQR(packet, hash);
854
855    return hash;
856}
857
858
859/** QUERY COMPARISON
860 **
861 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKETS HAVE ALREADY
862 ** BEEN SUCCESFULLY CHECKED.
863 **/
864
865static int
866_dnsPacket_isEqualDomainName( DnsPacket*  pack1, DnsPacket*  pack2 )
867{
868    const uint8_t*  p1   = pack1->cursor;
869    const uint8_t*  end1 = pack1->end;
870    const uint8_t*  p2   = pack2->cursor;
871    const uint8_t*  end2 = pack2->end;
872
873    for (;;) {
874        int  c1, c2;
875
876        if (p1 >= end1 || p2 >= end2) {
877            XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__);
878            break;
879        }
880        c1 = *p1++;
881        c2 = *p2++;
882        if (c1 != c2)
883            break;
884
885        if (c1 == 0) {
886            pack1->cursor = p1;
887            pack2->cursor = p2;
888            return 1;
889        }
890        if (c1 >= 64) {
891            XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__);
892            break;
893        }
894        if ((p1+c1 > end1) || (p2+c1 > end2)) {
895            XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n",
896                    __FUNCTION__);
897            break;
898        }
899        if (memcmp(p1, p2, c1) != 0)
900            break;
901        p1 += c1;
902        p2 += c1;
903        /* we rely on the bound checks at the start of the loop */
904    }
905    /* not the same, or one is malformed */
906    XLOG("different DN");
907    return 0;
908}
909
910static int
911_dnsPacket_isEqualBytes( DnsPacket*  pack1, DnsPacket*  pack2, int  numBytes )
912{
913    const uint8_t*  p1 = pack1->cursor;
914    const uint8_t*  p2 = pack2->cursor;
915
916    if ( p1 + numBytes > pack1->end || p2 + numBytes > pack2->end )
917        return 0;
918
919    if ( memcmp(p1, p2, numBytes) != 0 )
920        return 0;
921
922    pack1->cursor += numBytes;
923    pack2->cursor += numBytes;
924    return 1;
925}
926
927static int
928_dnsPacket_isEqualQR( DnsPacket*  pack1, DnsPacket*  pack2 )
929{
930    /* compare domain name encoding + TYPE + CLASS */
931    if ( !_dnsPacket_isEqualDomainName(pack1, pack2) ||
932         !_dnsPacket_isEqualBytes(pack1, pack2, 2+2) )
933        return 0;
934
935    return 1;
936}
937
938static int
939_dnsPacket_isEqualQuery( DnsPacket*  pack1, DnsPacket*  pack2 )
940{
941    int  count1, count2;
942
943    /* compare the headers, ignore most fields */
944    _dnsPacket_rewind(pack1);
945    _dnsPacket_rewind(pack2);
946
947    /* compare RD, ignore TC, see comment in _dnsPacket_checkQuery */
948    if ((pack1->base[2] & 1) != (pack2->base[2] & 1)) {
949        XLOG("different RD");
950        return 0;
951    }
952
953    /* assume: other flags are all 0 */
954    _dnsPacket_skip(pack1, 4);
955    _dnsPacket_skip(pack2, 4);
956
957    /* compare QDCOUNT */
958    count1 = _dnsPacket_readInt16(pack1);
959    count2 = _dnsPacket_readInt16(pack2);
960    if (count1 != count2 || count1 < 0) {
961        XLOG("different QDCOUNT");
962        return 0;
963    }
964
965    /* assume: ANcount, NScount and ARcount are all 0 */
966    _dnsPacket_skip(pack1, 6);
967    _dnsPacket_skip(pack2, 6);
968
969    /* compare the QDCOUNT QRs */
970    for ( ; count1 > 0; count1-- ) {
971        if (!_dnsPacket_isEqualQR(pack1, pack2)) {
972            XLOG("different QR");
973            return 0;
974        }
975    }
976    return 1;
977}
978
979/****************************************************************************/
980/****************************************************************************/
981/*****                                                                  *****/
982/*****                                                                  *****/
983/*****                                                                  *****/
984/****************************************************************************/
985/****************************************************************************/
986
987/* cache entry. for simplicity, 'hash' and 'hlink' are inlined in this
988 * structure though they are conceptually part of the hash table.
989 *
990 * similarly, mru_next and mru_prev are part of the global MRU list
991 */
992typedef struct Entry {
993    unsigned int     hash;   /* hash value */
994    struct Entry*    hlink;  /* next in collision chain */
995    struct Entry*    mru_prev;
996    struct Entry*    mru_next;
997
998    const uint8_t*   query;
999    int              querylen;
1000    const uint8_t*   answer;
1001    int              answerlen;
1002    time_t           expires;   /* time_t when the entry isn't valid any more */
1003    int              id;        /* for debugging purpose */
1004} Entry;
1005
1006/**
1007 * Find the TTL for a negative DNS result.  This is defined as the minimum
1008 * of the SOA records TTL and the MINIMUM-TTL field (RFC-2308).
1009 *
1010 * Return 0 if not found.
1011 */
1012static u_long
1013answer_getNegativeTTL(ns_msg handle) {
1014    int n, nscount;
1015    u_long result = 0;
1016    ns_rr rr;
1017
1018    nscount = ns_msg_count(handle, ns_s_ns);
1019    for (n = 0; n < nscount; n++) {
1020        if ((ns_parserr(&handle, ns_s_ns, n, &rr) == 0) && (ns_rr_type(rr) == ns_t_soa)) {
1021            const u_char *rdata = ns_rr_rdata(rr); // find the data
1022            const u_char *edata = rdata + ns_rr_rdlen(rr); // add the len to find the end
1023            int len;
1024            u_long ttl, rec_result = ns_rr_ttl(rr);
1025
1026            // find the MINIMUM-TTL field from the blob of binary data for this record
1027            // skip the server name
1028            len = dn_skipname(rdata, edata);
1029            if (len == -1) continue; // error skipping
1030            rdata += len;
1031
1032            // skip the admin name
1033            len = dn_skipname(rdata, edata);
1034            if (len == -1) continue; // error skipping
1035            rdata += len;
1036
1037            if (edata - rdata != 5*NS_INT32SZ) continue;
1038            // skip: serial number + refresh interval + retry interval + expiry
1039            rdata += NS_INT32SZ * 4;
1040            // finally read the MINIMUM TTL
1041            ttl = ns_get32(rdata);
1042            if (ttl < rec_result) {
1043                rec_result = ttl;
1044            }
1045            // Now that the record is read successfully, apply the new min TTL
1046            if (n == 0 || rec_result < result) {
1047                result = rec_result;
1048            }
1049        }
1050    }
1051    return result;
1052}
1053
1054/**
1055 * Parse the answer records and find the appropriate
1056 * smallest TTL among the records.  This might be from
1057 * the answer records if found or from the SOA record
1058 * if it's a negative result.
1059 *
1060 * The returned TTL is the number of seconds to
1061 * keep the answer in the cache.
1062 *
1063 * In case of parse error zero (0) is returned which
1064 * indicates that the answer shall not be cached.
1065 */
1066static u_long
1067answer_getTTL(const void* answer, int answerlen)
1068{
1069    ns_msg handle;
1070    int ancount, n;
1071    u_long result, ttl;
1072    ns_rr rr;
1073
1074    result = 0;
1075    if (ns_initparse(answer, answerlen, &handle) >= 0) {
1076        // get number of answer records
1077        ancount = ns_msg_count(handle, ns_s_an);
1078
1079        if (ancount == 0) {
1080            // a response with no answers?  Cache this negative result.
1081            result = answer_getNegativeTTL(handle);
1082        } else {
1083            for (n = 0; n < ancount; n++) {
1084                if (ns_parserr(&handle, ns_s_an, n, &rr) == 0) {
1085                    ttl = ns_rr_ttl(rr);
1086                    if (n == 0 || ttl < result) {
1087                        result = ttl;
1088                    }
1089                } else {
1090                    XLOG("ns_parserr failed ancount no = %d. errno = %s\n", n, strerror(errno));
1091                }
1092            }
1093        }
1094    } else {
1095        XLOG("ns_parserr failed. %s\n", strerror(errno));
1096    }
1097
1098    XLOG("TTL = %lu\n", result);
1099
1100    return result;
1101}
1102
1103static void
1104entry_free( Entry*  e )
1105{
1106    /* everything is allocated in a single memory block */
1107    if (e) {
1108        free(e);
1109    }
1110}
1111
1112static __inline__ void
1113entry_mru_remove( Entry*  e )
1114{
1115    e->mru_prev->mru_next = e->mru_next;
1116    e->mru_next->mru_prev = e->mru_prev;
1117}
1118
1119static __inline__ void
1120entry_mru_add( Entry*  e, Entry*  list )
1121{
1122    Entry*  first = list->mru_next;
1123
1124    e->mru_next = first;
1125    e->mru_prev = list;
1126
1127    list->mru_next  = e;
1128    first->mru_prev = e;
1129}
1130
1131/* compute the hash of a given entry, this is a hash of most
1132 * data in the query (key) */
1133static unsigned
1134entry_hash( const Entry*  e )
1135{
1136    DnsPacket  pack[1];
1137
1138    _dnsPacket_init(pack, e->query, e->querylen);
1139    return _dnsPacket_hashQuery(pack);
1140}
1141
1142/* initialize an Entry as a search key, this also checks the input query packet
1143 * returns 1 on success, or 0 in case of unsupported/malformed data */
1144static int
1145entry_init_key( Entry*  e, const void*  query, int  querylen )
1146{
1147    DnsPacket  pack[1];
1148
1149    memset(e, 0, sizeof(*e));
1150
1151    e->query    = query;
1152    e->querylen = querylen;
1153    e->hash     = entry_hash(e);
1154
1155    _dnsPacket_init(pack, query, querylen);
1156
1157    return _dnsPacket_checkQuery(pack);
1158}
1159
1160/* allocate a new entry as a cache node */
1161static Entry*
1162entry_alloc( const Entry*  init, const void*  answer, int  answerlen )
1163{
1164    Entry*  e;
1165    int     size;
1166
1167    size = sizeof(*e) + init->querylen + answerlen;
1168    e    = calloc(size, 1);
1169    if (e == NULL)
1170        return e;
1171
1172    e->hash     = init->hash;
1173    e->query    = (const uint8_t*)(e+1);
1174    e->querylen = init->querylen;
1175
1176    memcpy( (char*)e->query, init->query, e->querylen );
1177
1178    e->answer    = e->query + e->querylen;
1179    e->answerlen = answerlen;
1180
1181    memcpy( (char*)e->answer, answer, e->answerlen );
1182
1183    return e;
1184}
1185
1186static int
1187entry_equals( const Entry*  e1, const Entry*  e2 )
1188{
1189    DnsPacket  pack1[1], pack2[1];
1190
1191    if (e1->querylen != e2->querylen) {
1192        return 0;
1193    }
1194    _dnsPacket_init(pack1, e1->query, e1->querylen);
1195    _dnsPacket_init(pack2, e2->query, e2->querylen);
1196
1197    return _dnsPacket_isEqualQuery(pack1, pack2);
1198}
1199
1200/****************************************************************************/
1201/****************************************************************************/
1202/*****                                                                  *****/
1203/*****                                                                  *****/
1204/*****                                                                  *****/
1205/****************************************************************************/
1206/****************************************************************************/
1207
1208/* We use a simple hash table with external collision lists
1209 * for simplicity, the hash-table fields 'hash' and 'hlink' are
1210 * inlined in the Entry structure.
1211 */
1212
1213/* Maximum time for a thread to wait for an pending request */
1214#define PENDING_REQUEST_TIMEOUT 20;
1215
1216typedef struct pending_req_info {
1217    unsigned int                hash;
1218    pthread_cond_t              cond;
1219    struct pending_req_info*    next;
1220} PendingReqInfo;
1221
1222typedef struct resolv_cache {
1223    int              max_entries;
1224    int              num_entries;
1225    Entry            mru_list;
1226    int              last_id;
1227    Entry*           entries;
1228    PendingReqInfo   pending_requests;
1229} Cache;
1230
1231struct resolv_cache_info {
1232    unsigned                    netid;
1233    Cache*                      cache;
1234    struct resolv_cache_info*   next;
1235    char*                       nameservers[MAXNS +1];
1236    struct addrinfo*            nsaddrinfo[MAXNS + 1];
1237    char                        defdname[256];
1238    int                         dnsrch_offset[MAXDNSRCH+1];  // offsets into defdname
1239};
1240
1241#define  HTABLE_VALID(x)  ((x) != NULL && (x) != HTABLE_DELETED)
1242
1243static pthread_once_t        _res_cache_once = PTHREAD_ONCE_INIT;
1244static void _res_cache_init(void);
1245
1246// lock protecting everything in the _resolve_cache_info structs (next ptr, etc)
1247static pthread_mutex_t _res_cache_list_lock;
1248
1249/* gets cache associated with a network, or NULL if none exists */
1250static struct resolv_cache* _find_named_cache_locked(unsigned netid);
1251
1252static void
1253_cache_flush_pending_requests_locked( struct resolv_cache* cache )
1254{
1255    struct pending_req_info *ri, *tmp;
1256    if (cache) {
1257        ri = cache->pending_requests.next;
1258
1259        while (ri) {
1260            tmp = ri;
1261            ri = ri->next;
1262            pthread_cond_broadcast(&tmp->cond);
1263
1264            pthread_cond_destroy(&tmp->cond);
1265            free(tmp);
1266        }
1267
1268        cache->pending_requests.next = NULL;
1269    }
1270}
1271
1272/* Return 0 if no pending request is found matching the key.
1273 * If a matching request is found the calling thread will wait until
1274 * the matching request completes, then update *cache and return 1. */
1275static int
1276_cache_check_pending_request_locked( struct resolv_cache** cache, Entry* key, unsigned netid )
1277{
1278    struct pending_req_info *ri, *prev;
1279    int exist = 0;
1280
1281    if (*cache && key) {
1282        ri = (*cache)->pending_requests.next;
1283        prev = &(*cache)->pending_requests;
1284        while (ri) {
1285            if (ri->hash == key->hash) {
1286                exist = 1;
1287                break;
1288            }
1289            prev = ri;
1290            ri = ri->next;
1291        }
1292
1293        if (!exist) {
1294            ri = calloc(1, sizeof(struct pending_req_info));
1295            if (ri) {
1296                ri->hash = key->hash;
1297                pthread_cond_init(&ri->cond, NULL);
1298                prev->next = ri;
1299            }
1300        } else {
1301            struct timespec ts = {0,0};
1302            XLOG("Waiting for previous request");
1303            ts.tv_sec = _time_now() + PENDING_REQUEST_TIMEOUT;
1304            pthread_cond_timedwait(&ri->cond, &_res_cache_list_lock, &ts);
1305            /* Must update *cache as it could have been deleted. */
1306            *cache = _find_named_cache_locked(netid);
1307        }
1308    }
1309
1310    return exist;
1311}
1312
1313/* notify any waiting thread that waiting on a request
1314 * matching the key has been added to the cache */
1315static void
1316_cache_notify_waiting_tid_locked( struct resolv_cache* cache, Entry* key )
1317{
1318    struct pending_req_info *ri, *prev;
1319
1320    if (cache && key) {
1321        ri = cache->pending_requests.next;
1322        prev = &cache->pending_requests;
1323        while (ri) {
1324            if (ri->hash == key->hash) {
1325                pthread_cond_broadcast(&ri->cond);
1326                break;
1327            }
1328            prev = ri;
1329            ri = ri->next;
1330        }
1331
1332        // remove item from list and destroy
1333        if (ri) {
1334            prev->next = ri->next;
1335            pthread_cond_destroy(&ri->cond);
1336            free(ri);
1337        }
1338    }
1339}
1340
1341/* notify the cache that the query failed */
1342void
1343_resolv_cache_query_failed( unsigned    netid,
1344                   const void* query,
1345                   int         querylen)
1346{
1347    Entry    key[1];
1348    Cache*   cache;
1349
1350    if (!entry_init_key(key, query, querylen))
1351        return;
1352
1353    pthread_mutex_lock(&_res_cache_list_lock);
1354
1355    cache = _find_named_cache_locked(netid);
1356
1357    if (cache) {
1358        _cache_notify_waiting_tid_locked(cache, key);
1359    }
1360
1361    pthread_mutex_unlock(&_res_cache_list_lock);
1362}
1363
1364static void
1365_cache_flush_locked( Cache*  cache )
1366{
1367    int     nn;
1368
1369    for (nn = 0; nn < cache->max_entries; nn++)
1370    {
1371        Entry**  pnode = (Entry**) &cache->entries[nn];
1372
1373        while (*pnode != NULL) {
1374            Entry*  node = *pnode;
1375            *pnode = node->hlink;
1376            entry_free(node);
1377        }
1378    }
1379
1380    // flush pending request
1381    _cache_flush_pending_requests_locked(cache);
1382
1383    cache->mru_list.mru_next = cache->mru_list.mru_prev = &cache->mru_list;
1384    cache->num_entries       = 0;
1385    cache->last_id           = 0;
1386
1387    XLOG("*************************\n"
1388         "*** DNS CACHE FLUSHED ***\n"
1389         "*************************");
1390}
1391
1392static int
1393_res_cache_get_max_entries( void )
1394{
1395    int cache_size = CONFIG_MAX_ENTRIES;
1396
1397    const char* cache_mode = getenv("ANDROID_DNS_MODE");
1398    if (cache_mode == NULL || strcmp(cache_mode, "local") != 0) {
1399        // Don't use the cache in local mode. This is used by the proxy itself.
1400        cache_size = 0;
1401    }
1402
1403    XLOG("cache size: %d", cache_size);
1404    return cache_size;
1405}
1406
1407static struct resolv_cache*
1408_resolv_cache_create( void )
1409{
1410    struct resolv_cache*  cache;
1411
1412    cache = calloc(sizeof(*cache), 1);
1413    if (cache) {
1414        cache->max_entries = _res_cache_get_max_entries();
1415        cache->entries = calloc(sizeof(*cache->entries), cache->max_entries);
1416        if (cache->entries) {
1417            cache->mru_list.mru_prev = cache->mru_list.mru_next = &cache->mru_list;
1418            XLOG("%s: cache created\n", __FUNCTION__);
1419        } else {
1420            free(cache);
1421            cache = NULL;
1422        }
1423    }
1424    return cache;
1425}
1426
1427
1428#if DEBUG
1429static void
1430_dump_query( const uint8_t*  query, int  querylen )
1431{
1432    char       temp[256], *p=temp, *end=p+sizeof(temp);
1433    DnsPacket  pack[1];
1434
1435    _dnsPacket_init(pack, query, querylen);
1436    p = _dnsPacket_bprintQuery(pack, p, end);
1437    XLOG("QUERY: %s", temp);
1438}
1439
1440static void
1441_cache_dump_mru( Cache*  cache )
1442{
1443    char    temp[512], *p=temp, *end=p+sizeof(temp);
1444    Entry*  e;
1445
1446    p = _bprint(temp, end, "MRU LIST (%2d): ", cache->num_entries);
1447    for (e = cache->mru_list.mru_next; e != &cache->mru_list; e = e->mru_next)
1448        p = _bprint(p, end, " %d", e->id);
1449
1450    XLOG("%s", temp);
1451}
1452
1453static void
1454_dump_answer(const void* answer, int answerlen)
1455{
1456    res_state statep;
1457    FILE* fp;
1458    char* buf;
1459    int fileLen;
1460
1461    fp = fopen("/data/reslog.txt", "w+e");
1462    if (fp != NULL) {
1463        statep = __res_get_state();
1464
1465        res_pquery(statep, answer, answerlen, fp);
1466
1467        //Get file length
1468        fseek(fp, 0, SEEK_END);
1469        fileLen=ftell(fp);
1470        fseek(fp, 0, SEEK_SET);
1471        buf = (char *)malloc(fileLen+1);
1472        if (buf != NULL) {
1473            //Read file contents into buffer
1474            fread(buf, fileLen, 1, fp);
1475            XLOG("%s\n", buf);
1476            free(buf);
1477        }
1478        fclose(fp);
1479        remove("/data/reslog.txt");
1480    }
1481    else {
1482        errno = 0; // else debug is introducing error signals
1483        XLOG("%s: can't open file\n", __FUNCTION__);
1484    }
1485}
1486#endif
1487
1488#if DEBUG
1489#  define  XLOG_QUERY(q,len)   _dump_query((q), (len))
1490#  define  XLOG_ANSWER(a, len) _dump_answer((a), (len))
1491#else
1492#  define  XLOG_QUERY(q,len)   ((void)0)
1493#  define  XLOG_ANSWER(a,len)  ((void)0)
1494#endif
1495
1496/* This function tries to find a key within the hash table
1497 * In case of success, it will return a *pointer* to the hashed key.
1498 * In case of failure, it will return a *pointer* to NULL
1499 *
1500 * So, the caller must check '*result' to check for success/failure.
1501 *
1502 * The main idea is that the result can later be used directly in
1503 * calls to _resolv_cache_add or _resolv_cache_remove as the 'lookup'
1504 * parameter. This makes the code simpler and avoids re-searching
1505 * for the key position in the htable.
1506 *
1507 * The result of a lookup_p is only valid until you alter the hash
1508 * table.
1509 */
1510static Entry**
1511_cache_lookup_p( Cache*   cache,
1512                 Entry*   key )
1513{
1514    int      index = key->hash % cache->max_entries;
1515    Entry**  pnode = (Entry**) &cache->entries[ index ];
1516
1517    while (*pnode != NULL) {
1518        Entry*  node = *pnode;
1519
1520        if (node == NULL)
1521            break;
1522
1523        if (node->hash == key->hash && entry_equals(node, key))
1524            break;
1525
1526        pnode = &node->hlink;
1527    }
1528    return pnode;
1529}
1530
1531/* Add a new entry to the hash table. 'lookup' must be the
1532 * result of an immediate previous failed _lookup_p() call
1533 * (i.e. with *lookup == NULL), and 'e' is the pointer to the
1534 * newly created entry
1535 */
1536static void
1537_cache_add_p( Cache*   cache,
1538              Entry**  lookup,
1539              Entry*   e )
1540{
1541    *lookup = e;
1542    e->id = ++cache->last_id;
1543    entry_mru_add(e, &cache->mru_list);
1544    cache->num_entries += 1;
1545
1546    XLOG("%s: entry %d added (count=%d)", __FUNCTION__,
1547         e->id, cache->num_entries);
1548}
1549
1550/* Remove an existing entry from the hash table,
1551 * 'lookup' must be the result of an immediate previous
1552 * and succesful _lookup_p() call.
1553 */
1554static void
1555_cache_remove_p( Cache*   cache,
1556                 Entry**  lookup )
1557{
1558    Entry*  e  = *lookup;
1559
1560    XLOG("%s: entry %d removed (count=%d)", __FUNCTION__,
1561         e->id, cache->num_entries-1);
1562
1563    entry_mru_remove(e);
1564    *lookup = e->hlink;
1565    entry_free(e);
1566    cache->num_entries -= 1;
1567}
1568
1569/* Remove the oldest entry from the hash table.
1570 */
1571static void
1572_cache_remove_oldest( Cache*  cache )
1573{
1574    Entry*   oldest = cache->mru_list.mru_prev;
1575    Entry**  lookup = _cache_lookup_p(cache, oldest);
1576
1577    if (*lookup == NULL) { /* should not happen */
1578        XLOG("%s: OLDEST NOT IN HTABLE ?", __FUNCTION__);
1579        return;
1580    }
1581    if (DEBUG) {
1582        XLOG("Cache full - removing oldest");
1583        XLOG_QUERY(oldest->query, oldest->querylen);
1584    }
1585    _cache_remove_p(cache, lookup);
1586}
1587
1588/* Remove all expired entries from the hash table.
1589 */
1590static void _cache_remove_expired(Cache* cache) {
1591    Entry* e;
1592    time_t now = _time_now();
1593
1594    for (e = cache->mru_list.mru_next; e != &cache->mru_list;) {
1595        // Entry is old, remove
1596        if (now >= e->expires) {
1597            Entry** lookup = _cache_lookup_p(cache, e);
1598            if (*lookup == NULL) { /* should not happen */
1599                XLOG("%s: ENTRY NOT IN HTABLE ?", __FUNCTION__);
1600                return;
1601            }
1602            e = e->mru_next;
1603            _cache_remove_p(cache, lookup);
1604        } else {
1605            e = e->mru_next;
1606        }
1607    }
1608}
1609
1610ResolvCacheStatus
1611_resolv_cache_lookup( unsigned              netid,
1612                      const void*           query,
1613                      int                   querylen,
1614                      void*                 answer,
1615                      int                   answersize,
1616                      int                  *answerlen )
1617{
1618    Entry      key[1];
1619    Entry**    lookup;
1620    Entry*     e;
1621    time_t     now;
1622    Cache*     cache;
1623
1624    ResolvCacheStatus  result = RESOLV_CACHE_NOTFOUND;
1625
1626    XLOG("%s: lookup", __FUNCTION__);
1627    XLOG_QUERY(query, querylen);
1628
1629    /* we don't cache malformed queries */
1630    if (!entry_init_key(key, query, querylen)) {
1631        XLOG("%s: unsupported query", __FUNCTION__);
1632        return RESOLV_CACHE_UNSUPPORTED;
1633    }
1634    /* lookup cache */
1635    pthread_once(&_res_cache_once, _res_cache_init);
1636    pthread_mutex_lock(&_res_cache_list_lock);
1637
1638    cache = _find_named_cache_locked(netid);
1639    if (cache == NULL) {
1640        result = RESOLV_CACHE_UNSUPPORTED;
1641        goto Exit;
1642    }
1643
1644    /* see the description of _lookup_p to understand this.
1645     * the function always return a non-NULL pointer.
1646     */
1647    lookup = _cache_lookup_p(cache, key);
1648    e      = *lookup;
1649
1650    if (e == NULL) {
1651        XLOG( "NOT IN CACHE");
1652        // calling thread will wait if an outstanding request is found
1653        // that matching this query
1654        if (!_cache_check_pending_request_locked(&cache, key, netid) || cache == NULL) {
1655            goto Exit;
1656        } else {
1657            lookup = _cache_lookup_p(cache, key);
1658            e = *lookup;
1659            if (e == NULL) {
1660                goto Exit;
1661            }
1662        }
1663    }
1664
1665    now = _time_now();
1666
1667    /* remove stale entries here */
1668    if (now >= e->expires) {
1669        XLOG( " NOT IN CACHE (STALE ENTRY %p DISCARDED)", *lookup );
1670        XLOG_QUERY(e->query, e->querylen);
1671        _cache_remove_p(cache, lookup);
1672        goto Exit;
1673    }
1674
1675    *answerlen = e->answerlen;
1676    if (e->answerlen > answersize) {
1677        /* NOTE: we return UNSUPPORTED if the answer buffer is too short */
1678        result = RESOLV_CACHE_UNSUPPORTED;
1679        XLOG(" ANSWER TOO LONG");
1680        goto Exit;
1681    }
1682
1683    memcpy( answer, e->answer, e->answerlen );
1684
1685    /* bump up this entry to the top of the MRU list */
1686    if (e != cache->mru_list.mru_next) {
1687        entry_mru_remove( e );
1688        entry_mru_add( e, &cache->mru_list );
1689    }
1690
1691    XLOG( "FOUND IN CACHE entry=%p", e );
1692    result = RESOLV_CACHE_FOUND;
1693
1694Exit:
1695    pthread_mutex_unlock(&_res_cache_list_lock);
1696    return result;
1697}
1698
1699
1700void
1701_resolv_cache_add( unsigned              netid,
1702                   const void*           query,
1703                   int                   querylen,
1704                   const void*           answer,
1705                   int                   answerlen )
1706{
1707    Entry    key[1];
1708    Entry*   e;
1709    Entry**  lookup;
1710    u_long   ttl;
1711    Cache*   cache = NULL;
1712
1713    /* don't assume that the query has already been cached
1714     */
1715    if (!entry_init_key( key, query, querylen )) {
1716        XLOG( "%s: passed invalid query ?", __FUNCTION__);
1717        return;
1718    }
1719
1720    pthread_mutex_lock(&_res_cache_list_lock);
1721
1722    cache = _find_named_cache_locked(netid);
1723    if (cache == NULL) {
1724        goto Exit;
1725    }
1726
1727    XLOG( "%s: query:", __FUNCTION__ );
1728    XLOG_QUERY(query,querylen);
1729    XLOG_ANSWER(answer, answerlen);
1730#if DEBUG_DATA
1731    XLOG( "answer:");
1732    XLOG_BYTES(answer,answerlen);
1733#endif
1734
1735    lookup = _cache_lookup_p(cache, key);
1736    e      = *lookup;
1737
1738    if (e != NULL) { /* should not happen */
1739        XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
1740             __FUNCTION__, e);
1741        goto Exit;
1742    }
1743
1744    if (cache->num_entries >= cache->max_entries) {
1745        _cache_remove_expired(cache);
1746        if (cache->num_entries >= cache->max_entries) {
1747            _cache_remove_oldest(cache);
1748        }
1749        /* need to lookup again */
1750        lookup = _cache_lookup_p(cache, key);
1751        e      = *lookup;
1752        if (e != NULL) {
1753            XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
1754                __FUNCTION__, e);
1755            goto Exit;
1756        }
1757    }
1758
1759    ttl = answer_getTTL(answer, answerlen);
1760    if (ttl > 0) {
1761        e = entry_alloc(key, answer, answerlen);
1762        if (e != NULL) {
1763            e->expires = ttl + _time_now();
1764            _cache_add_p(cache, lookup, e);
1765        }
1766    }
1767#if DEBUG
1768    _cache_dump_mru(cache);
1769#endif
1770Exit:
1771    if (cache != NULL) {
1772      _cache_notify_waiting_tid_locked(cache, key);
1773    }
1774    pthread_mutex_unlock(&_res_cache_list_lock);
1775}
1776
1777/****************************************************************************/
1778/****************************************************************************/
1779/*****                                                                  *****/
1780/*****                                                                  *****/
1781/*****                                                                  *****/
1782/****************************************************************************/
1783/****************************************************************************/
1784
1785// Head of the list of caches.  Protected by _res_cache_list_lock.
1786static struct resolv_cache_info _res_cache_list;
1787
1788/* insert resolv_cache_info into the list of resolv_cache_infos */
1789static void _insert_cache_info_locked(struct resolv_cache_info* cache_info);
1790/* creates a resolv_cache_info */
1791static struct resolv_cache_info* _create_cache_info( void );
1792/* gets a resolv_cache_info associated with a network, or NULL if not found */
1793static struct resolv_cache_info* _find_cache_info_locked(unsigned netid);
1794/* look up the named cache, and creates one if needed */
1795static struct resolv_cache* _get_res_cache_for_net_locked(unsigned netid);
1796/* empty the named cache */
1797static void _flush_cache_for_net_locked(unsigned netid);
1798/* empty the nameservers set for the named cache */
1799static void _free_nameservers_locked(struct resolv_cache_info* cache_info);
1800/* return 1 if the provided list of name servers differs from the list of name servers
1801 * currently attached to the provided cache_info */
1802static int _resolv_is_nameservers_equal_locked(struct resolv_cache_info* cache_info,
1803        const char** servers, int numservers);
1804
1805static void
1806_res_cache_init(void)
1807{
1808    const char*  env = getenv(CONFIG_ENV);
1809
1810    if (env && atoi(env) == 0) {
1811        /* the cache is disabled */
1812        return;
1813    }
1814
1815    memset(&_res_cache_list, 0, sizeof(_res_cache_list));
1816    pthread_mutex_init(&_res_cache_list_lock, NULL);
1817}
1818
1819static struct resolv_cache*
1820_get_res_cache_for_net_locked(unsigned netid)
1821{
1822    struct resolv_cache* cache = _find_named_cache_locked(netid);
1823    if (!cache) {
1824        struct resolv_cache_info* cache_info = _create_cache_info();
1825        if (cache_info) {
1826            cache = _resolv_cache_create();
1827            if (cache) {
1828                cache_info->cache = cache;
1829                cache_info->netid = netid;
1830                _insert_cache_info_locked(cache_info);
1831            } else {
1832                free(cache_info);
1833            }
1834        }
1835    }
1836    return cache;
1837}
1838
1839void
1840_resolv_flush_cache_for_net(unsigned netid)
1841{
1842    pthread_once(&_res_cache_once, _res_cache_init);
1843    pthread_mutex_lock(&_res_cache_list_lock);
1844
1845    _flush_cache_for_net_locked(netid);
1846
1847    pthread_mutex_unlock(&_res_cache_list_lock);
1848}
1849
1850static void
1851_flush_cache_for_net_locked(unsigned netid)
1852{
1853    struct resolv_cache* cache = _find_named_cache_locked(netid);
1854    if (cache) {
1855        _cache_flush_locked(cache);
1856    }
1857}
1858
1859void _resolv_delete_cache_for_net(unsigned netid)
1860{
1861    pthread_once(&_res_cache_once, _res_cache_init);
1862    pthread_mutex_lock(&_res_cache_list_lock);
1863
1864    struct resolv_cache_info* prev_cache_info = &_res_cache_list;
1865
1866    while (prev_cache_info->next) {
1867        struct resolv_cache_info* cache_info = prev_cache_info->next;
1868
1869        if (cache_info->netid == netid) {
1870            prev_cache_info->next = cache_info->next;
1871            _cache_flush_locked(cache_info->cache);
1872            free(cache_info->cache->entries);
1873            free(cache_info->cache);
1874            _free_nameservers_locked(cache_info);
1875            free(cache_info);
1876            break;
1877        }
1878
1879        prev_cache_info = prev_cache_info->next;
1880    }
1881
1882    pthread_mutex_unlock(&_res_cache_list_lock);
1883}
1884
1885static struct resolv_cache_info*
1886_create_cache_info(void)
1887{
1888    struct resolv_cache_info* cache_info;
1889
1890    cache_info = calloc(sizeof(*cache_info), 1);
1891    return cache_info;
1892}
1893
1894static void
1895_insert_cache_info_locked(struct resolv_cache_info* cache_info)
1896{
1897    struct resolv_cache_info* last;
1898
1899    for (last = &_res_cache_list; last->next; last = last->next);
1900
1901    last->next = cache_info;
1902
1903}
1904
1905static struct resolv_cache*
1906_find_named_cache_locked(unsigned netid) {
1907
1908    struct resolv_cache_info* info = _find_cache_info_locked(netid);
1909
1910    if (info != NULL) return info->cache;
1911
1912    return NULL;
1913}
1914
1915static struct resolv_cache_info*
1916_find_cache_info_locked(unsigned netid)
1917{
1918    struct resolv_cache_info* cache_info = _res_cache_list.next;
1919
1920    while (cache_info) {
1921        if (cache_info->netid == netid) {
1922            break;
1923        }
1924
1925        cache_info = cache_info->next;
1926    }
1927    return cache_info;
1928}
1929
1930void
1931_resolv_set_nameservers_for_net(unsigned netid, const char** servers, int numservers,
1932        const char *domains)
1933{
1934    int i, rt, index;
1935    struct addrinfo hints;
1936    char sbuf[NI_MAXSERV];
1937    register char *cp;
1938    int *offset;
1939
1940    pthread_once(&_res_cache_once, _res_cache_init);
1941    pthread_mutex_lock(&_res_cache_list_lock);
1942
1943    // creates the cache if not created
1944    _get_res_cache_for_net_locked(netid);
1945
1946    struct resolv_cache_info* cache_info = _find_cache_info_locked(netid);
1947
1948    if (cache_info != NULL &&
1949            !_resolv_is_nameservers_equal_locked(cache_info, servers, numservers)) {
1950        // free current before adding new
1951        _free_nameservers_locked(cache_info);
1952
1953        memset(&hints, 0, sizeof(hints));
1954        hints.ai_family = PF_UNSPEC;
1955        hints.ai_socktype = SOCK_DGRAM; /*dummy*/
1956        hints.ai_flags = AI_NUMERICHOST;
1957        snprintf(sbuf, sizeof(sbuf), "%u", NAMESERVER_PORT);
1958
1959        index = 0;
1960        for (i = 0; i < numservers && i < MAXNS; i++) {
1961            rt = getaddrinfo(servers[i], sbuf, &hints, &cache_info->nsaddrinfo[index]);
1962            if (rt == 0) {
1963                cache_info->nameservers[index] = strdup(servers[i]);
1964                index++;
1965                XLOG("%s: netid = %u, addr = %s\n", __FUNCTION__, netid, servers[i]);
1966            } else {
1967                cache_info->nsaddrinfo[index] = NULL;
1968            }
1969        }
1970
1971        // code moved from res_init.c, load_domain_search_list
1972        strlcpy(cache_info->defdname, domains, sizeof(cache_info->defdname));
1973        if ((cp = strchr(cache_info->defdname, '\n')) != NULL)
1974            *cp = '\0';
1975        cp = cache_info->defdname;
1976        offset = cache_info->dnsrch_offset;
1977        while (offset < cache_info->dnsrch_offset + MAXDNSRCH) {
1978            while (*cp == ' ' || *cp == '\t') /* skip leading white space */
1979                cp++;
1980            if (*cp == '\0') /* stop if nothing more to do */
1981                break;
1982            *offset++ = cp - cache_info->defdname; /* record this search domain */
1983            while (*cp) { /* zero-terminate it */
1984                if (*cp == ' '|| *cp == '\t') {
1985                    *cp++ = '\0';
1986                    break;
1987                }
1988                cp++;
1989            }
1990        }
1991        *offset = -1; /* cache_info->dnsrch_offset has MAXDNSRCH+1 items */
1992
1993        // flush cache since new settings
1994        _flush_cache_for_net_locked(netid);
1995
1996    }
1997
1998    pthread_mutex_unlock(&_res_cache_list_lock);
1999}
2000
2001static int
2002_resolv_is_nameservers_equal_locked(struct resolv_cache_info* cache_info,
2003        const char** servers, int numservers)
2004{
2005    int i;
2006    char** ns;
2007    int currentservers;
2008    int equal = 1;
2009
2010    if (numservers > MAXNS) numservers = MAXNS;
2011
2012    // Find out how many nameservers we had before.
2013    currentservers = 0;
2014    for (ns = cache_info->nameservers; *ns; ns++)
2015        currentservers++;
2016
2017    if (currentservers != numservers)
2018        return 0;
2019
2020    // Compare each name server against current name servers.
2021    // TODO: this is incorrect if the list of current or previous nameservers
2022    // contains duplicates. This does not really matter because the framework
2023    // filters out duplicates, but we should probably fix it. It's also
2024    // insensitive to the order of the nameservers; we should probably fix that
2025    // too.
2026    for (i = 0; i < numservers && equal; i++) {
2027        ns = cache_info->nameservers;
2028        equal = 0;
2029        while(*ns) {
2030            if (strcmp(*ns, servers[i]) == 0) {
2031                equal = 1;
2032                break;
2033            }
2034            ns++;
2035        }
2036    }
2037
2038    return equal;
2039}
2040
2041static void
2042_free_nameservers_locked(struct resolv_cache_info* cache_info)
2043{
2044    int i;
2045    for (i = 0; i <= MAXNS; i++) {
2046        free(cache_info->nameservers[i]);
2047        cache_info->nameservers[i] = NULL;
2048        if (cache_info->nsaddrinfo[i] != NULL) {
2049            freeaddrinfo(cache_info->nsaddrinfo[i]);
2050            cache_info->nsaddrinfo[i] = NULL;
2051        }
2052    }
2053}
2054
2055void
2056_resolv_populate_res_for_net(res_state statp)
2057{
2058    if (statp == NULL) {
2059        return;
2060    }
2061
2062    pthread_once(&_res_cache_once, _res_cache_init);
2063    pthread_mutex_lock(&_res_cache_list_lock);
2064
2065    struct resolv_cache_info* info = _find_cache_info_locked(statp->netid);
2066    if (info != NULL) {
2067        int nserv;
2068        struct addrinfo* ai;
2069        XLOG("%s: %u\n", __FUNCTION__, statp->netid);
2070        for (nserv = 0; nserv < MAXNS; nserv++) {
2071            ai = info->nsaddrinfo[nserv];
2072            if (ai == NULL) {
2073                break;
2074            }
2075
2076            if ((size_t) ai->ai_addrlen <= sizeof(statp->_u._ext.ext->nsaddrs[0])) {
2077                if (statp->_u._ext.ext != NULL) {
2078                    memcpy(&statp->_u._ext.ext->nsaddrs[nserv], ai->ai_addr, ai->ai_addrlen);
2079                    statp->nsaddr_list[nserv].sin_family = AF_UNSPEC;
2080                } else {
2081                    if ((size_t) ai->ai_addrlen
2082                            <= sizeof(statp->nsaddr_list[0])) {
2083                        memcpy(&statp->nsaddr_list[nserv], ai->ai_addr,
2084                                ai->ai_addrlen);
2085                    } else {
2086                        statp->nsaddr_list[nserv].sin_family = AF_UNSPEC;
2087                    }
2088                }
2089            } else {
2090                XLOG("%s: found too long addrlen", __FUNCTION__);
2091            }
2092        }
2093        statp->nscount = nserv;
2094        // now do search domains.  Note that we cache the offsets as this code runs alot
2095        // but the setting/offset-computer only runs when set/changed
2096        // WARNING: Don't use str*cpy() here, this string contains zeroes.
2097        memcpy(statp->defdname, info->defdname, sizeof(statp->defdname));
2098        register char **pp = statp->dnsrch;
2099        register int *p = info->dnsrch_offset;
2100        while (pp < statp->dnsrch + MAXDNSRCH && *p != -1) {
2101            *pp++ = &statp->defdname[0] + *p++;
2102        }
2103    }
2104    pthread_mutex_unlock(&_res_cache_list_lock);
2105}
2106